forked from MihaZupan/runtime-utils
-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[X64] [MihaZupan] Improve Ascii.FromUtf16 and FromHexString #395
Comments
Top method improvements

-54 (-6.09 % of base) - System.Text.Ascii:NarrowUtf16ToAscii(ulong,ulong,ulong):ulong

; Assembly listing for method System.Text.Ascii:NarrowUtf16ToAscii(ulong,ulong,ulong):ulong (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Unix
; FullOpts code
; optimized code
; rbp based frame
; fully interruptible
; No PGO data
-; 0 inlinees with PGO data; 13 single block inlinees; 17 inlinees without PGO data
+; 0 inlinees with PGO data; 21 single block inlinees; 25 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T06] ( 9, 9 ) long -> rdi single-def
; V01 arg1 [V01,T04] ( 15, 12 ) long -> rsi single-def
; V02 arg2 [V02,T11] ( 9, 6 ) long -> rdx single-def
; V03 loc0 [V03,T00] ( 23, 30 ) long -> rax
; V04 loc1 [V04,T12] ( 13, 6.50) int -> rcx
;* V05 loc2 [V05 ] ( 0, 0 ) int -> zero-ref
; V06 loc3 [V06,T05] ( 7, 14 ) long -> registers
; V07 loc4 [V07,T22] ( 5, 2.50) long -> rdx
; V08 loc5 [V08,T16] ( 2, 4.50) long -> rcx
;# V09 OutArgs [V09 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
; V10 tmp1 [V10,T23] ( 3, 1.50) long -> rax "Inline return value spill temp"
; V11 tmp2 [V11,T07] ( 5, 9.50) byref -> rax single-def "Inline stloc first use temp"
; V12 tmp3 [V12,T30] ( 14, 17.50) simd64 -> mm0 "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector512`1[ushort]>
; V13 tmp4 [V13,T13] ( 5, 6 ) byref -> rcx single-def "Inline stloc first use temp"
-; V14 tmp5 [V14,T01] ( 12, 27 ) long -> r8 "Inline stloc first use temp"
-; V15 tmp6 [V15,T17] ( 2, 4.50) long -> r9 "Inline stloc first use temp"
-; V16 tmp7 [V16,T34] ( 3, 12 ) simd64 -> mm2 "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector512`1[ushort]>
-;* V17 tmp8 [V17 ] ( 0, 0 ) simd64 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector512`1[ushort]>
-; V18 tmp9 [V18,T24] ( 3, 1.50) long -> rax "Inline return value spill temp"
-; V19 tmp10 [V19,T08] ( 5, 9.50) byref -> rax single-def "Inline stloc first use temp"
-; V20 tmp11 [V20,T31] ( 14, 17.50) simd32 -> mm0 "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
-; V21 tmp12 [V21,T14] ( 5, 6 ) byref -> rcx single-def "Inline stloc first use temp"
-; V22 tmp13 [V22,T02] ( 12, 27 ) long -> r8 "Inline stloc first use temp"
-; V23 tmp14 [V23,T18] ( 2, 4.50) long -> r9 "Inline stloc first use temp"
-; V24 tmp15 [V24,T35] ( 3, 12 ) simd32 -> mm2 "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
-;* V25 tmp16 [V25 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V26 tmp17 [V26 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
-;* V27 tmp18 [V27 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V28 tmp19 [V28 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
-;* V29 tmp20 [V29 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V30 tmp21 [V30 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
-;* V31 tmp22 [V31 ] ( 0, 0 ) simd32 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ushort]>
-;* V32 tmp23 [V32 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V33 tmp24 [V33 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
-; V34 tmp25 [V34,T25] ( 3, 1.50) long -> rax "Inline return value spill temp"
-;* V35 tmp26 [V35,T27] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
-;* V36 tmp27 [V36 ] ( 0, 0 ) long -> zero-ref "Inline stloc first use temp"
-; V37 tmp28 [V37,T09] ( 5, 9.50) byref -> rax single-def "Inline stloc first use temp"
-; V38 tmp29 [V38,T32] ( 14, 17.50) simd16 -> mm0 "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
-; V39 tmp30 [V39,T15] ( 5, 6 ) byref -> rcx single-def "Inline stloc first use temp"
-;* V40 tmp31 [V40 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
-; V41 tmp32 [V41,T03] ( 11, 26.50) long -> r8 "Inline stloc first use temp"
-; V42 tmp33 [V42,T19] ( 2, 4.50) long -> r9 "Inline stloc first use temp"
-; V43 tmp34 [V43,T36] ( 3, 12 ) simd16 -> mm2 "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
-;* V44 tmp35 [V44 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
-; V45 tmp36 [V45,T33] ( 2, 16 ) simd16 -> mm0 "Spilling op1 side effects for HWIntrinsic"
-;* V46 tmp37 [V46 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
-;* V47 tmp38 [V47 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V48 tmp39 [V48 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
-;* V49 tmp40 [V49 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;* V50 tmp41 [V50 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V51 tmp42 [V51 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
-;* V52 tmp43 [V52 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;* V53 tmp44 [V53 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V54 tmp45 [V54 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
-;* V55 tmp46 [V55 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ushort]>
-;* V56 tmp47 [V56 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V57 tmp48 [V57 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
-;* V58 tmp49 [V58 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-;* V59 tmp50 [V59 ] ( 0, 0 ) long -> zero-ref "Inlining Arg"
-; V60 tmp51 [V60,T28] ( 3, 24 ) simd16 -> mm0 "dup spill"
-;* V61 tmp52 [V61 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[uint]>
-;* V62 tmp53 [V62 ] ( 0, 0 ) byref -> zero-ref "Inlining Arg"
-; V63 tmp54 [V63,T20] ( 3, 3 ) byref -> r8 single-def "Inlining Arg"
-; V64 tmp55 [V64,T21] ( 3, 3 ) byref -> rdx "Inlining Arg"
-;* V65 tmp56 [V65,T26] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-; V66 cse0 [V66,T10] ( 3, 8.50) long -> r10 "CSE #12: conservative"
-; V67 cse1 [V67,T37] ( 5, 6 ) simd64 -> mm1 "CSE #02: conservative"
-; V68 cse2 [V68,T38] ( 5, 6 ) simd32 -> mm1 "CSE #07: conservative"
-; V69 cse3 [V69,T39] ( 5, 6 ) simd16 -> mm1 "CSE #11: conservative"
-; V70 rat0 [V70,T29] ( 3, 24 ) simd64 -> mm3 "ReplaceWithLclVar is creating a new local variable"
+;* V14 tmp5 [V14 ] ( 0, 0 ) simd64 -> zero-ref "spilled call-like call argument"
+; V15 tmp6 [V15,T01] ( 12, 27 ) long -> r8 "Inline stloc first use temp"
+; V16 tmp7 [V16,T17] ( 2, 4.50) long -> r9 "Inline stloc first use temp"
+; V17 tmp8 [V17,T36] ( 3, 12 ) simd64 -> mm3 "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector512`1[ushort]>
+;* V18 tmp9 [V18 ] ( 0, 0 ) simd64 -> zero-ref "spilled call-like call argument"
+; V19 tmp10 [V19,T33] ( 2, 16 ) simd64 -> mm0 "Spilling op1 side effects for HWIntrinsic"
+;* V20 tmp11 [V20 ] ( 0, 0 ) simd64 -> zero-ref "spilled call-like call argument"
+;* V21 tmp12 [V21 ] ( 0, 0 ) simd64 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
+;* V22 tmp13 [V22 ] ( 0, 0 ) simd64 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
+;* V23 tmp14 [V23 ] ( 0, 0 ) simd64 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
+;* V24 tmp15 [V24 ] ( 0, 0 ) simd64 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
+;* V25 tmp16 [V25 ] ( 0, 0 ) simd64 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector512`1[ushort]>
+;* V26 tmp17 [V26 ] ( 0, 0 ) simd64 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
+;* V27 tmp18 [V27 ] ( 0, 0 ) simd64 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
+;* V28 tmp19 [V28 ] ( 0, 0 ) simd64 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector512`1[ubyte]>
+; V29 tmp20 [V29,T24] ( 3, 1.50) long -> rax "Inline return value spill temp"
+; V30 tmp21 [V30,T08] ( 5, 9.50) byref -> rax single-def "Inline stloc first use temp"
+; V31 tmp22 [V31,T31] ( 14, 17.50) simd32 -> mm0 "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+; V32 tmp23 [V32,T14] ( 5, 6 ) byref -> rcx single-def "Inline stloc first use temp"
+;* V33 tmp24 [V33 ] ( 0, 0 ) simd32 -> zero-ref "spilled call-like call argument"
+; V34 tmp25 [V34,T02] ( 12, 27 ) long -> r8 "Inline stloc first use temp"
+; V35 tmp26 [V35,T18] ( 2, 4.50) long -> r9 "Inline stloc first use temp"
+; V36 tmp27 [V36,T37] ( 3, 12 ) simd32 -> mm2 "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;* V37 tmp28 [V37 ] ( 0, 0 ) simd32 -> zero-ref "spilled call-like call argument"
+; V38 tmp29 [V38,T34] ( 2, 16 ) simd32 -> mm0 "Spilling op1 side effects for HWIntrinsic"
+;* V39 tmp30 [V39 ] ( 0, 0 ) simd32 -> zero-ref "spilled call-like call argument"
+;* V40 tmp31 [V40 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V41 tmp32 [V41 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;* V42 tmp33 [V42 ] ( 0, 0 ) simd32 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;* V43 tmp34 [V43 ] ( 0, 0 ) simd32 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;* V44 tmp35 [V44 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V45 tmp36 [V45 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;* V46 tmp37 [V46 ] ( 0, 0 ) simd32 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;* V47 tmp38 [V47 ] ( 0, 0 ) simd32 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;* V48 tmp39 [V48 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V49 tmp40 [V49 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;* V50 tmp41 [V50 ] ( 0, 0 ) simd32 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;* V51 tmp42 [V51 ] ( 0, 0 ) simd32 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;* V52 tmp43 [V52 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V53 tmp44 [V53 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;* V54 tmp45 [V54 ] ( 0, 0 ) simd32 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;* V55 tmp46 [V55 ] ( 0, 0 ) simd32 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+; V56 tmp47 [V56,T25] ( 3, 1.50) long -> rax "Inline return value spill temp"
+;* V57 tmp48 [V57,T27] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+;* V58 tmp49 [V58 ] ( 0, 0 ) long -> zero-ref "Inline stloc first use temp"
+; V59 tmp50 [V59,T09] ( 5, 9.50) byref -> rax single-def "Inline stloc first use temp"
+; V60 tmp51 [V60,T32] ( 14, 17.50) simd16 -> mm0 "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
+; V61 tmp52 [V61,T15] ( 5, 6 ) byref -> rcx single-def "Inline stloc first use temp"
+;* V62 tmp53 [V62 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
+; V63 tmp54 [V63,T03] ( 11, 26.50) long -> r8 "Inline stloc first use temp"
+; V64 tmp55 [V64,T19] ( 2, 4.50) long -> r9 "Inline stloc first use temp"
+; V65 tmp56 [V65,T38] ( 3, 12 ) simd16 -> mm2 "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
+;* V66 tmp57 [V66 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
+; V67 tmp58 [V67,T35] ( 2, 16 ) simd16 -> mm0 "Spilling op1 side effects for HWIntrinsic"
+;* V68 tmp59 [V68 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
+;* V69 tmp60 [V69 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V70 tmp61 [V70 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
+;* V71 tmp62 [V71 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;* V72 tmp63 [V72 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V73 tmp64 [V73 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
+;* V74 tmp65 [V74 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;* V75 tmp66 [V75 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V76 tmp67 [V76 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
+;* V77 tmp68 [V77 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ushort]>
+;* V78 tmp69 [V78 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V79 tmp70 [V79 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ushort]>
+;* V80 tmp71 [V80 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+;* V81 tmp72 [V81 ] ( 0, 0 ) long -> zero-ref "Inlining Arg"
+; V82 tmp73 [V82,T28] ( 3, 24 ) simd16 -> mm0 "dup spill"
+;* V83 tmp74 [V83 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[uint]>
+;* V84 tmp75 [V84 ] ( 0, 0 ) byref -> zero-ref "Inlining Arg"
+; V85 tmp76 [V85,T20] ( 3, 3 ) byref -> r8 single-def "Inlining Arg"
+; V86 tmp77 [V86,T21] ( 3, 3 ) byref -> rdx "Inlining Arg"
+;* V87 tmp78 [V87,T26] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+; V88 cse0 [V88,T10] ( 3, 8.50) long -> r10 "CSE #05: conservative"
+; V89 cse1 [V89,T39] ( 5, 6 ) simd64 -> mm1 "CSE #01: conservative"
+; V90 cse2 [V90,T40] ( 5, 6 ) simd32 -> mm1 "CSE #03: conservative"
+; V91 cse3 [V91,T41] ( 5, 6 ) simd16 -> mm1 "CSE #04: conservative"
+; V92 cse4 [V92,T42] ( 5, 6 ) simd64 -> mm2 "CSE #02: conservative"
+; V93 rat0 [V93,T29] ( 3, 24 ) simd64 -> mm4 "ReplaceWithLclVar is creating a new local variable"
;
; Lcl frame size = 0
G_M6063_IG01:
push rbp
mov rbp, rsp
;; size=4 bbWeight=1 PerfScore 1.25
G_M6063_IG02:
xor eax, eax
cmp rdx, 32
jb G_M6063_IG26
;; size=12 bbWeight=1 PerfScore 1.50
G_M6063_IG03:
mov rcx, qword ptr [rdi]
mov r8, 0xD1FFAB1E
test rcx, r8
mov r8, rcx
jne G_M6063_IG28
cmp rdx, 128
jae G_M6063_IG19
cmp rdx, 64
jae G_M6063_IG11
mov rax, rdi
vmovups xmm0, xmmword ptr [rax]
vmovups xmm1, xmmword ptr [reloc @RWD00]
vptest xmm0, xmm1
jne G_M6063_IG09
mov rcx, rsi
vpackuswb xmm0, xmm0, xmm0
vmovsd qword ptr [rcx], xmm0
mov r8d, 8
test sil, 8
jne SHORT G_M6063_IG04
vmovups xmm0, xmmword ptr [rax+0x10]
vptest xmm0, xmm1
jne SHORT G_M6063_IG08
vpackuswb xmm0, xmm0, xmm0
vmovsd qword ptr [rcx+0x08], xmm0
;; size=118 bbWeight=0.50 PerfScore 16.62
G_M6063_IG04:
mov r8, rsi
and r8, 15
neg r8
add r8, 16
lea r9, [rdx-0x10]
align [0 bytes for IG05]
;; size=18 bbWeight=0.50 PerfScore 0.75
G_M6063_IG05:
vmovups xmm0, xmmword ptr [rax+2*r8]
lea r10, [r8+0x08]
vmovups xmm2, xmmword ptr [rax+2*r10]
vpor xmm3, xmm0, xmm2
vptest xmm3, xmm1
je SHORT G_M6063_IG07
;; size=27 bbWeight=4 PerfScore 51.33
G_M6063_IG06:
vptest xmm0, xmm1
jne SHORT G_M6063_IG08
vpackuswb xmm0, xmm0, xmm0
vmovsd qword ptr [rcx+r8], xmm0
mov r8, r10
jmp SHORT G_M6063_IG08
align [0 bytes for IG13]
;; size=22 bbWeight=0.50 PerfScore 4.62
G_M6063_IG07:
vpackuswb xmm0, xmm0, xmm2
vmovups xmmword ptr [rcx+r8], xmm0
add r8, 16
cmp r8, r9
jbe SHORT G_M6063_IG05
;; size=19 bbWeight=4 PerfScore 18.00
G_M6063_IG08:
mov rax, r8
jmp SHORT G_M6063_IG10
;; size=5 bbWeight=0.50 PerfScore 1.12
G_M6063_IG09:
xor eax, eax
;; size=2 bbWeight=0.50 PerfScore 0.12
G_M6063_IG10:
jmp G_M6063_IG26
;; size=5 bbWeight=0.50 PerfScore 1.00
G_M6063_IG11:
mov rax, rdi
vmovups ymm0, ymmword ptr [rax]
vmovups ymm1, ymmword ptr [reloc @RWD32]
vptest ymm0, ymm1
jne G_M6063_IG17
mov rcx, rsi
- vpmovwb ymm0, ymm2
- vpmovwb ymm0, ymm0
- vinserti128 ymm0, ymm2, xmm0, 1
+ vpackuswb ymm0, ymm0, ymm0
+ vpermq ymm0, ymm0, -40
vmovups xmmword ptr [rcx], xmm0
mov r8d, 16
test sil, 16
jne SHORT G_M6063_IG12
vmovups ymm0, ymmword ptr [rax+0x20]
vptest ymm0, ymm1
- jne G_M6063_IG16
- vpmovwb ymm0, ymm2
- vpmovwb ymm0, ymm0
- vinserti128 ymm0, ymm2, xmm0, 1
+ jne SHORT G_M6063_IG16
+ vpackuswb ymm0, ymm0, ymm0
+ vpermq ymm0, ymm0, -40
vmovups xmmword ptr [rcx+0x10], xmm0
- ;; size=102 bbWeight=0.50 PerfScore 24.00
+ ;; size=82 bbWeight=0.50 PerfScore 19.00
G_M6063_IG12:
mov r8, rsi
and r8, 31
neg r8
add r8, 32
lea r9, [rdx-0x20]
;; size=18 bbWeight=0.50 PerfScore 0.75
G_M6063_IG13:
vmovups ymm0, ymmword ptr [rax+2*r8]
vmovups ymm2, ymmword ptr [rax+2*r8+0x20]
vpor ymm3, ymm0, ymm2
vptest ymm3, ymm1
je SHORT G_M6063_IG15
;; size=24 bbWeight=4 PerfScore 65.33
G_M6063_IG14:
vptest ymm0, ymm1
jne SHORT G_M6063_IG16
- vpmovwb ymm0, ymm2
- vpmovwb ymm0, ymm0
- vinserti128 ymm1, ymm2, xmm0, 1
- vmovups xmmword ptr [rcx+r8], xmm1
+ vpackuswb ymm0, ymm0, ymm0
+ vpermq ymm2, ymm0, -40
+ vmovups xmmword ptr [rcx+r8], xmm2
add r8, 16
jmp SHORT G_M6063_IG16
align [0 bytes for IG21]
- ;; size=37 bbWeight=0.50 PerfScore 9.12
+ ;; size=29 bbWeight=0.50 PerfScore 6.62
G_M6063_IG15:
- vpmovwb ymm0, ymm0
- vpmovwb ymm2, ymm2
- vinserti128 ymm0, ymm0, xmm2, 1
+ vpackuswb ymm0, ymm0, ymm2
+ vpermq ymm0, ymm0, -40
vmovups ymmword ptr [rcx+r8], ymm0
add r8, 32
cmp r8, r9
jbe SHORT G_M6063_IG13
- ;; size=33 bbWeight=4 PerfScore 46.00
+ ;; size=25 bbWeight=4 PerfScore 26.00
G_M6063_IG16:
mov rax, r8
jmp SHORT G_M6063_IG18
;; size=5 bbWeight=0.50 PerfScore 1.12
G_M6063_IG17:
xor eax, eax
;; size=2 bbWeight=0.50 PerfScore 0.12
G_M6063_IG18:
jmp G_M6063_IG26
;; size=5 bbWeight=0.50 PerfScore 1.00
G_M6063_IG19:
mov rax, rdi
vmovups zmm0, zmmword ptr [rax]
vmovups zmm1, zmmword ptr [reloc @RWD64]
vptestmw k1, zmm1, zmm0
kortestd k1, k1
jne G_M6063_IG25
mov rcx, rsi
- vpmovwb zmm0, zmm2
- vpmovwb zmm0, zmm0
- vinserti64x4 zmm0, zmm2, ymm0, 1
+ vpackuswb zmm0, zmm0, zmm0
+ vmovups zmm2, zmmword ptr [reloc @RWD128]
+ vpermq zmm0, zmm2, zmm0
vmovups ymmword ptr [rcx], ymm0
mov r8d, 32
test sil, 32
jne SHORT G_M6063_IG20
vmovups zmm0, zmmword ptr [rax+0x40]
vptestmw k1, zmm1, zmm0
kortestd k1, k1
jne G_M6063_IG24
- vpmovwb zmm0, zmm2
- vpmovwb zmm0, zmm0
- vinserti64x4 zmm0, zmm2, ymm0, 1
+ vpackuswb zmm0, zmm0, zmm0
+ vpermq zmm0, zmm2, zmm0
vmovups ymmword ptr [rcx+0x20], ymm0
- ;; size=122 bbWeight=0.50 PerfScore 21.50
+ ;; size=118 bbWeight=0.50 PerfScore 18.00
G_M6063_IG20:
mov r8, rsi
and r8, 63
neg r8
add r8, 64
lea r9, [rdx-0x40]
;; size=18 bbWeight=0.50 PerfScore 0.75
G_M6063_IG21:
vmovups zmm0, zmmword ptr [rax+2*r8]
- vmovups zmm2, zmmword ptr [rax+2*r8+0x40]
- vmovaps zmm3, zmm0
- vpternlogd zmm3, zmm2, zmm1, -88
- vptestmw k1, zmm3, zmm3
+ vmovups zmm3, zmmword ptr [rax+2*r8+0x40]
+ vmovaps zmm4, zmm0
+ vpternlogd zmm4, zmm3, zmm1, -88
+ vptestmw k1, zmm4, zmm4
kortestd k1, k1
je SHORT G_M6063_IG23
;; size=41 bbWeight=4 PerfScore 55.00
G_M6063_IG22:
vptestmw k1, zmm1, zmm0
kortestd k1, k1
jne SHORT G_M6063_IG24
- vpmovwb zmm0, zmm2
- vpmovwb zmm0, zmm0
- vinserti64x4 zmm1, zmm2, ymm0, 1
- vmovups ymmword ptr [rcx+r8], ymm1
+ vpackuswb zmm0, zmm0, zmm0
+ vpermq zmm3, zmm2, zmm0
+ vmovups ymmword ptr [rcx+r8], ymm3
add r8, 32
jmp SHORT G_M6063_IG24
align [0 bytes for IG27]
- ;; size=44 bbWeight=0.50 PerfScore 8.62
+ ;; size=37 bbWeight=0.50 PerfScore 6.12
G_M6063_IG23:
- vpmovwb zmm0, zmm0
- vpmovwb zmm2, zmm2
- vinserti64x4 zmm0, zmm0, ymm2, 1
+ vpackuswb zmm0, zmm0, zmm3
+ vpermq zmm0, zmm2, zmm0
vmovups zmmword ptr [rcx+r8], zmm0
add r8, 64
cmp r8, r9
jbe SHORT G_M6063_IG21
- ;; size=35 bbWeight=4 PerfScore 46.00
+ ;; size=28 bbWeight=4 PerfScore 26.00
G_M6063_IG24:
mov rax, r8
jmp SHORT G_M6063_IG26
;; size=5 bbWeight=0.50 PerfScore 1.12
G_M6063_IG25:
xor eax, eax
;; size=2 bbWeight=0.50 PerfScore 0.12
G_M6063_IG26:
sub rdx, rax
cmp rdx, 4
jb SHORT G_M6063_IG30
lea rcx, [rax+rdx-0x04]
;; size=14 bbWeight=0.50 PerfScore 1.25
G_M6063_IG27:
mov r8, qword ptr [rdi+2*rax]
mov r9, 0xD1FFAB1E
test r8, r9
je SHORT G_M6063_IG29
;; size=19 bbWeight=4 PerfScore 14.00
G_M6063_IG28:
mov ecx, r8d
test ecx, 0xD1FFAB1E
jne SHORT G_M6063_IG31
lea rdx, [rsi+rax]
mov byte ptr [rdx], cl
shr ecx, 16
mov byte ptr [rdx+0x01], cl
shr r8, 32
mov ecx, r8d
add rax, 2
jmp SHORT G_M6063_IG31
;; size=36 bbWeight=0.50 PerfScore 3.75
G_M6063_IG29:
vmovd xmm0, r8
vpackuswb xmm0, xmm0, xmm0
vmovd dword ptr [rsi+rax], xmm0
add rax, 4
cmp rax, rcx
jbe SHORT G_M6063_IG27
;; size=23 bbWeight=4 PerfScore 26.00
G_M6063_IG30:
test dl, 2
je SHORT G_M6063_IG33
mov ecx, dword ptr [rdi+2*rax]
test ecx, 0xD1FFAB1E
je SHORT G_M6063_IG32
;; size=16 bbWeight=0.50 PerfScore 2.25
G_M6063_IG31:
test ecx, 0xFF80
je SHORT G_M6063_IG34
jmp SHORT G_M6063_IG35
;; size=10 bbWeight=0.50 PerfScore 1.62
G_M6063_IG32:
lea r8, [rsi+rax]
mov byte ptr [r8], cl
shr ecx, 16
mov byte ptr [r8+0x01], cl
add rax, 2
;; size=18 bbWeight=0.50 PerfScore 1.62
G_M6063_IG33:
test dl, 1
je SHORT G_M6063_IG35
movzx rcx, word ptr [rdi+2*rax]
cmp ecx, 127
ja SHORT G_M6063_IG35
;; size=14 bbWeight=0.50 PerfScore 2.25
G_M6063_IG34:
mov byte ptr [rsi+rax], cl
inc rax
;; size=6 bbWeight=0.50 PerfScore 0.62
G_M6063_IG35:
vzeroupper
pop rbp
ret
;; size=5 bbWeight=1 PerfScore 2.50
RWD00 dq FF80FF80FF80FF80h, FF80FF80FF80FF80h
RWD16 dd 00000000h, 00000000h, 00000000h, 00000000h
RWD32 dq FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h
RWD64 dq FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h
+RWD128 dq 0000000000000000h, 0000000000000002h, 0000000000000004h, 0000000000000006h, 0000000000000001h, 0000000000000003h, 0000000000000005h, 0000000000000007h
-; Total bytes of code 886, prolog size 4, PerfScore 432.79, instruction count 210, allocated bytes for code 906 (MethodHash=53fae850) for method System.Text.Ascii:NarrowUtf16ToAscii(ulong,ulong,ulong):ulong (FullOpts)
+; Total bytes of code 832, prolog size 4, PerfScore 379.29, instruction count 203, allocated bytes for code 852 (MethodHash=53fae850) for method System.Text.Ascii:NarrowUtf16ToAscii(ulong,ulong,ulong):ulong (FullOpts)

-39 (-4.58 % of base) - System.Runtime.Caching.MemoryCacheStatistics:InitDisposableMembers():this

; Assembly listing for method System.Runtime.Caching.MemoryCacheStatistics:InitDisposableMembers():this (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Unix
; FullOpts code
; optimized code
; rbp based frame
; fully interruptible
; No PGO data
-; 0 inlinees with PGO data; 13 single block inlinees; 11 inlinees without PGO data
+; 0 inlinees with PGO data; 14 single block inlinees; 10 inlinees without PGO data
; Final local variable assignments
;
; V00 this [V00,T01] ( 9, 8 ) ref -> [rbp-0x38] this class-hnd EH-live single-def <System.Runtime.Caching.MemoryCacheStatistics>
-; V01 loc0 [V01,T19] ( 3, 2 ) ubyte -> [rbp-0x2C] do-not-enreg[M] EH-live
+; V01 loc0 [V01,T15] ( 3, 2 ) ubyte -> [rbp-0x2C] do-not-enreg[M] EH-live
;* V02 loc1 [V02 ] ( 0, 0 ) ref -> zero-ref class-hnd exact single-def <System.Threading.Timer>
-; V03 loc2 [V03,T18] ( 4, 2.50) ubyte -> [rbp-0x30] do-not-enreg[M] EH-live
+; V03 loc2 [V03,T14] ( 4, 2.50) ubyte -> [rbp-0x30] do-not-enreg[M] EH-live
;# V04 OutArgs [V04 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-; V05 tmp1 [V05,T06] ( 3, 6 ) ref -> rbx class-hnd exact single-def "NewObj constructor temp" <System.Runtime.Caching.CacheMemoryMonitor>
+; V05 tmp1 [V05,T05] ( 3, 6 ) ref -> rbx class-hnd exact single-def "NewObj constructor temp" <System.Runtime.Caching.CacheMemoryMonitor>
; V06 tmp2 [V06,T03] ( 4, 8 ) ref -> rbx class-hnd exact single-def "NewObj constructor temp" <System.Threading.TimerCallback>
-; V07 tmp3 [V07,T04] ( 4, 7 ) ref -> r15 class-hnd exact single-def "NewObj constructor temp" <System.Threading.Timer>
-; V08 tmp4 [V08,T07] ( 3, 6 ) ref -> r14 class-hnd exact single-def "NewObj constructor temp" <System.Runtime.Caching.GCHandleRef`1[System.Threading.Timer]>
+; V07 tmp3 [V07,T04] ( 4, 7 ) ref -> [rbp-0x40] class-hnd exact spill-single-def "NewObj constructor temp" <System.Threading.Timer>
+; V08 tmp4 [V08,T06] ( 3, 6 ) ref -> r12 class-hnd exact single-def "NewObj constructor temp" <System.Runtime.Caching.GCHandleRef`1[System.Threading.Timer]>
;* V09 tmp5 [V09 ] ( 0, 0 ) struct ( 8) zero-ref "location for address-of(RValue)" <System.Threading.AsyncFlowControl>
; V10 tmp6 [V10,T02] ( 5, 7 ) int -> r13 "Inlining Arg"
-; V11 tmp7 [V11,T13] ( 2, 3 ) int -> r14 "Inlining Arg"
+; V11 tmp7 [V11,T10] ( 2, 3 ) int -> r14 "Inlining Arg"
;* V12 tmp8 [V12 ] ( 0, 0 ) int -> zero-ref ld-addr-op "Inlining Arg"
-;* V13 tmp9 [V13,T22] ( 0, 0 ) int -> zero-ref "Inline return value spill temp"
+;* V13 tmp9 [V13,T17] ( 0, 0 ) int -> zero-ref "Inline return value spill temp"
;* V14 tmp10 [V14 ] ( 0, 0 ) int -> zero-ref ld-addr-op "Inlining Arg"
-;* V15 tmp11 [V15,T23] ( 0, 0 ) int -> zero-ref "Inline return value spill temp"
-; V16 tmp12 [V16,T00] ( 8, 15 ) ref -> r12 class-hnd exact single-def "NewObj constructor temp" <System.Threading.TimerQueueTimer>
-; V17 tmp13 [V17,T08] ( 3, 6 ) ref -> rbx class-hnd exact single-def "NewObj constructor temp" <System.Threading.TimerHolder>
-; V18 tmp14 [V18,T09] ( 3, 6 ) ref -> rbx class-hnd exact single-def "impAppendStmt" <System.Threading.TimerQueue[]>
-;* V19 tmp15 [V19 ] ( 0, 0 ) long -> zero-ref "impAppendStmt"
+;* V15 tmp11 [V15,T18] ( 0, 0 ) int -> zero-ref "Inline return value spill temp"
+; V16 tmp12 [V16,T00] ( 8, 15 ) ref -> [rbp-0x48] class-hnd exact spill-single-def "NewObj constructor temp" <System.Threading.TimerQueueTimer>
+; V17 tmp13 [V17,T07] ( 3, 6 ) ref -> r14 class-hnd exact single-def "NewObj constructor temp" <System.Threading.TimerHolder>
+; V18 tmp14 [V18,T08] ( 3, 6 ) ref -> [rbp-0x50] class-hnd exact spill-single-def "impAppendStmt" <System.Threading.TimerQueue[]>
+; V19 tmp15 [V19,T11] ( 2, 4 ) long -> rax "impAppendStmt"
;* V20 tmp16 [V20 ] ( 0, 0 ) int -> zero-ref "Inline return value spill temp"
-; V21 tmp17 [V21,T20] ( 3, 2 ) int -> rcx "Inline return value spill temp"
-; V22 tmp18 [V22,T05] ( 4, 7 ) int -> rcx "dup spill"
-;* V23 tmp19 [V23 ] ( 0, 0 ) int -> zero-ref single-def "Inline stloc first use temp"
-;* V24 tmp20 [V24 ] ( 0, 0 ) struct ( 8) zero-ref ld-addr-op "NewObj constructor temp" <System.Runtime.InteropServices.GCHandle>
-;* V25 tmp21 [V25 ] ( 0, 0 ) long -> zero-ref "Inline stloc first use temp"
-; V26 tmp22 [V26,T15] ( 4, 3.50) long -> rax "Inline stloc first use temp"
-;* V27 tmp23 [V27 ] ( 0, 0 ) ref -> zero-ref "field V09._thread (fldOffset=0x0)" P-INDEP
-; V28 tmp24 [V28,T21] ( 2, 2 ) long -> rax single-def "field V24._handle (fldOffset=0x0)" P-INDEP
-; V29 tmp25 [V29,T26] ( 2, 0 ) ref -> rdx single-def "argument with side effect"
-;* V30 tmp26 [V30 ] ( 0, 0 ) ref -> zero-ref single-def "argument with side effect"
-; V31 tmp27 [V31,T10] ( 3, 6 ) long -> rdx "index expr"
-; V32 PSPSym [V32,T24] ( 1, 1 ) long -> [rbp-0x40] do-not-enreg[V] "PSPSym"
-;* V33 cse0 [V33,T25] ( 0, 0 ) long -> zero-ref "CSE #03: moderate"
-; V34 cse1 [V34,T16] ( 3, 3 ) long -> rax "CSE #02: moderate"
-; V35 cse2 [V35,T17] ( 3, 3 ) int -> r14 "CSE #01: moderate"
-; V36 rat0 [V36,T14] ( 3, 4 ) long -> rax "TLS field access"
-; V37 rat1 [V37,T11] ( 3, 6 ) long -> rax "TLS access"
-; V38 rat2 [V38,T12] ( 3, 6 ) long -> rax "ThreadStaticBlockBase access"
+;* V21 tmp17 [V21 ] ( 0, 0 ) struct ( 8) zero-ref ld-addr-op "NewObj constructor temp" <System.Runtime.InteropServices.GCHandle>
+;* V22 tmp18 [V22 ] ( 0, 0 ) long -> zero-ref "Inline stloc first use temp"
+; V23 tmp19 [V23,T12] ( 4, 3.50) long -> rax "Inline stloc first use temp"
+;* V24 tmp20 [V24 ] ( 0, 0 ) ref -> zero-ref "field V09._thread (fldOffset=0x0)" P-INDEP
+; V25 tmp21 [V25,T16] ( 2, 2 ) long -> rax single-def "field V21._handle (fldOffset=0x0)" P-INDEP
+; V26 tmp22 [V26,T21] ( 2, 0 ) ref -> rdx single-def "argument with side effect"
+;* V27 tmp23 [V27 ] ( 0, 0 ) ref -> zero-ref single-def "argument with side effect"
+; V28 tmp24 [V28,T09] ( 3, 6 ) long -> rdx "index expr"
+; V29 PSPSym [V29,T19] ( 1, 1 ) long -> [rbp-0x60] do-not-enreg[V] "PSPSym"
+;* V30 cse0 [V30,T20] ( 0, 0 ) long -> zero-ref "CSE #02: moderate"
+; V31 cse1 [V31,T13] ( 3, 3 ) int -> r14 "CSE #01: moderate"
;
-; Lcl frame size = 24
+; Lcl frame size = 56
G_M44586_IG01:
push rbp
push r15
push r14
push r13
push r12
push rbx
- sub rsp, 24
- lea rbp, [rsp+0x40]
- mov qword ptr [rbp-0x40], rsp
+ sub rsp, 56
+ vzeroupper
+ lea rbp, [rsp+0x60]
+ mov qword ptr [rbp-0x60], rsp
mov gword ptr [rbp-0x38], rdi
- ;; size=27 bbWeight=1 PerfScore 8.75
+ ;; size=30 bbWeight=1 PerfScore 9.75
G_M44586_IG02:
mov dword ptr [rbp-0x2C], 1
;; size=7 bbWeight=1 PerfScore 1.00
G_M44586_IG03:
mov rdi, 0xD1FFAB1E ; System.Runtime.Caching.CacheMemoryMonitor
call CORINFO_HELP_NEWSFAST
mov rbx, rax
mov rdi, gword ptr [rbp-0x38]
mov edx, dword ptr [rdi+0x48]
mov rsi, gword ptr [rdi+0x20]
mov rdi, rbx
mov rax, 0xD1FFAB1E ; code for System.Runtime.Caching.CacheMemoryMonitor:.ctor(System.Runtime.Caching.MemoryCache,int):this
call [rax]System.Runtime.Caching.CacheMemoryMonitor:.ctor(System.Runtime.Caching.MemoryCache,int):this
mov rdi, gword ptr [rbp-0x38]
lea rdi, bword ptr [rdi+0x18]
mov rsi, rbx
call CORINFO_HELP_ASSIGN_REF
xor eax, eax
mov dword ptr [rbp-0x30], eax
;; size=65 bbWeight=1 PerfScore 14.00
G_M44586_IG04:
mov rax, 0xD1FFAB1E ; code for System.Threading.ExecutionContext:IsFlowSuppressed():ubyte
call [rax]System.Threading.ExecutionContext:IsFlowSuppressed():ubyte
test eax, eax
jne SHORT G_M44586_IG06
;; size=16 bbWeight=1 PerfScore 4.50
G_M44586_IG05:
mov rax, 0xD1FFAB1E ; code for System.Threading.ExecutionContext:SuppressFlow():System.Threading.AsyncFlowControl
call [rax]System.Threading.ExecutionContext:SuppressFlow():System.Threading.AsyncFlowControl
mov dword ptr [rbp-0x30], 1
;; size=19 bbWeight=0.50 PerfScore 2.12
G_M44586_IG06:
mov rdi, 0xD1FFAB1E ; System.Threading.TimerCallback
call CORINFO_HELP_NEWSFAST
mov rbx, rax
lea rdi, bword ptr [rbx+0x08]
mov rsi, gword ptr [rbp-0x38]
call CORINFO_HELP_ASSIGN_REF
mov rdi, 0xD1FFAB1E ; code for System.Runtime.Caching.MemoryCacheStatistics:CacheManagerTimerCallback(System.Object):this
mov qword ptr [rbx+0x18], rdi
mov rdi, 0xD1FFAB1E ; System.Threading.Timer
call CORINFO_HELP_NEWSFAST
- mov r15, rax
+ mov gword ptr [rbp-0x40], rax
mov rsi, gword ptr [rbp-0x38]
mov r14d, dword ptr [rsi+0x50]
mov r13d, r14d
cmp r13d, -1
- jl G_M44586_IG16
+ jl G_M44586_IG14
mov rdi, 0xD1FFAB1E ; System.Threading.TimerQueueTimer
call CORINFO_HELP_NEWSFAST
mov r12, rax
+ mov gword ptr [rbp-0x48], r12
lea rdi, bword ptr [r12+0x20]
mov rsi, rbx
call CORINFO_HELP_ASSIGN_REF
xor rax, rax
mov gword ptr [r12+0x28], rax
mov qword ptr [r12+0x48], -1
mov rax, 0xD1FFAB1E ; code for System.Threading.ExecutionContext:Capture():System.Threading.ExecutionContext
call [rax]System.Threading.ExecutionContext:Capture():System.Threading.ExecutionContext
lea rdi, bword ptr [r12+0x30]
mov rsi, rax
call CORINFO_HELP_ASSIGN_REF
- mov rdi, 0xD1FFAB1E ; global ptr
- test byte ptr [rdi], 1
- je G_M44586_IG14
- ;; size=175 bbWeight=1 PerfScore 26.00
+ mov rax, 0xD1FFAB1E ; global ptr
+ test byte ptr [rax], 1
+ je G_M44586_IG12
+ ;; size=180 bbWeight=1 PerfScore 27.75
G_M44586_IG07:
- mov rdi, 0xD1FFAB1E ; data for System.Threading.TimerQueue:<Instances>k__BackingField
- mov rbx, gword ptr [rdi]
- mov rdi, 0xD1FFAB1E
+ mov rax, 0xD1FFAB1E ; data for System.Threading.TimerQueue:<Instances>k__BackingField
+ mov rbx, gword ptr [rax]
+ mov gword ptr [rbp-0x50], rbx
mov rax, 0xD1FFAB1E ; function address
- call rax
- cmp dword ptr [rax], 2
- jl G_M44586_IG15
- mov rax, qword ptr [rax+0x08]
- mov rax, qword ptr [rax+0x10]
- test rax, rax
- je G_M44586_IG15
- ;; size=61 bbWeight=1 PerfScore 15.00
+ ;; size=27 bbWeight=1 PerfScore 3.50
G_M44586_IG08:
- mov ecx, dword ptr [rax+0x0A38]
- lea edx, [rcx-0x01]
- mov dword ptr [rax+0x0A38], edx
- movzx rax, cx
- test eax, eax
- je SHORT G_M44586_IG10
- ;; size=22 bbWeight=1 PerfScore 5.00
-G_M44586_IG09:
- sar ecx, 16
- jmp SHORT G_M44586_IG11
- ;; size=5 bbWeight=0.50 PerfScore 1.25
-G_M44586_IG10:
- mov rax, 0xD1FFAB1E ; code for System.Threading.ProcessorIdCache:RefreshCurrentProcessorId():int
- call [rax]System.Threading.ProcessorIdCache:RefreshCurrentProcessorId():int
- mov ecx, eax
- ;; size=14 bbWeight=0.50 PerfScore 1.75
-G_M44586_IG11:
- mov eax, ecx
+ call rax ; Interop+Sys:SchedGetCpu():int
+ mov eax, eax
mov rdx, 0xD1FFAB1E ; data for System.Threading.TimerQueue:<Instances>k__BackingField
mov rdx, gword ptr [rdx]
mov edi, dword ptr [rdx+0x08]
- xor edx, edx
- div rdx:rax, rdi
+ cqo
+ idiv rdx:rax, rdi
+ mov rbx, gword ptr [rbp-0x50]
mov edi, dword ptr [rbx+0x08]
cmp rdx, rdi
- jae G_M44586_IG17
+ jae G_M44586_IG15
mov rsi, gword ptr [rbx+8*rdx+0x10]
+ mov r12, gword ptr [rbp-0x48]
lea rdi, bword ptr [r12+0x08]
call CORINFO_HELP_ASSIGN_REF
+ mov rdi, 0xD1FFAB1E ; global ptr
+ cmp dword ptr [rdi], 0
+ jne SHORT G_M44586_IG13
+ ;; size=75 bbWeight=1 PerfScore 89.00
+G_M44586_IG09:
cmp r13d, -1
- je SHORT G_M44586_IG13
- ;; size=56 bbWeight=1 PerfScore 73.75
-G_M44586_IG12:
+ je SHORT G_M44586_IG11
+ ;; size=6 bbWeight=1 PerfScore 1.25
+G_M44586_IG10:
mov rdi, r12
mov esi, r13d
mov edx, r14d
mov rax, 0xD1FFAB1E ; code for System.Threading.TimerQueueTimer:Change(uint,uint):ubyte:this
call [rax]System.Threading.TimerQueueTimer:Change(uint,uint):ubyte:this
;; size=21 bbWeight=0.50 PerfScore 2.00
-G_M44586_IG13:
+G_M44586_IG11:
mov rdi, 0xD1FFAB1E ; System.Threading.TimerHolder
call CORINFO_HELP_NEWFAST
- mov rbx, rax
- lea rdi, bword ptr [rbx+0x08]
+ mov r14, rax
+ lea rdi, bword ptr [r14+0x08]
mov rsi, r12
call CORINFO_HELP_ASSIGN_REF
+ mov r15, gword ptr [rbp-0x40]
lea rdi, bword ptr [r15+0x08]
- mov rsi, rbx
+ mov rsi, r14
call CORINFO_HELP_ASSIGN_REF
- jmp SHORT G_M44586_IG18
- ;; size=44 bbWeight=1 PerfScore 7.00
-G_M44586_IG14:
+ jmp SHORT G_M44586_IG16
+ ;; size=48 bbWeight=1 PerfScore 8.00
+G_M44586_IG12:
mov rdi, 0xD1FFAB1E
mov esi, 946
call CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE
jmp G_M44586_IG07
;; size=25 bbWeight=0 PerfScore 0.00
-G_M44586_IG15:
- mov edi, 2
- call CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED
- jmp G_M44586_IG08
- ;; size=15 bbWeight=0 PerfScore 0.00
-G_M44586_IG16:
+G_M44586_IG13:
+ call CORINFO_HELP_POLL_GC
+ jmp SHORT G_M44586_IG09
+ ;; size=7 bbWeight=0 PerfScore 0.00
+G_M44586_IG14:
mov edi, 0x1710B
mov rsi, 0xD1FFAB1E
call CORINFO_HELP_STRCNS
mov rdx, rax
mov edi, r13d
mov esi, -1
mov rax, 0xD1FFAB1E ; code for System.ArgumentOutOfRangeException:ThrowLess[int](int,int,System.String)
call [rax]System.ArgumentOutOfRangeException:ThrowLess[int](int,int,System.String)
int3
;; size=44 bbWeight=0 PerfScore 0.00
-G_M44586_IG17:
+G_M44586_IG15:
call CORINFO_HELP_RNGCHKFAIL
int3
;; size=6 bbWeight=0 PerfScore 0.00
-G_M44586_IG18:
+G_M44586_IG16:
cmp dword ptr [rbp-0x30], 0
- je SHORT G_M44586_IG20
+ je SHORT G_M44586_IG18
;; size=6 bbWeight=1 PerfScore 3.00
-G_M44586_IG19:
+G_M44586_IG17:
mov rax, 0xD1FFAB1E ; code for System.Threading.ExecutionContext:RestoreFlow()
call [rax]System.Threading.ExecutionContext:RestoreFlow()
;; size=12 bbWeight=0.50 PerfScore 1.62
-G_M44586_IG20:
+G_M44586_IG18:
mov rdi, 0xD1FFAB1E ; System.Runtime.Caching.GCHandleRef`1[System.Threading.Timer]
call CORINFO_HELP_NEWSFAST
- mov r14, rax
+ mov r12, rax
mov rdi, r15
mov esi, 2
call System.Runtime.InteropServices.GCHandle:_InternalAlloc(System.Object,int):long
test rax, rax
- jne SHORT G_M44586_IG22
+ jne SHORT G_M44586_IG20
;; size=36 bbWeight=1 PerfScore 4.25
-G_M44586_IG21:
+G_M44586_IG19:
mov rdi, r15
mov esi, 2
mov rax, 0xD1FFAB1E ; code for System.Runtime.InteropServices.GCHandle:InternalAllocWithGCTransition(System.Object,int):long
call [rax]System.Runtime.InteropServices.GCHandle:InternalAllocWithGCTransition(System.Object,int):long
;; size=20 bbWeight=0.50 PerfScore 1.88
-G_M44586_IG22:
- mov qword ptr [r14+0x10], rax
+G_M44586_IG20:
+ mov qword ptr [r12+0x10], rax
mov rsi, gword ptr [rbp-0x38]
lea rdi, bword ptr [rsi+0x08]
- mov rsi, r14
+ mov rsi, r12
call CORINFO_HELP_ASSIGN_REF
xor edi, edi
mov dword ptr [rbp-0x2C], edi
- ;; size=25 bbWeight=1 PerfScore 5.00
-G_M44586_IG23:
- add rsp, 24
+ ;; size=26 bbWeight=1 PerfScore 5.00
+G_M44586_IG21:
+ add rsp, 56
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rbp
ret
;; size=15 bbWeight=1 PerfScore 4.25
-G_M44586_IG24:
+G_M44586_IG22:
push rbp
push r15
push r14
push r13
push r12
push rbx
push rax
+ vzeroupper
mov rbp, qword ptr [rdi]
mov qword ptr [rsp], rbp
- lea rbp, [rbp+0x40]
- ;; size=22 bbWeight=0 PerfScore 0.00
-G_M44586_IG25:
+ lea rbp, [rbp+0x60]
+ ;; size=25 bbWeight=0 PerfScore 0.00
+G_M44586_IG23:
cmp dword ptr [rbp-0x30], 0
- je SHORT G_M44586_IG26
+ je SHORT G_M44586_IG24
mov rax, 0xD1FFAB1E ; code for System.Threading.ExecutionContext:RestoreFlow()
call [rax]System.Threading.ExecutionContext:RestoreFlow()
;; size=18 bbWeight=0 PerfScore 0.00
-G_M44586_IG26:
+G_M44586_IG24:
nop
;; size=1 bbWeight=0 PerfScore 0.00
-G_M44586_IG27:
+G_M44586_IG25:
add rsp, 8
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rbp
ret
;; size=15 bbWeight=0 PerfScore 0.00
-G_M44586_IG28:
+G_M44586_IG26:
push rbp
push r15
push r14
push r13
push r12
push rbx
push rax
+ vzeroupper
mov rbp, qword ptr [rdi]
mov qword ptr [rsp], rbp
- lea rbp, [rbp+0x40]
- ;; size=22 bbWeight=0 PerfScore 0.00
-G_M44586_IG29:
+ lea rbp, [rbp+0x60]
+ ;; size=25 bbWeight=0 PerfScore 0.00
+G_M44586_IG27:
cmp dword ptr [rbp-0x2C], 0
- je SHORT G_M44586_IG30
+ je SHORT G_M44586_IG28
mov rdi, gword ptr [rbp-0x38]
mov rax, 0xD1FFAB1E ; code for System.Runtime.Caching.MemoryCacheStatistics:Dispose():this
call [rax]System.Runtime.Caching.MemoryCacheStatistics:Dispose():this
;; size=22 bbWeight=0 PerfScore 0.00
-G_M44586_IG30:
+G_M44586_IG28:
nop
;; size=1 bbWeight=0 PerfScore 0.00
-G_M44586_IG31:
+G_M44586_IG29:
add rsp, 8
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rbp
ret
;; size=15 bbWeight=0 PerfScore 0.00
-; Total bytes of code 852, prolog size 27, PerfScore 182.12, instruction count 213, allocated bytes for code 852 (MethodHash=c52151d5) for method System.Runtime.Caching.MemoryCacheStatistics:InitDisposableMembers():this (FullOpts)
+; Total bytes of code 813, prolog size 30, PerfScore 182.88, instruction count 205, allocated bytes for code 813 (MethodHash=c52151d5) for method System.Runtime.Caching.MemoryCacheStatistics:InitDisposableMembers():this (FullOpts)

-36 (-16.00 % of base) - System.Text.Ascii:NarrowUtf16ToAscii_Intrinsified_256(ulong,ulong,ulong):ulong

; Assembly listing for method System.Text.Ascii:NarrowUtf16ToAscii_Intrinsified_256(ulong,ulong,ulong):ulong (FullOpts)
; NOTE(review): this is a diff of two JIT listings for the same method. Lines
; prefixed '-' come from the base output, lines prefixed '+' from the new output,
; and unprefixed lines are common to both. Intel operand order (dst, src).
;
; Register roles, as given by the variable table below:
;   rdi = arg0 / loc0  address the 32-byte vectors are loaded from
;   rsi = arg1         address copied into rax and used as the store base
;   rdx = arg2         reduced by 32 in IG04 and used as the loop bound for rcx
;   rax = loc2         store base (copy of rsi); also carries the return value
;   rcx = loc3         running index; scaled by 2 for loads, by 1 for stores
;   ymm1 = cse0        the RWD00 constant (0xFF80 in every 16-bit lane)
; Return value: 0 when the very first vector fails the mask test (IG10),
; otherwise the value of rcx at loop exit (IG08).
; presumably arg0/arg1/arg2 are source pointer, destination pointer and element
; count -- TODO confirm against the managed signature; only the usage above is
; visible in this listing.
; Emitting BLENDED_CODE for X64 with AVX512 - Unix
; FullOpts code
; optimized code
; rbp based frame
; fully interruptible
; No PGO data
-; 0 inlinees with PGO data; 0 single block inlinees; 4 inlinees without PGO data
+; 0 inlinees with PGO data; 4 single block inlinees; 8 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T04] ( 3, 3 ) long -> rdi single-def
; V01 arg1 [V01,T03] ( 5, 3.50) long -> rsi single-def
; V02 arg2 [V02,T05] ( 3, 2.50) long -> rdx single-def
; V03 loc0 [V03,T01] ( 5, 10.50) byref -> rdi single-def
; V04 loc1 [V04,T07] ( 14, 18.50) simd32 -> mm0 <System.Runtime.Intrinsics.Vector256`1[ushort]>
; V05 loc2 [V05,T02] ( 5, 6 ) byref -> rax single-def
; V06 loc3 [V06,T00] ( 12, 27 ) long -> rcx
; V07 loc4 [V07,T06] ( 2, 4.50) long -> rdx
-; V08 loc5 [V08,T08] ( 3, 12 ) simd32 -> mm2 <System.Runtime.Intrinsics.Vector256`1[ushort]>
+; V08 loc5 [V08,T09] ( 3, 12 ) simd32 -> mm2 <System.Runtime.Intrinsics.Vector256`1[ushort]>
;# V09 OutArgs [V09 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-;* V10 tmp1 [V10 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V11 tmp2 [V11 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
-;* V12 tmp3 [V12 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V13 tmp4 [V13 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;* V10 tmp1 [V10 ] ( 0, 0 ) simd32 -> zero-ref "spilled call-like call argument"
+;* V11 tmp2 [V11 ] ( 0, 0 ) simd32 -> zero-ref "spilled call-like call argument"
+; V12 tmp3 [V12,T08] ( 2, 16 ) simd32 -> mm0 "Spilling op1 side effects for HWIntrinsic"
+;* V13 tmp4 [V13 ] ( 0, 0 ) simd32 -> zero-ref "spilled call-like call argument"
;* V14 tmp5 [V14 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
;* V15 tmp6 [V15 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
-;* V16 tmp7 [V16 ] ( 0, 0 ) simd32 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ushort]>
-;* V17 tmp8 [V17 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V18 tmp9 [V18 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
-; V19 cse0 [V19,T09] ( 5, 7 ) simd32 -> mm1 "CSE #02: moderate"
+;* V16 tmp7 [V16 ] ( 0, 0 ) simd32 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;* V17 tmp8 [V17 ] ( 0, 0 ) simd32 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;* V18 tmp9 [V18 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V19 tmp10 [V19 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;* V20 tmp11 [V20 ] ( 0, 0 ) simd32 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;* V21 tmp12 [V21 ] ( 0, 0 ) simd32 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;* V22 tmp13 [V22 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V23 tmp14 [V23 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;* V24 tmp15 [V24 ] ( 0, 0 ) simd32 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;* V25 tmp16 [V25 ] ( 0, 0 ) simd32 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;* V26 tmp17 [V26 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V27 tmp18 [V27 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector256`1[ushort]>
+;* V28 tmp19 [V28 ] ( 0, 0 ) simd32 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+;* V29 tmp20 [V29 ] ( 0, 0 ) simd32 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+; V30 cse0 [V30,T10] ( 5, 7 ) simd32 -> mm1 "CSE #01: moderate"
;
; Lcl frame size = 0
; Prolog: establish the rbp frame; no stack locals are allocated.
G_M60588_IG01:
push rbp
mov rbp, rsp
;; size=4 bbWeight=1 PerfScore 1.25
; Load the first 32 bytes at [rdi] and the RWD00 mask, then test them together.
; vptest leaves ZF clear when (ymm0 AND ymm1) is non-zero, i.e. when some
; 16-bit lane has a bit set outside its low 7 bits; that case exits via IG10.
G_M60588_IG02:
vmovups ymm0, ymmword ptr [rdi]
vmovups ymm1, ymmword ptr [reloc @RWD00]
vptest ymm0, ymm1
jne G_M60588_IG10
;; size=23 bbWeight=1 PerfScore 15.00
; rax = store base. Narrow the vector that just passed the test and store 16
; bytes at [rax]; rcx = 16 is what IG08 returns if the next test fails.
; The base output used a vpmovwb/vpmovwb/vinserti128 triple for each narrowing;
; the new output uses vpackuswb followed by vpermq.
; When bit 4 of rsi is clear (test sil, 16), a second vector at [rdi+0x20] is
; tested and, if it passes, narrowed and stored at [rax+0x10].
G_M60588_IG03:
mov rax, rsi
- vpmovwb ymm0, ymm2
- vpmovwb ymm0, ymm0
- vinserti128 ymm0, ymm2, xmm0, 1
+ vpackuswb ymm0, ymm0, ymm0
+ vpermq ymm0, ymm0, -40
vmovups xmmword ptr [rax], xmm0
mov ecx, 16
test sil, 16
jne SHORT G_M60588_IG04
vmovups ymm0, ymmword ptr [rdi+0x20]
vptest ymm0, ymm1
- jne G_M60588_IG08
- vpmovwb ymm0, ymm2
- vpmovwb ymm0, ymm0
- vinserti128 ymm0, ymm2, xmm0, 1
+ jne SHORT G_M60588_IG08
+ vpackuswb ymm0, ymm0, ymm0
+ vpermq ymm0, ymm0, -40
vmovups xmmword ptr [rax+0x10], xmm0
- ;; size=75 bbWeight=0.50 PerfScore 16.38
+ ;; size=55 bbWeight=0.50 PerfScore 11.38
; Loop setup: rcx = 32 - (rsi & 31), so rax + rcx is the next 32-byte boundary
; at or after the store base; rdx = rdx - 32 becomes the inclusive upper bound
; for rcx checked at the bottom of IG07.
G_M60588_IG04:
and rsi, 31
mov rcx, rsi
neg rcx
add rcx, 32
add rdx, -32
align [0 bytes for IG05]
;; size=18 bbWeight=0.50 PerfScore 0.62
; Loop head: load two consecutive 32-byte vectors at [rdi+2*rcx], OR them and
; test the union against the mask. ZF set (no masked bit in either) -> IG07.
G_M60588_IG05:
vmovups ymm0, ymmword ptr [rdi+2*rcx]
vmovups ymm2, ymmword ptr [rdi+2*rcx+0x20]
vpor ymm3, ymm0, ymm2
vptest ymm3, ymm1
je SHORT G_M60588_IG07
;; size=22 bbWeight=4 PerfScore 65.33
; Exit path: at least one of the two vectors has a masked bit set. If the first
; one does, leave with rcx unchanged. Otherwise narrow the first vector alone,
; store 16 bytes at [rax+rcx], add 16 to rcx, and leave.
G_M60588_IG06:
vptest ymm0, ymm1
jne SHORT G_M60588_IG08
- vpmovwb ymm0, ymm2
- vpmovwb ymm0, ymm0
- vinserti128 ymm1, ymm2, xmm0, 1
- vmovups xmmword ptr [rax+rcx], xmm1
+ vpackuswb ymm0, ymm0, ymm0
+ vpermq ymm2, ymm0, -40
+ vmovups xmmword ptr [rax+rcx], xmm2
add rcx, 16
jmp SHORT G_M60588_IG08
- ;; size=36 bbWeight=0.50 PerfScore 9.12
+ ;; size=28 bbWeight=0.50 PerfScore 6.62
; Loop body: both vectors passed. Narrow the pair into one 32-byte result, store
; it at [rax+rcx], advance rcx by 32 and repeat while rcx <= rdx (unsigned).
; vpackuswb packs within each 128-bit lane, so its output quadwords are ordered
; a.lo, b.lo, a.hi, b.hi; vpermq with imm8 -40 (0xD8) picks quadwords 0,2,1,3
; to restore a.lo, a.hi, b.lo, b.hi.
G_M60588_IG07:
- vpmovwb ymm0, ymm0
- vpmovwb ymm2, ymm2
- vinserti128 ymm0, ymm0, xmm2, 1
+ vpackuswb ymm0, ymm0, ymm2
+ vpermq ymm0, ymm0, -40
vmovups ymmword ptr [rax+rcx], ymm0
add rcx, 32
cmp rcx, rdx
jbe SHORT G_M60588_IG05
- ;; size=32 bbWeight=4 PerfScore 46.00
+ ;; size=24 bbWeight=4 PerfScore 26.00
; Common exit: return the running index rcx.
G_M60588_IG08:
mov rax, rcx
;; size=3 bbWeight=0.50 PerfScore 0.12
; Epilog: clear upper YMM state before returning.
G_M60588_IG09:
vzeroupper
pop rbp
ret
;; size=5 bbWeight=0.50 PerfScore 1.25
; Early exit from IG02: the first vector failed the mask test, return 0.
G_M60588_IG10:
xor eax, eax
;; size=2 bbWeight=0.50 PerfScore 0.12
G_M60588_IG11:
vzeroupper
pop rbp
ret
;; size=5 bbWeight=0.50 PerfScore 1.25
; Mask constant: 0xFF80 in each of the sixteen 16-bit lanes.
RWD00 dq FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h, FF80FF80FF80FF80h
-; Total bytes of code 225, prolog size 4, PerfScore 156.46, instruction count 55, allocated bytes for code 225 (MethodHash=910c1353) for method System.Text.Ascii:NarrowUtf16ToAscii_Intrinsified_256(ulong,ulong,ulong):ulong (FullOpts)
+; Total bytes of code 189, prolog size 4, PerfScore 128.96, instruction count 51, allocated bytes for code 189 (MethodHash=910c1353) for method System.Text.Ascii:NarrowUtf16ToAscii_Intrinsified_256(ulong,ulong,ulong):ulong (FullOpts)

Larger list of diffs: https://gist.github.com/MihuBot/f60c7254060f436ae9c12ded701c85f8
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Job completed in 17 minutes.
Diffs
Artifacts:
The text was updated successfully, but these errors were encountered: