-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
JIT: Allow spill-at-single-def for pure defs #85251
JIT: Allow spill-at-single-def for pure defs #85251
Conversation
Tagging subscribers to this area: @JulieLeeMSFT, @jakobbotsch Issue Detailsnull
|
/azp run runtime-coreclr jitstress, runtime-coreclr libraries-jitstress, runtime-coreclr jitstressregs, Fuzzlyn |
Azure Pipelines successfully started running 4 pipeline(s). |
Fuzzlyn failures look like #85225. jitstressregs failures look like #85081. libraries-jitstress failure looks like #83659. cc @dotnet/jit-contrib PTAL @kunalspathak. Diffs. |
@@ -3423,15 +3423,11 @@ void LinearScan::spillInterval(Interval* interval, RefPosition* fromRefPosition | |||
} | |||
} | |||
|
|||
// Only handle the singledef intervals whose firstRefPosition is RefTypeDef and is not yet marked as spillAfter. | |||
// The singledef intervals whose firstRefPositions are already marked as spillAfter, no need to mark them as |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am trying to understand the situation where we should spill at single-def regardless of spillAfter
is marked. Can you show a before and after def too?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Here is a diff:
@@ -1,142 +1,136 @@
; Assembly listing for method System.Numerics.Complex:MaxMagnitude(System.Numerics.Complex,System.Numerics.Complex):System.Numerics.Complex
; Emitting BLENDED_CODE for X64 CPU with AVX512 - Windows
; optimized code
; rsp based frame
; partially interruptible
; No matching PGO data
; 0 inlinees with PGO data; 8 single block inlinees; 0 inlinees without PGO data
; Final local variable assignments
;
; V00 RetBuf [V00,T02] ( 7, 5 ) byref -> rsi single-def
; V01 arg0 [V01,T00] ( 4, 8 ) byref -> rdx single-def
; V02 arg1 [V02,T01] ( 4, 8 ) byref -> r8 single-def
-; V03 loc0 [V03,T04] ( 5, 3.50) double -> [rsp+48H]
+; V03 loc0 [V03,T04] ( 5, 3.50) double -> [rsp+48H] spill-single-def
; V04 loc1 [V04,T08] ( 3, 2.50) double -> mm0
; V05 OutArgs [V05 ] ( 1, 1 ) struct (32) [rsp+00H] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;* V06 tmp1 [V06 ] ( 0, 0 ) double -> zero-ref "Inlining Arg"
;* V07 tmp2 [V07 ] ( 0, 0 ) double -> zero-ref "Inlining Arg"
;* V08 tmp3 [V08 ] ( 0, 0 ) double -> zero-ref "Inlining Arg"
;* V09 tmp4 [V09 ] ( 0, 0 ) double -> zero-ref "Inlining Arg"
;* V10 tmp5 [V10 ] ( 0, 0 ) double -> zero-ref "Inlining Arg"
; V11 tmp6 [V11,T03] ( 2, 4 ) byref -> rax single-def "Single return block return value"
; V12 tmp7 [V12,T05] ( 5, 3.50) double -> [rsp+40H] spill-single-def V16.m_real(offs=0x00) P-INDEP "field V01.m_real (fldOffset=0x0)"
; V13 tmp8 [V13,T09] ( 3, 2.50) double -> [rsp+38H] spill-single-def V16.m_imaginary(offs=0x08) P-INDEP "field V01.m_imaginary (fldOffset=0x8)"
-; V14 tmp9 [V14,T07] ( 4, 3 ) double -> [rsp+30H] V17.m_real(offs=0x00) P-INDEP "field V02.m_real (fldOffset=0x0)"
-; V15 tmp10 [V15,T06] ( 5, 3.50) double -> [rsp+28H] V17.m_imaginary(offs=0x08) P-INDEP "field V02.m_imaginary (fldOffset=0x8)"
+; V14 tmp9 [V14,T07] ( 4, 3 ) double -> [rsp+30H] spill-single-def V17.m_real(offs=0x00) P-INDEP "field V02.m_real (fldOffset=0x0)"
+; V15 tmp10 [V15,T06] ( 5, 3.50) double -> [rsp+28H] spill-single-def V17.m_imaginary(offs=0x08) P-INDEP "field V02.m_imaginary (fldOffset=0x8)"
;* V16 tmp11 [V16 ] ( 0, 0 ) struct (16) zero-ref "Promoted implicit byref"
;* V17 tmp12 [V17 ] ( 0, 0 ) struct (16) zero-ref "Promoted implicit byref"
;
; Lcl frame size = 80
G_M13999_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, nogc <-- Prolog IG
push rsi
sub rsp, 80
vzeroupper
mov rsi, rcx
; byrRegs +[rsi]
;; size=11 bbWeight=1 PerfScore 2.50
G_M13999_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0144 {rdx rsi r8}, byref, isz
; byrRegs +[rdx r8]
vmovsd xmm0, qword ptr [r8]
vmovsd qword ptr [rsp+30H], xmm0
vmovsd xmm1, qword ptr [r8+08H]
vmovsd qword ptr [rsp+28H], xmm1
vmovsd xmm2, qword ptr [rdx]
vmovsd qword ptr [rsp+40H], xmm2
vmovsd xmm3, qword ptr [rdx+08H]
vmovsd qword ptr [rsp+38H], xmm3
vmovaps xmm0, xmm2
vmovaps xmm1, xmm3
call [System.Numerics.Complex:Hypot(double,double):double]
; byrRegs -[rdx r8]
; gcr arg pop 0
vmovsd qword ptr [rsp+48H], xmm0
vmovsd xmm0, qword ptr [rsp+30H]
vmovsd xmm1, qword ptr [rsp+28H]
call [System.Numerics.Complex:Hypot(double,double):double]
; gcr arg pop 0
vmovsd xmm1, qword ptr [rsp+48H]
vucomisd xmm1, xmm0
ja SHORT G_M13999_IG04
;; size=94 bbWeight=1 PerfScore 39.50
G_M13999_IG03: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0040 {rsi}, byref, isz
vucomisd xmm1, xmm1
jp SHORT G_M13999_IG04
je SHORT G_M13999_IG05
;; size=8 bbWeight=0.50 PerfScore 2.00
-G_M13999_IG04: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0040 {rsi}, byref
+G_M13999_IG04: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0040 {rsi}, byref, isz
vmovsd xmm0, qword ptr [rsp+40H]
vmovsd qword ptr [rsi], xmm0
vmovsd xmm1, qword ptr [rsp+38H]
vmovsd qword ptr [rsi+08H], xmm1
- jmp G_M13999_IG08
- ;; size=26 bbWeight=0.50 PerfScore 6.00
+ jmp SHORT G_M13999_IG08
+ ;; size=23 bbWeight=0.50 PerfScore 6.00
G_M13999_IG05: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0040 {rsi}, byref, isz
vucomisd xmm1, xmm0
jp SHORT G_M13999_IG07
jne SHORT G_M13999_IG07
vmovsd xmm0, qword ptr [rsp+30H]
vmovd rax, xmm0
test rax, rax
jge SHORT G_M13999_IG06
vmovsd xmm1, qword ptr [rsp+28H]
vmovd rax, xmm1
test rax, rax
- vmovsd qword ptr [rsp+28H], xmm1
- vmovsd qword ptr [rsp+30H], xmm0
jl SHORT G_M13999_IG04
vmovsd xmm2, qword ptr [rsp+40H]
vmovd rax, xmm2
test rax, rax
jge SHORT G_M13999_IG04
jmp SHORT G_M13999_IG07
- ;; size=70 bbWeight=0.50 PerfScore 13.38
+ ;; size=58 bbWeight=0.50 PerfScore 12.38
G_M13999_IG06: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0040 {rsi}, byref, isz
vmovsd xmm1, qword ptr [rsp+28H]
vmovd rax, xmm1
test rax, rax
jge SHORT G_M13999_IG10
vmovsd xmm2, qword ptr [rsp+40H]
vmovd rax, xmm2
test rax, rax
- vmovsd qword ptr [rsp+28H], xmm1
- vmovsd qword ptr [rsp+30H], xmm0
- jge G_M13999_IG04
- ;; size=48 bbWeight=0.50 PerfScore 7.25
+ jge SHORT G_M13999_IG04
+ ;; size=32 bbWeight=0.50 PerfScore 6.25
G_M13999_IG07: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0040 {rsi}, byref
vmovsd xmm0, qword ptr [rsp+30H]
vmovsd qword ptr [rsi], xmm0
vmovsd xmm1, qword ptr [rsp+28H]
vmovsd qword ptr [rsi+08H], xmm1
;; size=21 bbWeight=0.50 PerfScore 5.00
G_M13999_IG08: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0040 {rsi}, byref
mov rax, rsi
; byrRegs +[rax]
;; size=3 bbWeight=1 PerfScore 0.25
G_M13999_IG09: ; bbWeight=1, epilog, nogc, extend
add rsp, 80
pop rsi
ret
;; size=6 bbWeight=1 PerfScore 1.75
G_M13999_IG10: ; bbWeight=0.25, gcVars=0000000000000000 {}, gcrefRegs=0000 {}, byrefRegs=0040 {rsi}, gcvars, byref, isz
; byrRegs -[rax]
- vmovsd qword ptr [rsp+28H], xmm1
- vmovsd qword ptr [rsp+30H], xmm0
jmp SHORT G_M13999_IG07
- ;; size=14 bbWeight=0.25 PerfScore 1.00
+ ;; size=2 bbWeight=0.25 PerfScore 0.50
-; Total bytes of code 301, prolog size 8, PerfScore 108.73, instruction count 69, allocated bytes for code 301 (MethodHash=3c1ac950) for method System.Numerics.Complex:MaxMagnitude(System.Numerics.Complex,System.Numerics.Complex):System.Numerics.Complex
+; Total bytes of code 258, prolog size 8, PerfScore 101.93, instruction count 63, allocated bytes for code 258 (MethodHash=3c1ac950) for method System.Numerics.Complex:MaxMagnitude(System.Numerics.Complex,System.Numerics.Complex):System.Numerics.Complex
From what I understand, spillAfter == false
means spill at the def but the value computed in the register will also be used subsequently. spillAfter == true
means spill at the def without such a subsequent use ("pure def")
/azp run runtime-coreclr libraries-jitstress2-jitstressregs |
Azure Pipelines successfully started running 1 pipeline(s). |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Thanks!
Allow the spill-at-single-def logic to kick in for defs without subsequent uses before the spill.
I noticed this while working on #85105. My PR affected the IR in the following way:
I.e. the ASG
[000516]
is placed more eagerly than before (where it was[000517]
) and farther away from its use. This unexpectedly lead to the spill-at-single-def logic no longer kicking in -- the eager assignment is a "pure def" as the comment in LSRA calls it, and hasspillAfter == true
, so the logic to mark the def withsingleDefSpill
was not kicking in.