-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Perf] Regressions in System.Buffers.Tests.ReadOnlySequenceTests<Char> #72025
Comments
I couldn't figure out the best area label to add to this issue. If you have write-permissions please help me learn by adding exactly one area label. |
Also related: dotnet/perf-autofiling-issues#6700 |
Seems related to #71687 |
Tagging subscribers to this area: @JulieLeeMSFT, @jakobbotsch Issue DetailsRun Information
Regressions in System.Buffers.Tests.ReadOnlySequenceTests<Char>
Reprogit clone https://github.com/dotnet/performance.git
python3 .\performance\scripts\benchmarks_ci.py -f net6.0 --filter 'System.Buffers.Tests.ReadOnlySequenceTests<Char>*' PayloadsHistogramSystem.Buffers.Tests.ReadOnlySequenceTests<Char>.IterateGetPositionSingleSegment
Description of detection logic
DocsProfiling workflow for dotnet/runtime repository
|
It does not repro for me on my 5950X: | Method | Job | Toolchain | Mean | Error | StdDev | Median | Min | Max | Ratio | RatioSD | Allocated | Alloc Ratio |
|------------------------ |----------- |------------------ |---------:|---------:|---------:|---------:|---------:|---------:|------:|--------:|----------:|------------:|
| IterateGetPositionArray | Job-LDWMZZ | /base/corerun | 11.32 ns | 0.154 ns | 0.144 ns | 11.29 ns | 11.14 ns | 11.62 ns | 1.00 | 0.00 | - | NA |
| IterateGetPositionArray | Job-IVJMSU | /no-71687/corerun | 11.30 ns | 0.132 ns | 0.124 ns | 11.24 ns | 11.18 ns | 11.51 ns | 1.00 | 0.02 | - | NA | Looks like the code size change due to #71687 may cause us to hit jcc erratum. I assume the CPUs used in benchmarking runs suffer from this problem (which my 5950X doesn't)? mov gword ptr [rbp-50H], r9
mov dword ptr [rbp-3CH], edx
cmp r14, r9
- je G_M25988_IG16
+ je SHORT G_M25988_IG16
mov r9, gword ptr [rbp-50H]
mov r10d, dword ptr [rbp-3CH]
-; ............................... 32B boundary ...............................
mov rdi, r14
test rdi, rdi
+; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (test: 2 ; jcc erratum) 32B boundary ...............................
je SHORT G_M25988_IG15
- ;; size=51 bbWeight=2 PerfScore 19.00
+ ;; size=47 bbWeight=2 PerfScore 19.00
G_M25988_IG13:
mov rsi, 0xD1FFAB1E ; ReadOnlySequenceSegment`1
cmp qword ptr [rdi], rsi
@@ -231,20 +231,20 @@ G_M25988_IG14:
mov dword ptr [rbp-3CH], r10d
mov rdi, rsi
mov rsi, r14
-; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (mov: 1) 32B boundary ...............................
call [CORINFO_HELP_CHKCASTCLASS_SPECIAL]
+; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (call: 3 ; jcc erratum) 32B boundary ...............................
mov r9, gword ptr [rbp-50H]
mov r10d, dword ptr [rbp-3CH]
;; size=24 bbWeight=0.50 PerfScore 3.25
G_M25988_IG15:
mov rdi, r14
lea rsi, bword ptr [r14+24]
- cmp byte ptr [rsi], sil
+ cmp dword ptr [rsi], esi
mov esi, dword ptr [rsi+12]
sub esi, r15d
movsxd r11, esi
-; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (movsxd: 2) 32B boundary ...............................
mov r8, qword ptr [rbp-38H]
+; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (mov: 1) 32B boundary ...............................
cmp r11, r8
jg SHORT G_M25988_IG17
test esi, esi
@@ -253,20 +253,20 @@ G_M25988_IG15:
sub r8, rsi
mov rcx, r8
mov rdi, gword ptr [rdi+8]
-; ............................... 32B boundary ...............................
mov rsi, r9
mov edx, r10d
+; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (mov: 1) 32B boundary ...............................
mov r8d, 7
call [ReadOnlySequence`1:SeekMultiSegment(ReadOnlySequenceSegment`1,Object,int,long,int):SequencePosition]
jmp SHORT G_M25988_IG18
- ;; size=69 bbWeight=2 PerfScore 36.50
+ ;; size=68 bbWeight=2 PerfScore 36.50
G_M25988_IG16:
mov r10d, dword ptr [rbp-3CH]
sub r10d, r15d
movsxd rdi, r10d
mov r8, qword ptr [rbp-38H]
-; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (mov: 2) 32B boundary ...............................
cmp rdi, r8
+; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (cmp: 0 ; jcc erratum) 32B boundary ...............................
jl G_M25988_IG32
;; size=23 bbWeight=2 PerfScore 7.50
G_M25988_IG17:
@@ -280,10 +280,10 @@ G_M25988_IG18:
add ebx, edx
mov ecx, dword ptr [rbp-40H]
mov edx, dword ptr [rbp-44H]
-; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (mov: 2) 32B boundary ...............................
;; size=14 bbWeight=2 PerfScore 5.50
G_M25988_IG19:
cmp r15d, ecx
+; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (cmp: 0 ; jcc erratum) 32B boundary ...............................
jne G_M25988_IG12
;; size=9 bbWeight=8 PerfScore 10.00
G_M25988_IG20:
@@ -295,9 +295,9 @@ G_M25988_IG20:
;; size=15 bbWeight=4 PerfScore 11.00
G_M25988_IG21:
mov r8d, 1
-; ............................... 32B boundary ...............................
mov gword ptr [rbp-58H], rax
jmp SHORT G_M25988_IG25
+; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (jmp: 1 ; jcc erratum) 32B boundary ...............................
;; size=12 bbWeight=0.86 PerfScore 2.81
G_M25988_IG22:
test rdi, rdi
@@ -313,8 +313,8 @@ G_M25988_IG24:
mov r8, qword ptr [rdi]
mov r8, qword ptr [r8+64]
call [r8+16]Object:Equals(Object):bool:this
-; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (call: 2 ; jcc erratum) 32B boundary ...............................
mov r8d, eax
+; ............................... 32B boundary ...............................
;; size=21 bbWeight=1.11 PerfScore 9.43
G_M25988_IG25:
jmp SHORT G_M25988_IG27
@@ -334,9 +334,9 @@ G_M25988_IG28:
;; size=2 bbWeight=1 PerfScore 0.25
G_M25988_IG29:
add rsp, 56
-; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (add: 2) 32B boundary ...............................
pop rbx
pop r12
+; ............................... 32B boundary ...............................
pop r13
pop r14
pop r15
@@ -353,10 +353,10 @@ G_M25988_IG31:
;; size=7 bbWeight=0 PerfScore 0.00
G_M25988_IG32:
mov edi, 7
-; ............................... 32B boundary ...............................
call [ThrowHelper:ThrowArgumentOutOfRangeException(int)]
+; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (call: 1 ; jcc erratum) 32B boundary ...............................
int3
;; size=12 bbWeight=0 PerfScore 0.00
-; Total bytes of code 551, prolog size 19, PerfScore 226.09, instruction count 161, allocated bytes for code 551 (MethodHash=06cb9a7b) for method ReadOnlySequenceTests`1:IterateGetPosition(ReadOnlySequence`1):int:this
+; Total bytes of code 546, prolog size 19, PerfScore 225.59, instruction count 161, allocated bytes for code 546 (MethodHash=06cb9a7b) for method ReadOnlySequenceTests`1:IterateGetPosition(ReadOnlySequence`1):int:this cc @kunalspathak, IIRC you had some plans to add a mode to mitigate this for the benchmarking runs. |
I don't think it is even related to https://github.com/dotnet/runtime/pull/71687/files. From the below graph, it went up due to edfca3c...c9feedf and then came back because of one of d434c4a...199580b, none of them seems would have affected this test (except may be #71889). |
I may have mixed this one up with dotnet/perf-autofiling-issues#6700 (comment) (the codegen above is from that benchmark) |
This specific jump has indeed been resolved. |
Run Information
Regressions in System.Buffers.Tests.ReadOnlySequenceTests<Char>
Test Report
Repro
Payloads
Baseline
Compare
Histogram
System.Buffers.Tests.ReadOnlySequenceTests<Char>.IterateGetPositionSingleSegment
Description of detection logic
Docs
Profiling workflow for dotnet/runtime repository
Benchmarking workflow for dotnet/runtime repository
The text was updated successfully, but these errors were encountered: