-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Unroll StringBuilder.Append for const string #85894
Conversation
Very nice use-case! One problem is that it might slightly regress the case when it's not inlined, as well as Mono. Another option is to tune the inliner - if it sees |
this seems to be enough to make it inlineable for const input (it gives inliner extra hint) |
You are right, we need to keep the small-size optimization. I figured it was only there for constant separators/newline, and I didn't think about Mono.
That's a cool trick! Unfortunately, it seems to inline pretty aggressively, even when the input is not constant. It causes the code size to increase a lot where it's not really necessary.
What's funny is that if I use the byte variant of Buffer.Memmove, it seems to hit a pretty good spot now, where it inlines for a constant length but not otherwise. I know very little about how the JIT works, so I'm out of my league here - but is it possible that an even better solution is to somehow make both variants of Buffer.Memmove have the same inlining "weight"? It feels a bit odd that they behave differently here. |
With current code: public static void Example(StringBuilder builder, string s)
{
builder.Append(s);
builder.Append("1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN");
builder.AppendLine();
} ; Method Program:Example(System.Text.StringBuilder,System.String)
G_M17123_IG01: ;; offset=0000H
push rsi
sub rsp, 32
vzeroupper
mov rsi, rcx
;; size=11 bbWeight=1 PerfScore 2.50
G_M17123_IG02: ;; offset=000BH
cmp byte ptr [rsi], sil
test rdx, rdx
je SHORT G_M17123_IG04
;; size=8 bbWeight=1 PerfScore 4.25
G_M17123_IG03: ;; offset=0013H
mov r8d, dword ptr [rdx+08H]
add rdx, 12
mov rcx, rsi
call [System.Text.StringBuilder:Append(byref,int):this]
;; size=17 bbWeight=0.50 PerfScore 2.75
G_M17123_IG04: ;; offset=0024H
mov rdx, 0x1EB802028D8 ; '1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN'
add rdx, 12
mov rcx, gword ptr [rsi+08H]
mov r8d, dword ptr [rsi+18H]
lea eax, [r8+32H]
cmp dword ptr [rcx+08H], eax
jb SHORT G_M17123_IG06
;; size=31 bbWeight=1 PerfScore 9.00
G_M17123_IG05: ;; offset=0043H
movsxd rax, r8d
lea rcx, bword ptr [rcx+2*rax+10H]
vmovdqu ymm0, ymmword ptr [rdx]
vmovdqu ymm1, ymmword ptr [rdx+20H]
vmovdqu ymm2, ymmword ptr [rdx+40H]
vmovdqu xmm3, xmmword ptr [rdx+54H]
vmovdqu ymmword ptr [rcx], ymm0
vmovdqu ymmword ptr [rcx+20H], ymm1
vmovdqu ymmword ptr [rcx+40H], ymm2
vmovdqu xmmword ptr [rcx+54H], xmm3
add r8d, 50
mov dword ptr [rsi+18H], r8d
jmp SHORT G_M17123_IG07
;; size=56 bbWeight=0.50 PerfScore 15.75
G_M17123_IG06: ;; offset=007BH
mov rcx, rsi
mov r8d, 50
call [System.Text.StringBuilder:AppendWithExpansion(byref,int):this]
;; size=15 bbWeight=0.50 PerfScore 1.75
G_M17123_IG07: ;; offset=008AH
mov rdx, 0x1EB80202958 ; ' '
add rdx, 12
mov rcx, gword ptr [rsi+08H]
mov r8d, dword ptr [rsi+18H]
lea eax, [r8+02H]
cmp dword ptr [rcx+08H], eax
jb SHORT G_M17123_IG09
;; size=31 bbWeight=1 PerfScore 9.00
G_M17123_IG08: ;; offset=00A9H
movsxd rdx, r8d
lea rcx, bword ptr [rcx+2*rdx+10H]
mov word ptr [rcx], 13
mov word ptr [rcx+02H], 10
mov dword ptr [rsi+18H], eax
jmp SHORT G_M17123_IG10
;; size=24 bbWeight=0.50 PerfScore 3.12
G_M17123_IG09: ;; offset=00C1H
mov rcx, rsi
mov r8d, 2
call [System.Text.StringBuilder:AppendWithExpansion(byref,int):this]
;; size=15 bbWeight=0.50 PerfScore 1.75
G_M17123_IG10: ;; offset=00D0H
nop
;; size=1 bbWeight=1 PerfScore 0.25
G_M17123_IG11: ;; offset=00D1H
add rsp, 32
pop rsi
ret
;; size=6 bbWeight=1 PerfScore 1.75
; Total bytes of code: 215 |
Works perfectly for this scenario, thanks! I'll post some examples here. I assume you will want to take it from here and close this?
Append non-const
public static void Append(StringBuilder builder, string s)
{
builder.Append(s);
} Before ; Method Program:Append(System.Text.StringBuilder,System.String)
G_M43621_IG01: ;; offset=0000H
sub rsp, 40
;; size=4 bbWeight=1 PerfScore 0.25
G_M43621_IG02: ;; offset=0004H
cmp byte ptr [rcx], cl
test rdx, rdx
je SHORT G_M43621_IG04
;; size=7 bbWeight=1 PerfScore 4.25
G_M43621_IG03: ;; offset=000BH
mov r8d, dword ptr [rdx+08H]
add rdx, 12
call [System.Text.StringBuilder:Append(byref,int):this]
;; size=14 bbWeight=0.49 PerfScore 2.57
G_M43621_IG04: ;; offset=0019H
nop
;; size=1 bbWeight=1 PerfScore 0.25
G_M43621_IG05: ;; offset=001AH
add rsp, 40
ret
;; size=5 bbWeight=1 PerfScore 1.25
; Total bytes of code: 31 After ; Method Program:Append(System.Text.StringBuilder,System.String)
G_M43621_IG01: ;; offset=0000H
sub rsp, 40
;; size=4 bbWeight=1 PerfScore 0.25
G_M43621_IG02: ;; offset=0004H
cmp byte ptr [rcx], cl
test rdx, rdx
je SHORT G_M43621_IG04
;; size=7 bbWeight=1 PerfScore 4.25
G_M43621_IG03: ;; offset=000BH
mov r8d, dword ptr [rdx+08H]
add rdx, 12
call [System.Text.StringBuilder:Append(byref,int):this]
;; size=14 bbWeight=0.50 PerfScore 2.62
G_M43621_IG04: ;; offset=0019H
nop
;; size=1 bbWeight=1 PerfScore 0.25
G_M43621_IG05: ;; offset=001AH
add rsp, 40
ret
;; size=5 bbWeight=1 PerfScore 1.25
; Total bytes of code: 31
Append by index non-const
public static void AppendIndex(StringBuilder builder, string s)
{
builder.Append(s, 2, 16);
} Before ; Method Program:AppendIndex(System.Text.StringBuilder,System.String)
G_M19771_IG01: ;; offset=0000H
sub rsp, 40
;; size=4 bbWeight=1 PerfScore 0.25
G_M19771_IG02: ;; offset=0004H
mov r8d, 2
mov r9d, 16
cmp dword ptr [rcx], ecx
call [System.Text.StringBuilder:Append(System.String,int,int):System.Text.StringBuilder:this]
nop
;; size=21 bbWeight=1 PerfScore 6.75
G_M19771_IG03: ;; offset=0019H
add rsp, 40
ret
;; size=5 bbWeight=1 PerfScore 1.25
; Total bytes of code: 30 After ; Method Program:AppendIndex(System.Text.StringBuilder,System.String)
G_M19771_IG01: ;; offset=0000H
sub rsp, 40
;; size=4 bbWeight=1 PerfScore 0.25
G_M19771_IG02: ;; offset=0004H
mov r8d, 2
mov r9d, 16
cmp dword ptr [rcx], ecx
call [System.Text.StringBuilder:Append(System.String,int,int):System.Text.StringBuilder:this]
nop
;; size=21 bbWeight=1 PerfScore 6.75
G_M19771_IG03: ;; offset=0019H
add rsp, 40
ret
;; size=5 bbWeight=1 PerfScore 1.25
; Total bytes of code: 30
AppendLine
public static void AppendLine(StringBuilder builder, string s)
{
builder.AppendLine(s);
} Before ; Method Program:AppendLine(System.Text.StringBuilder,System.String)
G_M48267_IG01: ;; offset=0000H
push rsi
sub rsp, 32
mov rsi, rcx
;; size=8 bbWeight=1 PerfScore 1.50
G_M48267_IG02: ;; offset=0008H
cmp byte ptr [rsi], sil
test rdx, rdx
je SHORT G_M48267_IG04
;; size=8 bbWeight=1 PerfScore 4.25
G_M48267_IG03: ;; offset=0010H
mov r8d, dword ptr [rdx+08H]
add rdx, 12
mov rcx, rsi
call [System.Text.StringBuilder:Append(byref,int):this]
;; size=17 bbWeight=0.49 PerfScore 2.70
G_M48267_IG04: ;; offset=0021H
mov rdx, 0x283002028D8 ; ' '
add rdx, 12
mov rcx, rsi
mov r8d, 2
call [System.Text.StringBuilder:Append(byref,int):this]
nop
;; size=30 bbWeight=1 PerfScore 4.25
G_M48267_IG05: ;; offset=003FH
add rsp, 32
pop rsi
ret
;; size=6 bbWeight=1 PerfScore 1.75
; Total bytes of code: 69 After ; Method Program:AppendLine(System.Text.StringBuilder,System.String)
G_M48267_IG01: ;; offset=0000H
push rsi
sub rsp, 32
mov rsi, rcx
;; size=8 bbWeight=1 PerfScore 1.50
G_M48267_IG02: ;; offset=0008H
cmp byte ptr [rsi], sil
test rdx, rdx
je SHORT G_M48267_IG04
;; size=8 bbWeight=1 PerfScore 4.25
G_M48267_IG03: ;; offset=0010H
mov r8d, dword ptr [rdx+08H]
add rdx, 12
mov rcx, rsi
call [System.Text.StringBuilder:Append(byref,int):this]
;; size=17 bbWeight=0.50 PerfScore 2.75
G_M48267_IG04: ;; offset=0021H
mov rdx, 0x207003028D8 ; ' '
add rdx, 12
mov rcx, gword ptr [rsi+08H]
mov r8d, dword ptr [rsi+18H]
lea eax, [r8+02H]
cmp dword ptr [rcx+08H], eax
jb SHORT G_M48267_IG06
;; size=31 bbWeight=1 PerfScore 9.00
G_M48267_IG05: ;; offset=0040H
movsxd rdx, r8d
lea rcx, bword ptr [rcx+2*rdx+10H]
mov word ptr [rcx], 13
mov word ptr [rcx+02H], 10
mov dword ptr [rsi+18H], eax
jmp SHORT G_M48267_IG07
;; size=24 bbWeight=0.50 PerfScore 3.12
G_M48267_IG06: ;; offset=0058H
mov rcx, rsi
mov r8d, 2
call [System.Text.StringBuilder:AppendWithExpansion(byref,int):this]
;; size=15 bbWeight=0.50 PerfScore 1.75
G_M48267_IG07: ;; offset=0067H
nop
;; size=1 bbWeight=1 PerfScore 0.25
G_M48267_IG08: ;; offset=0068H
add rsp, 32
pop rsi
ret
;; size=6 bbWeight=1 PerfScore 1.75
; Total bytes of code: 110
Append const
public static void AppendConst(StringBuilder builder)
{
builder.Append("1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN");
} Before ; Method Program:AppendConst(System.Text.StringBuilder)
G_M5650_IG01: ;; offset=0000H
sub rsp, 40
;; size=4 bbWeight=1 PerfScore 0.25
G_M5650_IG02: ;; offset=0004H
cmp byte ptr [rcx], cl
mov rdx, 0x14A003028D8 ; '1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN'
add rdx, 12
mov r8d, 50
call [System.Text.StringBuilder:Append(byref,int):this]
nop
;; size=29 bbWeight=1 PerfScore 7.00
G_M5650_IG03: ;; offset=0021H
add rsp, 40
ret
;; size=5 bbWeight=1 PerfScore 1.25
; Total bytes of code: 38 After ; Method Program:AppendConst(System.Text.StringBuilder)
G_M5650_IG01: ;; offset=0000H
sub rsp, 40
vzeroupper
;; size=7 bbWeight=1 PerfScore 1.25
G_M5650_IG02: ;; offset=0007H
mov rdx, 0x20B002028D8 ; '1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN'
add rdx, 12
mov r8, gword ptr [rcx+08H]
mov eax, dword ptr [rcx+18H]
lea r9d, [rax+32H]
cmp dword ptr [r8+08H], r9d
jb SHORT G_M5650_IG04
;; size=31 bbWeight=1 PerfScore 9.00
G_M5650_IG03: ;; offset=0026H
movsxd r9, eax
lea r8, bword ptr [r8+2*r9+10H]
vmovdqu ymm0, ymmword ptr [rdx]
vmovdqu ymm1, ymmword ptr [rdx+20H]
vmovdqu ymm2, ymmword ptr [rdx+40H]
vmovdqu xmm3, xmmword ptr [rdx+54H]
vmovdqu ymmword ptr [r8], ymm0
vmovdqu ymmword ptr [r8+20H], ymm1
vmovdqu ymmword ptr [r8+40H], ymm2
vmovdqu xmmword ptr [r8+54H], xmm3
add eax, 50
mov dword ptr [rcx+18H], eax
jmp SHORT G_M5650_IG05
;; size=58 bbWeight=0.50 PerfScore 15.75
G_M5650_IG04: ;; offset=0060H
mov r8d, 50
call [System.Text.StringBuilder:AppendWithExpansion(byref,int):this]
;; size=12 bbWeight=0.50 PerfScore 1.62
G_M5650_IG05: ;; offset=006CH
nop
;; size=1 bbWeight=1 PerfScore 0.25
G_M5650_IG06: ;; offset=006DH
add rsp, 40
ret
;; size=5 bbWeight=1 PerfScore 1.25
; Total bytes of code: 114
Append by index const
public static void AppendConstIndex(StringBuilder builder)
{
builder.Append("1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN", 2, 16);
} Before ; Method Program:AppendConstIndex(System.Text.StringBuilder)
G_M5612_IG01: ;; offset=0000H
sub rsp, 40
vzeroupper
;; size=7 bbWeight=1 PerfScore 1.25
G_M5612_IG02: ;; offset=0007H
mov rdx, 0x257002028D8 ; '1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN'
add rdx, 12
add rdx, 4
mov r8, gword ptr [rcx+08H]
mov eax, dword ptr [rcx+18H]
lea r9d, [rax+10H]
cmp dword ptr [r8+08H], r9d
jb SHORT G_M5612_IG04
;; size=35 bbWeight=1 PerfScore 9.25
G_M5612_IG03: ;; offset=002AH
movsxd r9, eax
lea r8, bword ptr [r8+2*r9+10H]
vmovdqu ymm0, ymmword ptr [rdx]
vmovdqu ymmword ptr [r8], ymm0
add eax, 16
mov dword ptr [rcx+18H], eax
jmp SHORT G_M5612_IG05
;; size=25 bbWeight=0.50 PerfScore 5.75
G_M5612_IG04: ;; offset=0043H
mov r8d, 16
call [System.Text.StringBuilder:AppendWithExpansion(byref,int):this]
;; size=12 bbWeight=0.50 PerfScore 1.62
G_M5612_IG05: ;; offset=004FH
nop
;; size=1 bbWeight=1 PerfScore 0.25
G_M5612_IG06: ;; offset=0050H
add rsp, 40
ret
;; size=5 bbWeight=1 PerfScore 1.25
; Total bytes of code: 85 After ; Method Program:AppendConstIndex(System.Text.StringBuilder)
G_M5612_IG01: ;; offset=0000H
sub rsp, 40
vzeroupper
;; size=7 bbWeight=1 PerfScore 1.25
G_M5612_IG02: ;; offset=0007H
mov rdx, 0x18E802028D8 ; '1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN'
add rdx, 12
add rdx, 4
mov r8, gword ptr [rcx+08H]
mov eax, dword ptr [rcx+18H]
lea r9d, [rax+10H]
cmp dword ptr [r8+08H], r9d
jb SHORT G_M5612_IG04
;; size=35 bbWeight=1 PerfScore 9.25
G_M5612_IG03: ;; offset=002AH
movsxd r9, eax
lea r8, bword ptr [r8+2*r9+10H]
vmovdqu ymm0, ymmword ptr [rdx]
vmovdqu ymmword ptr [r8], ymm0
add eax, 16
mov dword ptr [rcx+18H], eax
jmp SHORT G_M5612_IG05
;; size=25 bbWeight=0.50 PerfScore 5.75
G_M5612_IG04: ;; offset=0043H
mov r8d, 16
call [System.Text.StringBuilder:AppendWithExpansion(byref,int):this]
;; size=12 bbWeight=0.50 PerfScore 1.62
G_M5612_IG05: ;; offset=004FH
nop
;; size=1 bbWeight=1 PerfScore 0.25
G_M5612_IG06: ;; offset=0050H
add rsp, 40
ret
;; size=5 bbWeight=1 PerfScore 1.25
; Total bytes of code: 85 |
Feel free to integrate into your PR so we can merge it |
src/libraries/System.Private.CoreLib/src/System/Text/StringBuilder.cs
Outdated
Show resolved
Hide resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for noticing the opportunity!
Tagging subscribers to this area: @JulieLeeMSFT, @jakobbotsch
Issue Details
Buffer.Memmove is now being unrolled for constant lengths. If we simplify Append(ref char, int) a bit, the JIT can inline it.
Const string example
public static void Example(StringBuilder stringBuilder)
{
stringBuilder.Append("1234567890abcdefghijklmnopqrstuvwxyzåäö");
} Before: ; Method Program:Example(System.Text.StringBuilder)
G_M35345_IG01: ;; offset=0000H
sub rsp, 40
;; size=4 bbWeight=1 PerfScore 0.25
G_M35345_IG02: ;; offset=0004H
cmp byte ptr [rcx], cl
mov rdx, 0x154802028D8 ; '1'
add rdx, 12
mov r8d, 39
call [System.Text.StringBuilder:Append(byref,int):this]
nop
;; size=29 bbWeight=1 PerfScore 7.00
G_M35345_IG03: ;; offset=0021H
add rsp, 40
ret
;; size=5 bbWeight=1 PerfScore 1.25
; Total bytes of code: 38 After: ; Method Program:Example(System.Text.StringBuilder)
G_M35345_IG01: ;; offset=0000H
sub rsp, 40
vzeroupper
;; size=7 bbWeight=1 PerfScore 1.25
G_M35345_IG02: ;; offset=0007H
mov rdx, 0x18C802028D8 ; '1'
add rdx, 12
mov r8, gword ptr [rcx+08H]
mov eax, dword ptr [rcx+18H]
lea r9d, [rax+27H]
cmp dword ptr [r8+08H], r9d
jb SHORT G_M35345_IG04
;; size=31 bbWeight=1 PerfScore 9.00
G_M35345_IG03: ;; offset=0026H
movsxd r9, eax
lea r8, bword ptr [r8+2*r9+10H]
vmovdqu ymm0, ymmword ptr [rdx]
vmovdqu ymm1, ymmword ptr [rdx+20H]
vmovdqu xmm2, xmmword ptr [rdx+3EH]
vmovdqu ymmword ptr [r8], ymm0
vmovdqu ymmword ptr [r8+20H], ymm1
vmovdqu xmmword ptr [r8+3EH], xmm2
add eax, 39
mov dword ptr [rcx+18H], eax
jmp SHORT G_M35345_IG05
;; size=47 bbWeight=0.50 PerfScore 12.25
G_M35345_IG04: ;; offset=0055H
mov r8d, 39
call [System.Text.StringBuilder:AppendWithExpansion(byref,int):this]
;; size=12 bbWeight=0.50 PerfScore 1.62
G_M35345_IG05: ;; offset=0061H
nop
;; size=1 bbWeight=1 PerfScore 0.25
G_M35345_IG06: ;; offset=0062H
add rsp, 40
ret
;; size=5 bbWeight=1 PerfScore 1.25
; Total bytes of code: 103 AppendLine example public static void Example(StringBuilder stringBuilder)
{
stringBuilder.AppendLine();
} Before: ; Method Program:Example(System.Text.StringBuilder)
G_M35345_IG01: ;; offset=0000H
sub rsp, 40
;; size=4 bbWeight=1 PerfScore 0.25
G_M35345_IG02: ;; offset=0004H
cmp byte ptr [rcx], cl
mov rdx, 0x195802028D8 ; ' '
add rdx, 12
mov r8d, 2
call [System.Text.StringBuilder:Append(byref,int):this]
nop
;; size=29 bbWeight=1 PerfScore 7.00
G_M35345_IG03: ;; offset=0021H
add rsp, 40
ret
;; size=5 bbWeight=1 PerfScore 1.25
; Total bytes of code: 38 After: ; Method Program:Example(System.Text.StringBuilder)
G_M35345_IG01: ;; offset=0000H
sub rsp, 40
;; size=4 bbWeight=1 PerfScore 0.25
G_M35345_IG02: ;; offset=0004H
mov rdx, 0x17C002028D8 ; ' '
add rdx, 12
mov r8, gword ptr [rcx+08H]
mov eax, dword ptr [rcx+18H]
lea r9d, [rax+02H]
cmp dword ptr [r8+08H], r9d
jb SHORT G_M35345_IG04
;; size=31 bbWeight=1 PerfScore 9.00
G_M35345_IG03: ;; offset=0023H
movsxd r9, eax
lea r8, bword ptr [r8+2*r9+10H]
mov r9d, dword ptr [rdx]
mov dword ptr [r8], r9d
add eax, 2
mov dword ptr [rcx+18H], eax
jmp SHORT G_M35345_IG05
;; size=22 bbWeight=0.50 PerfScore 3.75
G_M35345_IG04: ;; offset=0039H
mov r8d, 2
call [System.Text.StringBuilder:AppendWithExpansion(byref,int):this]
;; size=12 bbWeight=0.50 PerfScore 1.62
G_M35345_IG05: ;; offset=0045H
nop
;; size=1 bbWeight=1 PerfScore 0.25
G_M35345_IG06: ;; offset=0046H
add rsp, 40
ret
;; size=5 bbWeight=1 PerfScore 1.25
; Total bytes of code: 75 @EgorBo can you please take a look at this
|
@AndyAyersMS PTAL inliner change |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This apparently is regressing Fortunes benchmarks with middleware in aspnet. |
Thanks! The one that keeps giving. Makes sense since I couldn't repro the regression on the newer machines (aspnet-perf-lin). |
Buffer.Memmove is now being unrolled for constant lengths. If we simplify Append(ref char, int) a bit, the JIT can inline it.
As a result, the methods StringBuilder.Append(string) and StringBuilder.AppendLine() can now be unrolled.
It didn't seem necessary to add an AggressiveInlining attribute, so I left it out.
Const string example
Before:
After:
AppendLine example
Before:
After:
@EgorBo can you please take a look at this