-
Notifications
You must be signed in to change notification settings - Fork 12.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Upgrade to LLVM11 caused a codegen regression on Windows #78283
Comments
cc @rust-lang/wg-llvm |
I can now reproduce this locally. I'll try to come up with a standalone test case. |
Running the following on win64 with fn inner() -> Vec<u8> {
let color_sample_max = 4;
let pixel_size = 4;
let mut out = vec![0; color_sample_max * 256 * pixel_size];
let mut color = &mut out[..];
for _ in 0..color_sample_max {
for b in 0..=255 {
color[0] = 0x80;
color[1] = 0x80;
color[2] = b;
color[3] = 0x80;
color = &mut color[pixel_size..];
}
}
out
}
fn f1() {
inner();
}
fn f2() {
inner();
}
fn main() {
f1();
f2();
} |
Affects only MSVC:
@rustbot modify labels: +O-windows-msvc |
Error: Label Windows-msvc can only be set by Rust team members Please let |
I compared the generated code between |
Interestingly, there do not seem to be any real differences between the llvm-ir of the two versions, which suggests that things are going wrong late in the pipeline. |
Doesn't panic here with LTO, either thin or fat. |
It looks like the codegen difference is caused by the attribute |
Or more specifically, using the name |
Here's the diff from taking the results from --- bad.s 2020-10-25 18:48:19.000000000 -0400
+++ good.s 2020-10-25 18:47:59.000000000 -0400
@@ -12,7 +12,9 @@
.endef
.p2align 4, 0x90 # -- Begin function _ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h39f1d54c4354cfb7E
_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h39f1d54c4354cfb7E: # @_ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h39f1d54c4354cfb7E
+.Lfunc_begin0:
.seh_proc _ZN3std10sys_common9backtrace28__rust_begin_short_backtrace17h39f1d54c4354cfb7E
+ .seh_handler __CxxFramHandler3, @unwind, @except
# %bb.0: # %start
subq $40, %rsp
.seh_stackalloc 40
@@ -23,9 +25,26 @@
#NO_APP
addq $40, %rsp
retq
+.Lfunc_end0:
.seh_handlerdata
.text
.seh_endproc
+ .section .xdata,"dr"
+ .p2align 2
+GCC_except_table0:
+.Lexception0:
+ .byte 255 # @LPStart Encoding = omit
+ .byte 255 # @TType Encoding = omit
+ .byte 1 # Call site Encoding = uleb128
+ .uleb128 .Lcst_end0-.Lcst_begin0
+.Lcst_begin0:
+ .uleb128 .Lfunc_begin0-.Lfunc_begin0 # >> Call Site 1 <<
+ .uleb128 .Lfunc_end0-.Lfunc_begin0 # Call between .Lfunc_begin0 and .Lfunc_end0
+ .byte 0 # has no landing pad
+ .byte 0 # On action: cleanup
+.Lcst_end0:
+ .p2align 2
+ .text
# -- End function
.def _ZN3std2rt10lang_start17h44748352664c46c2E;
.scl 2;
@@ -107,24 +126,30 @@
.endef
.p2align 4, 0x90 # -- Begin function _ZN3out5inner17h353a9cc7a3c23810E
_ZN3out5inner17h353a9cc7a3c23810E: # @_ZN3out5inner17h353a9cc7a3c23810E
+.Lfunc_begin1:
.seh_proc _ZN3out5inner17h353a9cc7a3c23810E
+ .seh_handler __CxxFramHandler3, @unwind, @except
# %bb.0: # %start
pushq %rsi
.seh_pushreg %rsi
+ pushq %rdi
+ .seh_pushreg %rdi
+ pushq %rbx
+ .seh_pushreg %rbx
subq $32, %rsp
.seh_stackalloc 32
.seh_endprologue
- movq %rcx, %rsi
+ movq %rcx, %rbx
movl $4096, %ecx # imm = 0x1000
movl $1, %edx
callq __rust_alloc_zeroed
testq %rax, %rax
- je .LBB5_17
+ je .LBB5_18
# %bb.1: # %_ZN5alloc3vec9from_elem17h74690dcc847adf77E.exit
- movq %rax, (%rsi)
- movq $4096, 8(%rsi) # imm = 0x1000
- movq $4096, 16(%rsi) # imm = 0x1000
- movq $-4096, %r8 # imm = 0xF000
+ movq %rax, (%rbx)
+ movq $4096, 8(%rbx) # imm = 0x1000
+ movq $4096, 16(%rbx) # imm = 0x1000
+ movq $-4096, %rdx # imm = 0xF000
xorl %ecx, %ecx
.p2align 4, 0x90
.LBB5_2: # %bb2.i
@@ -137,7 +162,7 @@
movb %cl, 6(%rax)
movb $-128, 7(%rax)
addq $8, %rax
- addq $8, %r8
+ addq $8, %rdx
incb %cl
jne .LBB5_2
# %bb.3: # %bb2.i.1.preheader
@@ -150,16 +175,16 @@
movb %sil, 2(%rax,%rsi,4)
movw $-32640, 3(%rax,%rsi,4) # imm = 0x8080
movb $-128, 5(%rax,%rsi,4)
- leal 1(%rsi), %edx
- movb %dl, 6(%rax,%rsi,4)
+ leal 1(%rsi), %edi
+ movb %dil, 6(%rax,%rsi,4)
movb $-128, 7(%rax,%rsi,4)
addq $-8, %rcx
addq $2, %rsi
- cmpb $-1, %dl
+ cmpb $-1, %dil
jne .LBB5_4
# %bb.5: # %bb2.i.2.preheader
subq %rcx, %rax
- subq %r8, %rcx
+ subq %rdx, %rcx
xorl %edx, %edx
.p2align 4, 0x90
.LBB5_6: # %bb2.i.2
@@ -177,32 +202,41 @@
jne .LBB5_6
# %bb.7: # %bb2.i.3.preheader
xorl %edx, %edx
- testq %rcx, %rcx
- je .LBB5_9
.p2align 4, 0x90
-.LBB5_13: # %bb16.3
+.LBB5_8: # %bb2.i.3
# =>This Inner Loop Header: Depth=1
+ testq %rcx, %rcx
+ je .LBB5_9
+# %bb.13: # %bb16.3
+ # in Loop: Header=BB5_8 Depth=1
movb $-128, (%rax)
cmpq $1, %rcx
je .LBB5_10
# %bb.14: # %bb17.3
- # in Loop: Header=BB5_13 Depth=1
+ # in Loop: Header=BB5_8 Depth=1
leal 1(%rdx), %esi
movb $-128, 1(%rax)
cmpq $3, %rcx
jb .LBB5_11
# %bb.15: # %bb18.3
- # in Loop: Header=BB5_13 Depth=1
+ # in Loop: Header=BB5_8 Depth=1
movb %dl, 2(%rax)
je .LBB5_12
# %bb.16: # %bb19.3
- # in Loop: Header=BB5_13 Depth=1
+ # in Loop: Header=BB5_8 Depth=1
movb $-128, 3(%rax)
addq $-4, %rcx
addq $4, %rax
movl %esi, %edx
- testq %rcx, %rcx
- jne .LBB5_13
+ testb %sil, %sil
+ jne .LBB5_8
+# %bb.17: # %bb4.loopexit.3
+ movq %rbx, %rax
+ addq $32, %rsp
+ popq %rbx
+ popq %rdi
+ popq %rsi
+ retq
.LBB5_9: # %panic
leaq .Lalloc20(%rip), %r8
xorl %ecx, %ecx
@@ -223,14 +257,27 @@
movl $3, %ecx
movl $3, %edx
callq _ZN4core9panicking18panic_bounds_check17h2d25ebb349b8ce6eE
-.LBB5_17: # %bb20.i.i.i.i.i
+.LBB5_18: # %bb20.i.i.i.i.i
movl $4096, %ecx # imm = 0x1000
movl $1, %edx
callq _ZN5alloc5alloc18handle_alloc_error17h71c060cff7245371E
int3
+.Lfunc_end1:
.seh_handlerdata
.text
.seh_endproc
+ .section .xdata,"dr"
+ .p2align 2
+GCC_except_table5:
+.Lexception1:
+ .byte 255 # @LPStart Encoding = omit
+ .byte 255 # @TType Encoding = omit
+ .byte 1 # Call site Encoding = uleb128
+ .uleb128 .Lcst_end1-.Lcst_begin1
+.Lcst_begin1:
+.Lcst_end1:
+ .p2align 2
+ .text
# -- End function
.def _ZN3out4main17ha1c9ae0fb8136a1fE;
.scl 3; |
It looks like this difference is introduced in the |
where this transformation happens: @@ -241,12 +241,8 @@
%scevgep2 = getelementptr i8, i8* %color.sroa.0.164.in.3, i64 3
store i8 -128, i8* %scevgep2, align 1
%_7.i.i.i.i.3 = add i64 %color.sroa.13.163.3, -4
- %.not.3 = icmp eq i8 %24, 0
%scevgep3 = getelementptr i8, i8* %color.sroa.0.164.in.3, i64 4
- br i1 %.not.3, label %bb4.loopexit.3, label %bb2.i.3
-
- bb4.loopexit.3: ; preds = %bb19.3
- ret void
+ br label %bb2.i.3
} |
The |
It looks like the standalone version of this test case started panicking back in rust 1.38. My guess is the LLVM11 upgrade caused the initial code to start hitting the broken path. |
It looks like rust 1.38 changes some u8 `add` to `add nuw`. The following patch to the 1.37 ir to make it more like the 1.38 ir is sufficient to trigger the panic. --- main.ll 2020-10-26 12:25:05 -0400
+++ main-1.37-nuw.ll 2020-10-26 12:24:24 -0400
@@ -211,7 +211,7 @@
%color.sroa.13.159.3 = phi i64 [ %53, %bb22.3 ], [ %42, %bb3.i.2 ]
%iter1.sroa.0.058.3 = phi i8 [ %44, %bb22.3 ], [ 0, %bb3.i.2 ]
%43 = icmp eq i8 %iter1.sroa.0.058.3, -1
- %44 = add i8 %iter1.sroa.0.058.3, 1
+ %44 = add nuw i8 %iter1.sroa.0.058.3, 1
%45 = icmp eq i64 %color.sroa.13.159.3, 0
br i1 %45, label %panic, label %bb18.3, !prof !13 |
Might be the same issue as #74498. |
Yeah, it certainly looks related. https://bugs.llvm.org/show_bug.cgi?id=46943 is the upstream bug. |
Here's a further reduced version: const color_sample_max: usize = 2;
const pixel_size: usize = 4;
#[inline(never)]
fn inner(mut out: &mut [u8]) {
let mut color = &mut out[..];
for _ in 0..color_sample_max {
for b in 0..=255 {
unsafe {
if color.len() < 1 { panic!() }
*color.get_unchecked_mut(0) = 0x80;
if color.len() < 2 { panic!() }
*color.get_unchecked_mut(1) = 0x80;
if color.len() < 3 { panic!() }
*color.get_unchecked_mut(2) = b;
*color.get_unchecked_mut(3) = 0x80;
}
color = &mut color[pixel_size..];
}
}
}
fn main() {
let mut out = vec![0; color_sample_max * 256 * pixel_size];
let out = inner(&mut out);
} |
I have a patch that fixes (what I believe to be) the underlying LLVM bug; see https://bugs.llvm.org/show_bug.cgi?id=46943 ; I'm working on making an LLVM unit test and then I'll put up a patch for the llvm-devs to review. |
This should be fixed now that #81451 has been merged. |
@jrmuizel Would you be able to confirm that this is indeed fixed now? I don't have a Windows system to test. |
So, I believe I hit this bug in some other code today (In an older nightly, fixed on master, no worries about a new regression popping up). However, I can confirm that the literal bug here has been fixed: #78283 (comment) (note: 2021-03-04 is using llvm 11, 2021-03-05 is using llvm 12)
|
The following code is panicking for me in CI on Win64 and Win32 (not other platforms). I haven't been able to reproduce it locally, but we bisected it to #73526 (the LLVM 11 upgrade).
Code
It still fails on Nightly.
https://bugzilla.mozilla.org/show_bug.cgi?id=1672813 has some more details.
The text was updated successfully, but these errors were encountered: