-
Notifications
You must be signed in to change notification settings - Fork 12.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add -Zmutable-noalias flag #45012
Add -Zmutable-noalias flag #45012
Conversation
r? @arielb1 (rust_highfive has picked a reviewer for you, use r? to override) |
cc @jrmuizel |
Some codegen tests using serde (code below):
#[macro_use]
extern crate serde_derive;
extern crate bincode;
use std::io::Write;
use std::{io, ptr};
use bincode::{serialize, deserialize, Infinite};
/// Sample record that the codegen test serializes with bincode.
///
/// Holds three `f64` fields and one `u64` field. `Serialize`/`Deserialize` are
/// derived so the value can be passed to `bincode::serialize_into` and
/// `deserialize`; `PartialEq` and `Debug` are derived for the `assert_eq!` in `main`.
#[derive(Serialize, Deserialize, PartialEq, Debug)]
struct Entity {
x: f64,
y: f64,
z: f64,
o: u64
}
/// Newtype wrapping a list of [`Entity`] values.
///
/// NOTE(review): nothing in the visible test program constructs or serializes
/// a `World`; it appears to be left over from an earlier version — confirm.
#[derive(Serialize, Deserialize, PartialEq, Debug)]
struct World(Vec<Entity>);
/// `Write` adapter that appends bytes to a borrowed `Vec<u8>` without ever growing it.
///
/// The caller must reserve enough spare capacity in the vector *before* writing;
/// `write` itself never reallocates. Writing more bytes than the spare capacity
/// is undefined behaviour in release builds and trips a `debug_assert!` in
/// debug builds.
struct UnsafeVecWriter<'a>(&'a mut Vec<u8>);

impl<'a> Write for UnsafeVecWriter<'a> {
    /// Copies all of `buf` to the end of the vector and returns `buf.len()`.
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        let old_len = self.0.len();
        // Catch misuse (missing `reserve`) in debug builds without adding any
        // check to optimized builds.
        debug_assert!(
            self.0.capacity() - old_len >= buf.len(),
            "UnsafeVecWriter: not enough reserved capacity for write"
        );
        // SAFETY: the caller has reserved at least `buf.len()` bytes past
        // `old_len`, so the destination range lies inside the vector's
        // allocation. `buf` is a shared borrow while the vector is uniquely
        // borrowed through `self`, so the two ranges cannot overlap. `u8` has
        // no invalid bit patterns and nothing reads the vector between
        // `set_len` and the copy, so extending the length first is sound.
        unsafe {
            self.0.set_len(old_len + buf.len());
            ptr::copy_nonoverlapping(
                buf.as_ptr(),
                self.0.as_mut_ptr().offset(old_len as isize),
                buf.len(),
            );
        }
        Ok(buf.len())
    }

    /// Nothing is buffered, so flushing is a no-op.
    fn flush(&mut self) -> io::Result<()> {
        Ok(())
    }
}
/// `Write` sink that discards its input and only tallies how many bytes were written.
///
/// Field `0` holds the running total.
struct SizeCounter(usize);

impl Write for SizeCounter {
    /// Adds `buf.len()` to the running total and reports the whole buffer as written.
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        self.0 += buf.len();
        Ok(buf.len())
    }

    /// Nothing is buffered, so flushing is a no-op.
    fn flush(&mut self) -> io::Result<()> {
        Ok(())
    }
}
/// Appends the bincode encoding of `e` to `vec` using a measure-then-write scheme.
///
/// The entity is first serialized into a `SizeCounter` to learn the encoded
/// length, `vec` is grown once by that amount, and the entity is then
/// serialized again through `UnsafeVecWriter`, which writes into the reserved
/// space without further capacity checks. Panics if either serialization fails.
#[inline(never)]
fn make_bytes(vec: &mut Vec<u8>, e: &Entity) {
    // Pass 1: dry run that only counts the encoded bytes.
    let mut counter = SizeCounter(0);
    bincode::serialize_into(&mut counter, e, Infinite).unwrap();

    // Single up-front reservation; this is what makes the unchecked writer valid.
    let needed = counter.0;
    vec.reserve(needed);

    // Pass 2: real encoding straight into the reserved tail of `vec`.
    let mut sink = UnsafeVecWriter(vec);
    bincode::serialize_into(&mut sink, e, Infinite).unwrap();
}
/// Baseline for comparison with `make_bytes`: appends the bincode encoding of
/// `e` to `vec` by handing the vector itself to bincode as the writer.
///
/// Panics if serialization fails.
#[inline(never)]
fn slow_make_bytes(vec: &mut Vec<u8>, e: &Entity) {
// No size pre-computation or up-front reserve here; the `Vec<u8>` writer is used as-is.
bincode::serialize_into(vec, e, Infinite).unwrap();
}
// Driver for the codegen test: encodes one `Entity` through both code paths and
// checks that decoding the buffer yields the original value.
fn main() {
let world = Entity { x: 0.0, y: 4.0, z: 5.0, o: 0 };
let mut encoded = Vec::new();
// Both calls append to the same buffer, so `encoded` receives the entity twice.
make_bytes(&mut encoded, &world);
slow_make_bytes(&mut encoded, &world);
// NOTE(review): the size comment and disabled assertion below do not match the
// current `Entity` (three `f64` and one `u64`, no vector) — likely stale; confirm.
// 8 bytes for the length of the vector, 4 bytes per float.
//assert_eq!(encoded.len(), 8 + 4 * 4);
// Presumably decodes only the first copy and ignores the trailing bytes —
// verify against bincode's `deserialize` behaviour.
let decoded: Entity = deserialize(&encoded[..]).unwrap();
assert_eq!(world, decoded);
} make_bytes__ZN10serde_fast10make_bytes17ha60b3205c9ed12fdE:
.cfi_startproc
pushq %rbp
Lcfi71:
.cfi_def_cfa_offset 16
Lcfi72:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Lcfi73:
.cfi_def_cfa_register %rbp
pushq %r14
pushq %rbx
Lcfi74:
.cfi_offset %rbx, -32
Lcfi75:
.cfi_offset %r14, -24
movq %rsi, %r14
movq %rdi, %rbx
movl $32, %esi
callq __ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
movq (%r14), %rax
movq 16(%rbx), %rcx
leaq 8(%rcx), %rdx
movq %rdx, 16(%rbx)
movq (%rbx), %rdx
movq %rax, (%rdx,%rcx)
movq 8(%r14), %rax
movq 16(%rbx), %rcx
leaq 8(%rcx), %rdx
movq %rdx, 16(%rbx)
movq (%rbx), %rdx
movq %rax, (%rdx,%rcx)
movq 16(%r14), %rax
movq 16(%rbx), %rcx
leaq 8(%rcx), %rdx
movq %rdx, 16(%rbx)
movq (%rbx), %rdx
movq %rax, (%rdx,%rcx)
movq 24(%r14), %rax
movq 16(%rbx), %rcx
leaq 8(%rcx), %rdx
movq %rdx, 16(%rbx)
movq (%rbx), %rdx
movq %rax, (%rdx,%rcx)
popq %rbx
popq %r14
popq %rbp
retq
.cfi_endproc make_bytes -Zmutable-noalias__ZN10serde_fast10make_bytes17ha60b3205c9ed12fdE:
.cfi_startproc
pushq %rbp
Lcfi71:
.cfi_def_cfa_offset 16
Lcfi72:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Lcfi73:
.cfi_def_cfa_register %rbp
pushq %r14
pushq %rbx
Lcfi74:
.cfi_offset %rbx, -32
Lcfi75:
.cfi_offset %r14, -24
movq %rsi, %r14
movq %rdi, %rbx
movl $32, %esi
callq __ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
movq (%rbx), %rax
movq 16(%rbx), %rcx
movups (%r14), %xmm0
movups %xmm0, (%rax,%rcx)
movups 16(%r14), %xmm0
leaq 32(%rcx), %rdx
movq %rdx, 16(%rbx)
movups %xmm0, 16(%rax,%rcx)
popq %rbx
popq %r14
popq %rbp
retq
.cfi_endproc Changing Entity to be make_bytes__ZN10serde_fast10make_bytes17ha60b3205c9ed12fdE:
.cfi_startproc
pushq %rbp
Lcfi71:
.cfi_def_cfa_offset 16
Lcfi72:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Lcfi73:
.cfi_def_cfa_register %rbp
pushq %r14
pushq %rbx
Lcfi74:
.cfi_offset %rbx, -32
Lcfi75:
.cfi_offset %r14, -24
movq %rsi, %r14
movq %rdi, %rbx
movl $20, %esi
callq __ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
movl 8(%r14), %eax
movq 16(%rbx), %rcx
leaq 4(%rcx), %rdx
movq %rdx, 16(%rbx)
movq (%rbx), %rdx
movl %eax, (%rdx,%rcx)
movl 12(%r14), %eax
movq 16(%rbx), %rcx
leaq 4(%rcx), %rdx
movq %rdx, 16(%rbx)
movq (%rbx), %rdx
movl %eax, (%rdx,%rcx)
movl 16(%r14), %eax
movq 16(%rbx), %rcx
leaq 4(%rcx), %rdx
movq %rdx, 16(%rbx)
movq (%rbx), %rdx
movl %eax, (%rdx,%rcx)
movq (%r14), %rax
movq 16(%rbx), %rcx
leaq 8(%rcx), %rdx
movq %rdx, 16(%rbx)
movq (%rbx), %rdx
movq %rax, (%rdx,%rcx)
popq %rbx
popq %r14
popq %rbp
retq
.cfi_endproc make_bytes -Zmutable-noalias__ZN10serde_fast10make_bytes17ha60b3205c9ed12fdE:
.cfi_startproc
pushq %rbp
Lcfi71:
.cfi_def_cfa_offset 16
Lcfi72:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Lcfi73:
.cfi_def_cfa_register %rbp
pushq %r14
pushq %rbx
Lcfi74:
.cfi_offset %rbx, -32
Lcfi75:
.cfi_offset %r14, -24
movq %rsi, %rbx
movq %rdi, %r14
movl $20, %esi
callq __ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
movl 8(%rbx), %eax
movq (%r14), %rcx
movq 16(%r14), %rdx
movl %eax, (%rcx,%rdx)
movl 12(%rbx), %eax
movl %eax, 4(%rcx,%rdx)
movl 16(%rbx), %eax
movl %eax, 8(%rcx,%rdx)
movq (%rbx), %rax
leaq 20(%rdx), %rsi
movq %rsi, 16(%r14)
movq %rax, 12(%rcx,%rdx)
popq %rbx
popq %r14
popq %rbp
retq
.cfi_endproc slow_make_bytes (uniform, but generally unaffected by any change)__ZN10serde_fast15slow_make_bytes17hf60759a7955c0485E:
.cfi_startproc
pushq %rbp
Lcfi76:
.cfi_def_cfa_offset 16
Lcfi77:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Lcfi78:
.cfi_def_cfa_register %rbp
pushq %r15
pushq %r14
pushq %rbx
pushq %rax
Lcfi79:
.cfi_offset %rbx, -40
Lcfi80:
.cfi_offset %r14, -32
Lcfi81:
.cfi_offset %r15, -24
movq %rsi, %r14
movq %rdi, %rbx
movq (%r14), %r15
movl $8, %esi
callq __ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
movq 16(%rbx), %rax
leaq 8(%rax), %rcx
movq %rcx, 16(%rbx)
movq (%rbx), %rcx
movq %r15, (%rcx,%rax)
movq 8(%r14), %r15
movl $8, %esi
movq %rbx, %rdi
callq __ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
movq 16(%rbx), %rax
leaq 8(%rax), %rcx
movq %rcx, 16(%rbx)
movq (%rbx), %rcx
movq %r15, (%rcx,%rax)
movq 16(%r14), %r15
movl $8, %esi
movq %rbx, %rdi
callq __ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
movq 16(%rbx), %rax
leaq 8(%rax), %rcx
movq %rcx, 16(%rbx)
movq (%rbx), %rcx
movq %r15, (%rcx,%rax)
movq 24(%r14), %r14
movl $8, %esi
movq %rbx, %rdi
callq __ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
movq 16(%rbx), %rax
leaq 8(%rax), %rcx
movq %rcx, 16(%rbx)
movq (%rbx), %rcx
movq %r14, (%rcx,%rax)
addq $8, %rsp
popq %rbx
popq %r14
popq %r15
popq %rbp
retq
.cfi_endproc This significantly improves our codegen. |
This is very interesting. Kudos for running tests 😄 |
Anywhere you see SetLenOnDrop inside the vec code, it's probably a workaround for the missing noalias. |
@bors r+ |
📌 Commit 3647129 has been approved by |
@arielb1 what's the legislative process for promoting this to a -C flag? (e.g. usable on stable) |
Should this become a |
That's basically https://forge.rust-lang.org/stabilization-guide.html - create a tracking issue, nominate it to T-compiler FCP, write docs. But I don't think we want this to be a |
@rkruppe it might be reasonable to default if panic=abort, but idk yet about otherwise. |
Are you talking about lingering misoptimizations? Surely if those exist, end users shouldn't be encouraged to enable it? |
Based on discussion in #45029, I've added a commit to enable this by default if |
Let's have a go at this @bors r+ |
📌 Commit a6dea41 has been approved by |
This is the same mode as Gecko uses and will get better codegen once rust-lang/rust#45012 has landed.
Do we have some benchmarks? Would love to know if this has significant effect on |
Add -Zmutable-noalias flag We disabled noalias on mutable references a long time ago when it was clear that llvm was incorrectly handling this in relation to unwinding edges. Since then, a few things have happened: * llvm has cleaned up a bunch of the issues (I'm told) * we've added a nounwind codegen option As such, I would like to add this -Z flag so that we can evaluate if the codegen bugs still exist, and if this significantly affects the codegen of different projects, with an eye towards permanently re-enabling it (or at least making it a stable option).
☀️ Test successful - status-appveyor, status-travis |
There's a "null result" with this option in bluss/arrayvec#74 (but I wouldn't know if it definitely should apply there anyway). I'll run some more benchmarks when I can. |
Use panic=abort instead of panic=unwind This is the same mode as Gecko uses and will get better codegen once rust-lang/rust#45012 has landed. <!-- Reviewable:start --> --- This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/webrender/1825) <!-- Reviewable:end -->
We disabled noalias on mutable references a long time ago when it was clear that llvm was incorrectly handling this in relation to unwinding edges.
Since then, a few things have happened:
* LLVM has cleaned up a bunch of the issues (I'm told)
* we've added a nounwind codegen option
As such, I would like to add this -Z flag so that we can evaluate if the codegen bugs still exist, and if this significantly affects the codegen of different projects, with an eye towards permanently re-enabling it (or at least making it a stable option).