Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add -Zmutable-noalias flag #45012

Merged
merged 2 commits into from
Oct 8, 2017
Merged

Add -Zmutable-noalias flag #45012

merged 2 commits into from
Oct 8, 2017

Conversation

Gankra
Copy link
Contributor

@Gankra Gankra commented Oct 3, 2017

We disabled noalias on mutable references a long time ago when it was clear that llvm was incorrectly handling this in relation to unwinding edges.

Since then, a few things have happened:

  • llvm has cleaned up a bunch of the issues (I'm told)
  • we've added a nounwind codegen option

As such, I would like to add this -Z flag so that we can evaluate if the codegen bugs still exist, and if this significantly affects the codegen of different projects, with an eye towards permanently re-enabling it (or at least making it a stable option).

@rust-highfive
Copy link
Collaborator

r? @arielb1

(rust_highfive has picked a reviewer for you, use r? to override)

@Gankra
Copy link
Contributor Author

Gankra commented Oct 3, 2017

cc @jrmuizel

@Gankra
Copy link
Contributor Author

Gankra commented Oct 4, 2017

Some codgen tests using serde:

code
#[macro_use]
extern crate serde_derive;
extern crate bincode;

use std::io::Write;
use std::{io, ptr};

use bincode::{serialize, deserialize, Infinite};

#[derive(Serialize, Deserialize, PartialEq, Debug)]
struct Entity {
    x: f64,
    y: f64,
    z: f64,
    o: u64
}

#[derive(Serialize, Deserialize, PartialEq, Debug)]
struct World(Vec<Entity>);

struct UnsafeVecWriter<'a>(&'a mut Vec<u8>);

impl<'a> Write for UnsafeVecWriter<'a> {
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        unsafe {
            let old_len = self.0.len();
            self.0.set_len(old_len + buf.len());
            ptr::copy_nonoverlapping(buf.as_ptr(), self.0.as_mut_ptr().offset(old_len as isize), buf.len());
        }
        Ok(buf.len())
    }
    fn flush(&mut self) -> io::Result<()> { Ok(()) }
}

struct SizeCounter(usize);

impl<'a> Write for SizeCounter {
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        self.0 += buf.len();
        Ok(buf.len())
    }
    fn flush(&mut self) -> io::Result<()> { Ok(()) }
}

#[inline(never)]
fn make_bytes(vec: &mut Vec<u8>, e: &Entity) {
    let mut size = SizeCounter(0);
    bincode::serialize_into(&mut size,e , Infinite).unwrap();
    vec.reserve(size.0);
    //vec.reserve(bincode::serialized_size(&e) as usize);

    bincode::serialize_into(&mut UnsafeVecWriter(vec), e, Infinite).unwrap();
}

#[inline(never)]
fn slow_make_bytes(vec: &mut Vec<u8>, e: &Entity) {
    bincode::serialize_into(vec, e, Infinite).unwrap();
}

fn main() {
    let world = Entity { x: 0.0, y: 4.0, z: 5.0, o: 0 };

    let mut encoded = Vec::new();
    make_bytes(&mut encoded, &world);
    slow_make_bytes(&mut encoded, &world);

    // 8 bytes for the length of the vector, 4 bytes per float.
    //assert_eq!(encoded.len(), 8 + 4 * 4);

    let decoded: Entity = deserialize(&encoded[..]).unwrap();

    assert_eq!(world, decoded);
}

make_bytes
__ZN10serde_fast10make_bytes17ha60b3205c9ed12fdE:
	.cfi_startproc
	pushq	%rbp
Lcfi71:
	.cfi_def_cfa_offset 16
Lcfi72:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Lcfi73:
	.cfi_def_cfa_register %rbp
	pushq	%r14
	pushq	%rbx
Lcfi74:
	.cfi_offset %rbx, -32
Lcfi75:
	.cfi_offset %r14, -24
	movq	%rsi, %r14
	movq	%rdi, %rbx
	movl	$32, %esi
	callq	__ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
	movq	(%r14), %rax
	movq	16(%rbx), %rcx
	leaq	8(%rcx), %rdx
	movq	%rdx, 16(%rbx)
	movq	(%rbx), %rdx
	movq	%rax, (%rdx,%rcx)
	movq	8(%r14), %rax
	movq	16(%rbx), %rcx
	leaq	8(%rcx), %rdx
	movq	%rdx, 16(%rbx)
	movq	(%rbx), %rdx
	movq	%rax, (%rdx,%rcx)
	movq	16(%r14), %rax
	movq	16(%rbx), %rcx
	leaq	8(%rcx), %rdx
	movq	%rdx, 16(%rbx)
	movq	(%rbx), %rdx
	movq	%rax, (%rdx,%rcx)
	movq	24(%r14), %rax
	movq	16(%rbx), %rcx
	leaq	8(%rcx), %rdx
	movq	%rdx, 16(%rbx)
	movq	(%rbx), %rdx
	movq	%rax, (%rdx,%rcx)
	popq	%rbx
	popq	%r14
	popq	%rbp
	retq
	.cfi_endproc
make_bytes -Zmutable-noalias
__ZN10serde_fast10make_bytes17ha60b3205c9ed12fdE:
	.cfi_startproc
	pushq	%rbp
Lcfi71:
	.cfi_def_cfa_offset 16
Lcfi72:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Lcfi73:
	.cfi_def_cfa_register %rbp
	pushq	%r14
	pushq	%rbx
Lcfi74:
	.cfi_offset %rbx, -32
Lcfi75:
	.cfi_offset %r14, -24
	movq	%rsi, %r14
	movq	%rdi, %rbx
	movl	$32, %esi
	callq	__ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
	movq	(%rbx), %rax
	movq	16(%rbx), %rcx
	movups	(%r14), %xmm0
	movups	%xmm0, (%rax,%rcx)
	movups	16(%r14), %xmm0
	leaq	32(%rcx), %rdx
	movq	%rdx, 16(%rbx)
	movups	%xmm0, 16(%rax,%rcx)
	popq	%rbx
	popq	%r14
	popq	%rbp
	retq
	.cfi_endproc

Changing Entity to be (f32, f32, f32, u64) (non-uniform sizes)

make_bytes
__ZN10serde_fast10make_bytes17ha60b3205c9ed12fdE:
	.cfi_startproc
	pushq	%rbp
Lcfi71:
	.cfi_def_cfa_offset 16
Lcfi72:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Lcfi73:
	.cfi_def_cfa_register %rbp
	pushq	%r14
	pushq	%rbx
Lcfi74:
	.cfi_offset %rbx, -32
Lcfi75:
	.cfi_offset %r14, -24
	movq	%rsi, %r14
	movq	%rdi, %rbx
	movl	$20, %esi
	callq	__ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
	movl	8(%r14), %eax
	movq	16(%rbx), %rcx
	leaq	4(%rcx), %rdx
	movq	%rdx, 16(%rbx)
	movq	(%rbx), %rdx
	movl	%eax, (%rdx,%rcx)
	movl	12(%r14), %eax
	movq	16(%rbx), %rcx
	leaq	4(%rcx), %rdx
	movq	%rdx, 16(%rbx)
	movq	(%rbx), %rdx
	movl	%eax, (%rdx,%rcx)
	movl	16(%r14), %eax
	movq	16(%rbx), %rcx
	leaq	4(%rcx), %rdx
	movq	%rdx, 16(%rbx)
	movq	(%rbx), %rdx
	movl	%eax, (%rdx,%rcx)
	movq	(%r14), %rax
	movq	16(%rbx), %rcx
	leaq	8(%rcx), %rdx
	movq	%rdx, 16(%rbx)
	movq	(%rbx), %rdx
	movq	%rax, (%rdx,%rcx)
	popq	%rbx
	popq	%r14
	popq	%rbp
	retq
	.cfi_endproc
make_bytes -Zmutable-noalias
__ZN10serde_fast10make_bytes17ha60b3205c9ed12fdE:
	.cfi_startproc
	pushq	%rbp
Lcfi71:
	.cfi_def_cfa_offset 16
Lcfi72:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Lcfi73:
	.cfi_def_cfa_register %rbp
	pushq	%r14
	pushq	%rbx
Lcfi74:
	.cfi_offset %rbx, -32
Lcfi75:
	.cfi_offset %r14, -24
	movq	%rsi, %rbx
	movq	%rdi, %r14
	movl	$20, %esi
	callq	__ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
	movl	8(%rbx), %eax
	movq	(%r14), %rcx
	movq	16(%r14), %rdx
	movl	%eax, (%rcx,%rdx)
	movl	12(%rbx), %eax
	movl	%eax, 4(%rcx,%rdx)
	movl	16(%rbx), %eax
	movl	%eax, 8(%rcx,%rdx)
	movq	(%rbx), %rax
	leaq	20(%rdx), %rsi
	movq	%rsi, 16(%r14)
	movq	%rax, 12(%rcx,%rdx)
	popq	%rbx
	popq	%r14
	popq	%rbp
	retq
	.cfi_endproc

slow_make_bytes (uniform, but generally unaffected by any change)
__ZN10serde_fast15slow_make_bytes17hf60759a7955c0485E:
	.cfi_startproc
	pushq	%rbp
Lcfi76:
	.cfi_def_cfa_offset 16
Lcfi77:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Lcfi78:
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%rbx
	pushq	%rax
Lcfi79:
	.cfi_offset %rbx, -40
Lcfi80:
	.cfi_offset %r14, -32
Lcfi81:
	.cfi_offset %r15, -24
	movq	%rsi, %r14
	movq	%rdi, %rbx
	movq	(%r14), %r15
	movl	$8, %esi
	callq	__ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
	movq	16(%rbx), %rax
	leaq	8(%rax), %rcx
	movq	%rcx, 16(%rbx)
	movq	(%rbx), %rcx
	movq	%r15, (%rcx,%rax)
	movq	8(%r14), %r15
	movl	$8, %esi
	movq	%rbx, %rdi
	callq	__ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
	movq	16(%rbx), %rax
	leaq	8(%rax), %rcx
	movq	%rcx, 16(%rbx)
	movq	(%rbx), %rcx
	movq	%r15, (%rcx,%rax)
	movq	16(%r14), %r15
	movl	$8, %esi
	movq	%rbx, %rdi
	callq	__ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
	movq	16(%rbx), %rax
	leaq	8(%rax), %rcx
	movq	%rcx, 16(%rbx)
	movq	(%rbx), %rcx
	movq	%r15, (%rcx,%rax)
	movq	24(%r14), %r14
	movl	$8, %esi
	movq	%rbx, %rdi
	callq	__ZN33_$LT$alloc..vec..Vec$LT$T$GT$$GT$7reserve17hab984fb4b8ec6529E
	movq	16(%rbx), %rax
	leaq	8(%rax), %rcx
	movq	%rcx, 16(%rbx)
	movq	(%rbx), %rcx
	movq	%r14, (%rcx,%rax)
	addq	$8, %rsp
	popq	%rbx
	popq	%r14
	popq	%r15
	popq	%rbp
	retq
	.cfi_endproc

This significantly improves our codegen.

@arthurprs
Copy link
Contributor

This is very interesting. Kudos for running tests 😄

@bluss
Copy link
Member

bluss commented Oct 4, 2017

Anywhere you see SetLenOnDrop inside the vec code, it's probably a workaround for the missing noalias.

@arielb1
Copy link
Contributor

arielb1 commented Oct 4, 2017

@bors r+

@bors
Copy link
Contributor

bors commented Oct 4, 2017

📌 Commit 3647129 has been approved by arielb1

@Gankra
Copy link
Contributor Author

Gankra commented Oct 4, 2017

@arielb1 what's the legislative process for promoting this to a -C flag? (e.g. usable on stable)

@hanna-kruppe
Copy link
Contributor

Should this become a -C flag? It seems like a temporary hack, almost as much for experimentation as for end users. Hopefully it'll just be the default behavior (again) soon-ish.

@arielb1
Copy link
Contributor

arielb1 commented Oct 4, 2017

@gankro

That's basically https://forge.rust-lang.org/stabilization-guide.html - create a tracking issue, nominate it to T-compiler FCP, write docs.

But I don't think we want this to be a -C feature. I think we should just make this the default behavior soon-ish.

@Gankra
Copy link
Contributor Author

Gankra commented Oct 4, 2017

@rkruppe it might be reasonable to default if panic=abort, but idk yet about otherwise.

@hanna-kruppe
Copy link
Contributor

Are you talking about lingering misoptimizations? Surely if those exist, end users shouldn't be encouraged to enable it?

@Gankra
Copy link
Contributor Author

Gankra commented Oct 4, 2017

Based on discussion in #45029, I've added a commit to enable this by default if panic=abort.

@arielb1
Copy link
Contributor

arielb1 commented Oct 5, 2017

Let's have a go at this

@bors r+

@bors
Copy link
Contributor

bors commented Oct 5, 2017

📌 Commit a6dea41 has been approved by arielb1

jrmuizel added a commit to jrmuizel/webrender that referenced this pull request Oct 6, 2017
This is the same mode as Gecko uses and will get better codegen
once rust-lang/rust#45012 has landed.
@shepmaster shepmaster added the S-waiting-on-bors Status: Waiting on bors to run and complete tests. Bors will change the label on completion. label Oct 6, 2017
@bstrie
Copy link
Contributor

bstrie commented Oct 7, 2017

Do we have some benchmarks? Would love to know if this has significant effect on panic=abort builds.

@bors
Copy link
Contributor

bors commented Oct 8, 2017

⌛ Testing commit a6dea41 with merge ff8e264...

bors added a commit that referenced this pull request Oct 8, 2017
Add -Zmutable-noalias flag

We disabled noalias on mutable references a long time ago when it was clear that llvm was incorrectly handling this in relation to unwinding edges.

Since then, a few things have happened:

* llvm has cleaned up a bunch of the issues (I'm told)
* we've added a nounwind codegen option

As such, I would like to add this -Z flag so that we can evaluate if the codegen bugs still exist, and if this significantly affects the codegen of different projects, with an eye towards permanently re-enabling it (or at least making it a stable option).
@bors
Copy link
Contributor

bors commented Oct 8, 2017

☀️ Test successful - status-appveyor, status-travis
Approved by: arielb1
Pushing ff8e264 to master...

@bors bors merged commit a6dea41 into rust-lang:master Oct 8, 2017
@bluss
Copy link
Member

bluss commented Oct 8, 2017

There's a "null result" with this option in bluss/arrayvec#74 (but I wouldn't know if it definitely should apply there anyway). I'll run some more benchmarks when I can.

bors-servo pushed a commit to servo/webrender that referenced this pull request Oct 8, 2017
Use panic=abort instead of panic=unwind

This is the same mode as Gecko uses and will get better codegen
once rust-lang/rust#45012 has landed.

<!-- Reviewable:start -->
---
This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/webrender/1825)
<!-- Reviewable:end -->
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
S-waiting-on-bors Status: Waiting on bors to run and complete tests. Bors will change the label on completion.
Projects
None yet
Development

Successfully merging this pull request may close these issues.

9 participants