Skip to content

Commit

Permalink
Rollup merge of rust-lang#48012 - scottmcm:faster-rangeinclusive-fold…
Browse files Browse the repository at this point in the history
…, r=alexcrichton

Override try_[r]fold for RangeInclusive

Because the last item needs special handling, LLVM seems to have trouble canonicalizing the loops produced by external iteration (repeated calls to `next`/`next_back`).  With the override, it becomes obvious that the `start == end` case is the one that exits the loop (whereas in external iteration it is the call *after* that one which exits the loop).

Demo adapted from rust-lang#45222
```rust
// Demo used to compare generated code before and after this change.
// For each of `n` outer iterations it walks the inclusive range `0 ..= n`
// from the back and adds every item to `count`, so the result is
// `n * (0 + 1 + ... + n)`.
// `#[no_mangle]` keeps the symbol named `foo3r`, matching the label in the
// assembly listings.
#[no_mangle]
pub fn foo3r(n: u64) -> u64 {
    let mut count = 0;
    (0..n).for_each(|_| {
        // Internal iteration over a reversed inclusive range; presumably this is
        // what ends up in the `try_rfold` override (the forwarding is not shown here).
        (0 ..= n).rev().for_each(|j| {
            count += j;
        })
    });
    count
}
```

<details>
 <summary>Current nightly ASM, 100 lines (https://play.rust-lang.org/?gist=f5674c702c6e2045c3aab5d03763e5f6&version=nightly&mode=release)</summary>

```asm
foo3r:
	pushq	%rbx
.Lcfi0:
.Lcfi1:
	testq	%rdi, %rdi
	je	.LBB0_1
	testb	$1, %dil
	jne	.LBB0_4
	xorl	%eax, %eax
	xorl	%r8d, %r8d
	cmpq	$1, %rdi
	jne	.LBB0_11
	jmp	.LBB0_23
.LBB0_1:
	xorl	%eax, %eax
	popq	%rbx
	retq
.LBB0_4:
	xorl	%r8d, %r8d
	movq	$-1, %r9
	xorl	%eax, %eax
	movq	%rdi, %r11
	xorl	%r10d, %r10d
	jmp	.LBB0_5
.LBB0_8:
	addq	%r11, %rax
	movq	%rsi, %r11
	movq	%rdx, %r10
.LBB0_5:
	cmpq	%r11, %r10
	movl	$1, %ecx
	cmovbq	%r9, %rcx
	cmoveq	%r8, %rcx
	testq	%rcx, %rcx
	movl	$0, %esi
	movl	$1, %edx
	je	.LBB0_8
	cmpq	$-1, %rcx
	jne	.LBB0_9
	leaq	-1(%r11), %rsi
	movq	%r10, %rdx
	jmp	.LBB0_8
.LBB0_9:
	movl	$1, %r8d
	cmpq	$1, %rdi
	je	.LBB0_23
.LBB0_11:
	xorl	%r9d, %r9d
	movq	$-1, %r10
.LBB0_12:
	movq	%rdi, %rsi
	xorl	%r11d, %r11d
	jmp	.LBB0_13
.LBB0_16:
	addq	%rsi, %rax
	movq	%rcx, %rsi
	movq	%rbx, %r11
.LBB0_13:
	cmpq	%rsi, %r11
	movl	$1, %edx
	cmovbq	%r10, %rdx
	cmoveq	%r9, %rdx
	testq	%rdx, %rdx
	movl	$0, %ecx
	movl	$1, %ebx
	je	.LBB0_16
	cmpq	$-1, %rdx
	jne	.LBB0_17
	leaq	-1(%rsi), %rcx
	movq	%r11, %rbx
	jmp	.LBB0_16
.LBB0_17:
	movq	%rdi, %rcx
	xorl	%r11d, %r11d
	jmp	.LBB0_18
.LBB0_21:
	addq	%rcx, %rax
	movq	%rsi, %rcx
	movq	%rbx, %r11
.LBB0_18:
	cmpq	%rcx, %r11
	movl	$1, %edx
	cmovbq	%r10, %rdx
	cmoveq	%r9, %rdx
	testq	%rdx, %rdx
	movl	$0, %esi
	movl	$1, %ebx
	je	.LBB0_21
	cmpq	$-1, %rdx
	jne	.LBB0_22
	leaq	-1(%rcx), %rsi
	movq	%r11, %rbx
	jmp	.LBB0_21
.LBB0_22:
	addq	$2, %r8
	cmpq	%rdi, %r8
	jne	.LBB0_12
.LBB0_23:
	popq	%rbx
	retq
.Lfunc_end0:
```
</details><br>

With this PR:
```asm
foo3r:
	test	rcx, rcx
	je	.LBB3_1
	lea	r8, [rcx - 1]
	lea	rdx, [rcx - 2]
	mov	rax, r8
	mul	rdx
	shld	rdx, rax, 63
	imul	r8, r8
	add	r8, rcx
	sub	r8, rdx
	imul	r8, rcx
	mov	rax, r8
	ret
.LBB3_1:
	xor	r8d, r8d
	mov	rax, r8
	ret
```
  • Loading branch information
kennytm authored Feb 6, 2018
2 parents a026e8a + 1b1e887 commit 4f184eb
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 1 deletion.
46 changes: 45 additions & 1 deletion src/libcore/iter/range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

use convert::TryFrom;
use mem;
use ops::{self, Add, Sub};
use ops::{self, Add, Sub, Try};
use usize;

use super::{FusedIterator, TrustedLen};
Expand Down Expand Up @@ -397,6 +397,28 @@ impl<A: Step> Iterator for ops::RangeInclusive<A> {
fn max(mut self) -> Option<A> {
self.next_back()
}

// Folds the remaining items of the inclusive range from the front, passing the
// running accumulator and each item to `f`. Stops early and returns `f`'s
// result as soon as `?` sees a non-`Ok` value; otherwise returns the final
// accumulator wrapped with `Try::from_ok`.
//
// The range state is updated *before* `f` is called for an item, so after an
// early exit the item that caused it has already been consumed.
#[inline]
fn try_fold<B, F, R>(&mut self, init: B, mut f: F) -> R where
Self: Sized, F: FnMut(B, Self::Item) -> R, R: Try<Ok=B>
{
let mut accum = init;
// Nothing to yield unless `start <= end`; in that case `init` is returned as-is.
if self.start <= self.end {
loop {
// `x` is the item to yield this round; `done` marks the final item.
let (x, done) =
if self.start < self.end {
// More than one item left: step `start` forward and yield its old value.
let n = self.start.add_one();
(mem::replace(&mut self.start, n), false)
} else {
// Last item (the bounds have met). `replace_zero` / `replace_one` are
// `Step` helpers defined outside this view; presumably they store 0 / 1
// and return the previous value, leaving the range as the empty `1..=0`.
self.end.replace_zero();
(self.start.replace_one(), true)
};
accum = f(accum, x)?;
// Exiting right after the last item keeps the loop exit on the
// `start == end` case rather than on a following iteration.
if done { break }
}
}
Try::from_ok(accum)
}
}

#[unstable(feature = "inclusive_range", reason = "recently added, follows RFC", issue = "28237")]
Expand All @@ -418,6 +440,28 @@ impl<A: Step> DoubleEndedIterator for ops::RangeInclusive<A> {
_ => None,
}
}

// Folds the remaining items of the inclusive range from the back, passing the
// running accumulator and each item to `f`. Stops early and returns `f`'s
// result as soon as `?` sees a non-`Ok` value; otherwise returns the final
// accumulator wrapped with `Try::from_ok`.
//
// The range state is updated *before* `f` is called for an item, so after an
// early exit the item that caused it has already been consumed.
#[inline]
fn try_rfold<B, F, R>(&mut self, init: B, mut f: F) -> R where
Self: Sized, F: FnMut(B, Self::Item) -> R, R: Try<Ok=B>
{
let mut accum = init;
// Nothing to yield unless `start <= end`; in that case `init` is returned as-is.
if self.start <= self.end {
loop {
// `x` is the item to yield this round; `done` marks the final item.
let (x, done) =
if self.start < self.end {
// More than one item left: step `end` backward and yield its old value.
let n = self.end.sub_one();
(mem::replace(&mut self.end, n), false)
} else {
// Last item (the bounds have met). `replace_one` / `replace_zero` are
// `Step` helpers defined outside this view; presumably they store 1 / 0
// and return the previous value, leaving the range as the empty `1..=0`.
self.start.replace_one();
(self.end.replace_zero(), true)
};
accum = f(accum, x)?;
// Exiting right after the last item keeps the loop exit on the
// `start == end` case rather than on a following iteration.
if done { break }
}
}
Try::from_ok(accum)
}
}

#[unstable(feature = "fused", issue = "35602")]
Expand Down
20 changes: 20 additions & 0 deletions src/libcore/tests/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1459,6 +1459,26 @@ fn test_range_inclusive_min() {
assert_eq!(r.min(), None);
}

// Exercises folding over `RangeInclusive` in both directions, including the
// state the range is left in after an early exit and after running to completion.
#[test]
fn test_range_inclusive_folds() {
// Plain sums, forwards and reversed: 1 + 2 + ... + 10 = 55.
assert_eq!((1..=10).sum::<i32>(), 55);
assert_eq!((1..=10).rev().sum::<i32>(), 55);

// Early exit with `i8::checked_add`. The two calls share `it`, so order matters.
let mut it = 40..=50;
// 40 + 41 + 42 = 123; adding 43 overflows `i8`, so the fold stops with `None`.
// 43 was already taken from the range, leaving 44..=50.
assert_eq!(it.try_fold(0, i8::checked_add), None);
assert_eq!(it, 44..=50);
// From the back: 50 + 49 = 99; adding 48 overflows, and 48 is already consumed.
assert_eq!(it.try_rfold(0, i8::checked_add), None);
assert_eq!(it, 44..=47);

// Running to completion: 10 + 11 + ... + 20 = 165, and the range ends as `1..=0`.
let mut it = 10..=20;
assert_eq!(it.try_fold(0, |a,b| Some(a+b)), Some(165));
assert_eq!(it, 1..=0);

// Same from the back; the exhausted range is again `1..=0`.
let mut it = 10..=20;
assert_eq!(it.try_rfold(0, |a,b| Some(a+b)), Some(165));
assert_eq!(it, 1..=0);
}

#[test]
fn test_repeat() {
let mut it = repeat(42);
Expand Down

0 comments on commit 4f184eb

Please sign in to comment.