Skip to content

Commit

Permalink
Auto merge of #113344 - scottmcm:alt-slice-zst-handing, r=the8472
Browse files Browse the repository at this point in the history
Get `!nonnull` metadata on slice iterators, without `assume`s

This updates the non-ZST paths to read the end pointer through a pointer-to-`NonNull`, so that they all get `!nonnull` metadata.

That means that the last `assume(!ptr.is_null())` can be deleted, without impacting codegen -- the codegen tests confirm the LLVM-IR ends up exactly the same as before.
  • Loading branch information
bors committed Jul 21, 2023
2 parents e2a7ba2 + 34732e8 commit c720a9c
Show file tree
Hide file tree
Showing 12 changed files with 170 additions and 93 deletions.
24 changes: 24 additions & 0 deletions library/core/src/ptr/non_null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,30 @@ impl<T: ?Sized> NonNull<T> {
// And the caller promised the `delta` is sound to add.
unsafe { NonNull { pointer: self.pointer.add(delta) } }
}

/// See [`pointer::sub`] for semantics and safety requirements.
#[inline]
pub(crate) const unsafe fn sub(self, delta: usize) -> Self
where
T: Sized,
{
// SAFETY: We require that the delta stays in-bounds of the object, and
// thus it cannot become null, as no legal objects can be allocated
// in such as way that the null address is part of them.
// And the caller promised the `delta` is sound to subtract.
unsafe { NonNull { pointer: self.pointer.sub(delta) } }
}

/// See [`pointer::sub_ptr`] for semantics and safety requirements.
#[inline]
pub(crate) const unsafe fn sub_ptr(self, subtrahend: Self) -> usize
where
T: Sized,
{
// SAFETY: The caller promised that this is safe to do, and
// the non-nullness is irrelevant to the operation.
unsafe { self.pointer.sub_ptr(subtrahend.pointer) }
}
}

impl<T> NonNull<[T]> {
Expand Down
25 changes: 13 additions & 12 deletions library/core/src/slice/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use crate::iter::{
use crate::marker::{PhantomData, Send, Sized, Sync};
use crate::mem::{self, SizedTypeProperties};
use crate::num::NonZeroUsize;
use crate::ptr::{invalid, invalid_mut, NonNull};
use crate::ptr::{self, invalid, invalid_mut, NonNull};

use super::{from_raw_parts, from_raw_parts_mut};

Expand Down Expand Up @@ -68,7 +68,7 @@ pub struct Iter<'a, T: 'a> {
/// For non-ZSTs, the non-null pointer to the past-the-end element.
///
/// For ZSTs, this is `ptr::invalid(len)`.
end: *const T,
end_or_len: *const T,
_marker: PhantomData<&'a T>,
}

Expand All @@ -90,9 +90,9 @@ impl<'a, T> Iter<'a, T> {
let ptr = slice.as_ptr();
// SAFETY: Similar to `IterMut::new`.
unsafe {
let end = if T::IS_ZST { invalid(slice.len()) } else { ptr.add(slice.len()) };
let end_or_len = if T::IS_ZST { invalid(slice.len()) } else { ptr.add(slice.len()) };

Self { ptr: NonNull::new_unchecked(ptr as *mut T), end, _marker: PhantomData }
Self { ptr: NonNull::new_unchecked(ptr as *mut T), end_or_len, _marker: PhantomData }
}
}

Expand Down Expand Up @@ -128,7 +128,7 @@ impl<'a, T> Iter<'a, T> {
}
}

iterator! {struct Iter -> *const T, &'a T, const, {/* no mut */}, {
iterator! {struct Iter -> *const T, &'a T, const, {/* no mut */}, as_ref, {
fn is_sorted_by<F>(self, mut compare: F) -> bool
where
Self: Sized,
Expand All @@ -142,7 +142,7 @@ iterator! {struct Iter -> *const T, &'a T, const, {/* no mut */}, {
impl<T> Clone for Iter<'_, T> {
#[inline]
fn clone(&self) -> Self {
Iter { ptr: self.ptr, end: self.end, _marker: self._marker }
Iter { ptr: self.ptr, end_or_len: self.end_or_len, _marker: self._marker }
}
}

Expand Down Expand Up @@ -189,7 +189,7 @@ pub struct IterMut<'a, T: 'a> {
/// For non-ZSTs, the non-null pointer to the past-the-end element.
///
/// For ZSTs, this is `ptr::invalid_mut(len)`.
end: *mut T,
end_or_len: *mut T,
_marker: PhantomData<&'a mut T>,
}

Expand Down Expand Up @@ -220,15 +220,16 @@ impl<'a, T> IterMut<'a, T> {
// for direct pointer equality with `ptr` to check if the iterator is
// done.
//
// In the case of a ZST, the end pointer is just the start pointer plus
// the length, to also allows for the fast `ptr == end` check.
// In the case of a ZST, the end pointer is just the length. It's never
// used as a pointer at all, and thus it's fine to have no provenance.
//
// See the `next_unchecked!` and `is_empty!` macros as well as the
// `post_inc_start` method for more information.
unsafe {
let end = if T::IS_ZST { invalid_mut(slice.len()) } else { ptr.add(slice.len()) };
let end_or_len =
if T::IS_ZST { invalid_mut(slice.len()) } else { ptr.add(slice.len()) };

Self { ptr: NonNull::new_unchecked(ptr), end, _marker: PhantomData }
Self { ptr: NonNull::new_unchecked(ptr), end_or_len, _marker: PhantomData }
}
}

Expand Down Expand Up @@ -360,7 +361,7 @@ impl<T> AsRef<[T]> for IterMut<'_, T> {
// }
// }

iterator! {struct IterMut -> *mut T, &'a mut T, mut, {mut}, {}}
iterator! {struct IterMut -> *mut T, &'a mut T, mut, {mut}, as_mut, {}}

/// An internal abstraction over the splitting iterators, so that
/// splitn, splitn_mut etc can be implemented once.
Expand Down
146 changes: 79 additions & 67 deletions library/core/src/slice/iter/macros.rs
Original file line number Diff line number Diff line change
@@ -1,45 +1,62 @@
//! Macros used by iterators of slice.

// Shrinks the iterator when T is a ZST, setting the length to `new_len`.
// `new_len` must not exceed `self.len()`.
macro_rules! zst_set_len {
($self: ident, $new_len: expr) => {{
/// Convenience & performance macro for consuming the `end_or_len` field, by
/// giving a `(&mut) usize` or `(&mut) NonNull<T>` depending whether `T` is
/// or is not a ZST respectively.
///
/// Internally, this reads the `end` through a pointer-to-`NonNull` so that
/// it'll get the appropriate non-null metadata in the backend without needing
/// to call `assume` manually.
macro_rules! if_zst {
(mut $this:ident, $len:ident => $zst_body:expr, $end:ident => $other_body:expr,) => {{
#![allow(unused_unsafe)] // we're sometimes used within an unsafe block

// SAFETY: same as `invalid(_mut)`, but the macro doesn't know
// which versions of that function to call, so open-code it.
$self.end = unsafe { mem::transmute::<usize, _>($new_len) };
if T::IS_ZST {
// SAFETY: for ZSTs, the pointer is storing a provenance-free length,
// so consuming and updating it as a `usize` is fine.
let $len = unsafe { &mut *ptr::addr_of_mut!($this.end_or_len).cast::<usize>() };
$zst_body
} else {
// SAFETY: for non-ZSTs, the type invariant ensures it cannot be null
let $end = unsafe { &mut *ptr::addr_of_mut!($this.end_or_len).cast::<NonNull<T>>() };
$other_body
}
}};
}
($this:ident, $len:ident => $zst_body:expr, $end:ident => $other_body:expr,) => {{
#![allow(unused_unsafe)] // we're sometimes used within an unsafe block

// Shrinks the iterator when T is a ZST, reducing the length by `n`.
// `n` must not exceed `self.len()`.
macro_rules! zst_shrink {
($self: ident, $n: ident) => {
let new_len = $self.end.addr() - $n;
zst_set_len!($self, new_len);
};
if T::IS_ZST {
let $len = $this.end_or_len.addr();
$zst_body
} else {
// SAFETY: for non-ZSTs, the type invariant ensures it cannot be null
let $end = unsafe { *ptr::addr_of!($this.end_or_len).cast::<NonNull<T>>() };
$other_body
}
}};
}

// Inlining is_empty and len makes a huge performance difference
macro_rules! is_empty {
($self: ident) => {
if T::IS_ZST { $self.end.addr() == 0 } else { $self.ptr.as_ptr() as *const _ == $self.end }
if_zst!($self,
len => len == 0,
end => $self.ptr == end,
)
};
}

macro_rules! len {
($self: ident) => {{
#![allow(unused_unsafe)] // we're sometimes used within an unsafe block

if T::IS_ZST {
$self.end.addr()
} else {
// To get rid of some bounds checks (see `position`), we use ptr_sub instead of
// offset_from (Tested by `codegen/slice-position-bounds-check`.)
// SAFETY: by the type invariant pointers are aligned and `start <= end`
unsafe { $self.end.sub_ptr($self.ptr.as_ptr()) }
}
if_zst!($self,
len => len,
end => {
// To get rid of some bounds checks (see `position`), we use ptr_sub instead of
// offset_from (Tested by `codegen/slice-position-bounds-check`.)
// SAFETY: by the type invariant pointers are aligned and `start <= end`
unsafe { end.sub_ptr($self.ptr) }
},
)
}};
}

Expand All @@ -50,20 +67,21 @@ macro_rules! iterator {
$elem:ty,
$raw_mut:tt,
{$( $mut_:tt )?},
$into_ref:ident,
{$($extra:tt)*}
) => {
// Returns the first element and moves the start of the iterator forwards by 1.
// Greatly improves performance compared to an inlined function. The iterator
// must not be empty.
macro_rules! next_unchecked {
($self: ident) => {& $( $mut_ )? *$self.post_inc_start(1)}
($self: ident) => { $self.post_inc_start(1).$into_ref() }
}

// Returns the last element and moves the end of the iterator backwards by 1.
// Greatly improves performance compared to an inlined function. The iterator
// must not be empty.
macro_rules! next_back_unchecked {
($self: ident) => {& $( $mut_ )? *$self.pre_dec_end(1)}
($self: ident) => { $self.pre_dec_end(1).$into_ref() }
}

impl<'a, T> $name<'a, T> {
Expand All @@ -80,33 +98,40 @@ macro_rules! iterator {
// returning the old start.
// Unsafe because the offset must not exceed `self.len()`.
#[inline(always)]
unsafe fn post_inc_start(&mut self, offset: usize) -> * $raw_mut T {
unsafe fn post_inc_start(&mut self, offset: usize) -> NonNull<T> {
let old = self.ptr;
if T::IS_ZST {
zst_shrink!(self, offset);
} else {
// SAFETY: the caller guarantees that `offset` doesn't exceed `self.len()`,
// so this new pointer is inside `self` and thus guaranteed to be non-null.
self.ptr = unsafe { self.ptr.add(offset) };

// SAFETY: the caller guarantees that `offset` doesn't exceed `self.len()`,
// so this new pointer is inside `self` and thus guaranteed to be non-null.
unsafe {
if_zst!(mut self,
len => *len = len.unchecked_sub(offset),
_end => self.ptr = self.ptr.add(offset),
);
}
old.as_ptr()
old
}

// Helper function for moving the end of the iterator backwards by `offset` elements,
// returning the new end.
// Unsafe because the offset must not exceed `self.len()`.
#[inline(always)]
unsafe fn pre_dec_end(&mut self, offset: usize) -> * $raw_mut T {
if T::IS_ZST {
zst_shrink!(self, offset);
self.ptr.as_ptr()
} else {
unsafe fn pre_dec_end(&mut self, offset: usize) -> NonNull<T> {
if_zst!(mut self,
// SAFETY: By our precondition, `offset` can be at most the
// current length, so the subtraction can never overflow.
len => unsafe {
*len = len.unchecked_sub(offset);
self.ptr
},
// SAFETY: the caller guarantees that `offset` doesn't exceed `self.len()`,
// which is guaranteed to not overflow an `isize`. Also, the resulting pointer
// is in bounds of `slice`, which fulfills the other requirements for `offset`.
self.end = unsafe { self.end.sub(offset) };
self.end
}
end => unsafe {
*end = end.sub(offset);
*end
},
)
}
}

Expand All @@ -131,13 +156,9 @@ macro_rules! iterator {
fn next(&mut self) -> Option<$elem> {
// could be implemented with slices, but this avoids bounds checks

// SAFETY: `assume` call is safe because slices over non-ZSTs must
// have a non-null end pointer. The call to `next_unchecked!` is
// SAFETY: The call to `next_unchecked!` is
// safe since we check if the iterator is empty first.
unsafe {
if !<T>::IS_ZST {
assume(!self.end.is_null());
}
if is_empty!(self) {
None
} else {
Expand All @@ -161,14 +182,10 @@ macro_rules! iterator {
fn nth(&mut self, n: usize) -> Option<$elem> {
if n >= len!(self) {
// This iterator is now empty.
if T::IS_ZST {
zst_set_len!(self, 0);
} else {
// SAFETY: end can't be 0 if T isn't ZST because ptr isn't 0 and end >= ptr
unsafe {
self.ptr = NonNull::new_unchecked(self.end as *mut T);
}
}
if_zst!(mut self,
len => *len = 0,
end => self.ptr = *end,
);
return None;
}
// SAFETY: We are in bounds. `post_inc_start` does the right thing even for ZSTs.
Expand Down Expand Up @@ -375,13 +392,9 @@ macro_rules! iterator {
fn next_back(&mut self) -> Option<$elem> {
// could be implemented with slices, but this avoids bounds checks

// SAFETY: `assume` call is safe because slices over non-ZSTs must
// have a non-null end pointer. The call to `next_back_unchecked!`
// SAFETY: The call to `next_back_unchecked!`
// is safe since we check if the iterator is empty first.
unsafe {
if !<T>::IS_ZST {
assume(!self.end.is_null());
}
if is_empty!(self) {
None
} else {
Expand All @@ -394,11 +407,10 @@ macro_rules! iterator {
fn nth_back(&mut self, n: usize) -> Option<$elem> {
if n >= len!(self) {
// This iterator is now empty.
if T::IS_ZST {
zst_set_len!(self, 0);
} else {
self.end = self.ptr.as_ptr();
}
if_zst!(mut self,
len => *len = 0,
end => *end = self.ptr,
);
return None;
}
// SAFETY: We are in bounds. `pre_dec_end` does the right thing even for ZSTs.
Expand Down
1 change: 1 addition & 0 deletions tests/codegen/iter-repeat-n-trivial-drop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ pub fn iter_repeat_n_next(it: &mut std::iter::RepeatN<NotCopy>) -> Option<NotCop
// CHECK: [[EMPTY]]:
// CHECK-NOT: br
// CHECK: phi i16
// CHECK-SAME: [ %[[VAL]], %[[NOT_EMPTY]] ]
// CHECK-NOT: br
// CHECK: ret

Expand Down
4 changes: 2 additions & 2 deletions tests/codegen/slice-iter-len-eq-zero.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ type Demo = [u8; 3];
#[no_mangle]
pub fn slice_iter_len_eq_zero(y: std::slice::Iter<'_, Demo>) -> bool {
// CHECK-NOT: sub
// CHECK: %_0 = icmp eq {{i8\*|ptr}} {{%1|%0}}, {{%1|%0}}
// CHECK: ret i1 %_0
// CHECK: %[[RET:.+]] = icmp eq {{i8\*|ptr}} {{%1|%0}}, {{%1|%0}}
// CHECK: ret i1 %[[RET]]
y.len() == 0
}

Expand Down
Loading

0 comments on commit c720a9c

Please sign in to comment.