From cd3d6e88bc9d02ed5e00536e3a1a70d2472d2f9b Mon Sep 17 00:00:00 2001 From: Lukas Bergdoll Date: Wed, 4 Sep 2024 19:37:49 +0200 Subject: [PATCH] Shrink heapsort further by combining sift_down loops --- core/src/slice/sort/unstable/heapsort.rs | 36 +++++++++++------------ core/src/slice/sort/unstable/mod.rs | 5 +--- core/src/slice/sort/unstable/quicksort.rs | 7 ++--- 3 files changed, 22 insertions(+), 26 deletions(-) diff --git a/core/src/slice/sort/unstable/heapsort.rs b/core/src/slice/sort/unstable/heapsort.rs index 27e2ad588ea09..35bfe2959cd78 100644 --- a/core/src/slice/sort/unstable/heapsort.rs +++ b/core/src/slice/sort/unstable/heapsort.rs @@ -1,46 +1,46 @@ //! This module contains a branchless heapsort as fallback for unstable quicksort. -use crate::{intrinsics, ptr}; +use crate::{cmp, intrinsics, ptr}; /// Sorts `v` using heapsort, which guarantees *O*(*n* \* log(*n*)) worst-case. /// /// Never inline this, it sits the main hot-loop in `recurse` and is meant as unlikely algorithmic /// fallback. -/// -/// SAFETY: The caller has to guarantee that `v.len()` >= 2. #[inline(never)] -pub(crate) unsafe fn heapsort(v: &mut [T], is_less: &mut F) +pub(crate) fn heapsort(v: &mut [T], is_less: &mut F) where F: FnMut(&T, &T) -> bool, { - // SAFETY: See function safety. - unsafe { - intrinsics::assume(v.len() >= 2); - - // Build the heap in linear time. - for i in (0..v.len() / 2).rev() { - sift_down(v, i, is_less); - } + let len = v.len(); - // Pop maximal elements from the heap. - for i in (1..v.len()).rev() { + for i in (0..len + len / 2).rev() { + let sift_idx = if i >= len { + i - len + } else { v.swap(0, i); - sift_down(&mut v[..i], 0, is_less); + 0 + }; + + // SAFETY: The above calculation ensures that `sift_idx` is either 0 or + // `(len..(len + (len / 2))) - len`, which simplifies to `0..(len / 2)`. + // This guarantees the required `sift_idx <= len`. + unsafe { + sift_down(&mut v[..cmp::min(i, len)], sift_idx, is_less); } } } // This binary heap respects the invariant `parent >= child`. // -// SAFETY: The caller has to guarantee that node < `v.len()`. -#[inline(never)] +// SAFETY: The caller has to guarantee that `node <= v.len()`. +#[inline(always)] unsafe fn sift_down(v: &mut [T], mut node: usize, is_less: &mut F) where F: FnMut(&T, &T) -> bool, { // SAFETY: See function safety. unsafe { - intrinsics::assume(node < v.len()); + intrinsics::assume(node <= v.len()); } let len = v.len(); diff --git a/core/src/slice/sort/unstable/mod.rs b/core/src/slice/sort/unstable/mod.rs index 130be21ee3fe8..953c27ab6f417 100644 --- a/core/src/slice/sort/unstable/mod.rs +++ b/core/src/slice/sort/unstable/mod.rs @@ -32,10 +32,7 @@ pub fn sort bool>(v: &mut [T], is_less: &mut F) { cfg_if! { if #[cfg(feature = "optimize_for_size")] { - // SAFETY: We checked that `len >= 2`. - unsafe { - heapsort::heapsort(v, is_less); - } + heapsort::heapsort(v, is_less); } else { // More advanced sorting methods than insertion sort are faster if called in // a hot loop for small inputs, but for general-purpose code the small diff --git a/core/src/slice/sort/unstable/quicksort.rs b/core/src/slice/sort/unstable/quicksort.rs index 9c59ccdb70005..4feef5deeb0fb 100644 --- a/core/src/slice/sort/unstable/quicksort.rs +++ b/core/src/slice/sort/unstable/quicksort.rs @@ -5,6 +5,8 @@ use crate::mem::{self, ManuallyDrop}; use crate::slice::sort::shared::pivot::choose_pivot; #[cfg(not(feature = "optimize_for_size"))] use crate::slice::sort::shared::smallsort::UnstableSmallSortTypeImpl; +#[cfg(not(feature = "optimize_for_size"))] +use crate::slice::sort::unstable::heapsort; use crate::{intrinsics, ptr}; /// Sorts `v` recursively. @@ -31,10 +33,7 @@ pub(crate) fn quicksort<'a, T, F>( // If too many bad pivot choices were made, simply fall back to heapsort in order to // guarantee `O(N x log(N))` worst-case. if limit == 0 { - // SAFETY: We assume the `small_sort` threshold is at least 1. - unsafe { - crate::slice::sort::unstable::heapsort::heapsort(v, is_less); - } + heapsort::heapsort(v, is_less); return; }