Skip to content

Commit

Permalink
always pack a few newest ancient slots (#1730)
Browse files Browse the repository at this point in the history
* always pack a few newest ancient slots

* pr feedback

* remove extra ()

* adds high slot tests

---------

Co-authored-by: brooks <[email protected]>
  • Loading branch information
jeffwashington and brooksprumo authored Jun 18, 2024
1 parent 2dc9508 commit d2b3476
Showing 1 changed file with 145 additions and 12 deletions.
157 changes: 145 additions & 12 deletions accounts-db/src/ancient_append_vecs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ use {
},
};

/// this many # of highest slot values should be treated as desirable to pack.
/// This gives us high slots to move packed accounts into.
// NOTE: slots within this distance of the maximum candidate slot are flagged
// `is_high_slot` and are always retained as pack candidates, so each pack pass
// has fresh, high target slots available (see `calc_ancient_slot_info`).
const HIGH_SLOT_OFFSET: u64 = 100;

/// ancient packing algorithm tuning per pass
#[derive(Debug)]
struct PackedAncientStorageTuning {
Expand Down Expand Up @@ -57,6 +61,9 @@ struct SlotInfo {
alive_bytes: u64,
/// true if this should be shrunk due to ratio
should_shrink: bool,
/// this slot is a high slot #
/// It is important to include some high slot #s so that we have new slots to try each time pack runs.
is_high_slot: bool,
}

/// info for all storages in ancient slots
Expand All @@ -83,6 +90,7 @@ impl AncientSlotInfos {
storage: Arc<AccountStorageEntry>,
can_randomly_shrink: bool,
ideal_size: NonZeroU64,
is_high_slot: bool,
) -> bool {
let mut was_randomly_shrunk = false;
let alive_bytes = storage.alive_bytes() as u64;
Expand Down Expand Up @@ -122,6 +130,7 @@ impl AncientSlotInfos {
storage,
alive_bytes,
should_shrink,
is_high_slot,
});
self.total_alive_bytes += alive_bytes;
}
Expand Down Expand Up @@ -198,14 +207,18 @@ impl AncientSlotInfos {
let storages_remaining = total_storages - i - 1;

// if the remaining uncombined storages and the # of resulting
// combined ancient storages is less than the threshold, then
// combined ancient storages are less than the threshold, then
// we've gone too far, so get rid of this entry and all after it.
// Every storage after this one is larger.
// Every storage after this one is larger than the ones we've chosen.
// if we ever get to more than `max_resulting_storages` required ancient storages, that is enough to stop for now.
// It will take a while to create that many. This should be a limit that only affects
// extreme testing environments.
if storages_remaining + ancient_storages_required < low_threshold
|| ancient_storages_required as u64 > u64::from(tuning.max_resulting_storages)
// It will take a lot of time for the pack algorithm to create that many, and that is bad for system performance.
// This should be a limit that only affects extreme testing environments.
// We do not stop including entries until we have dealt with all the high slot #s. This allows the algorithm to continue
// to make progress each time it is called. There are exceptions that can cause the pack to fail, such as accounts with multiple
// refs.
if !info.is_high_slot
&& (storages_remaining + ancient_storages_required < low_threshold
|| ancient_storages_required as u64 > u64::from(tuning.max_resulting_storages))
{
self.all_infos.truncate(i);
break;
Expand All @@ -229,10 +242,15 @@ impl AncientSlotInfos {
return;
}

// sort by 'should_shrink' then smallest capacity to largest
// sort by:
// 1. `high_slot`: we want to include new, high slots each time so that we try new slots
// each time alg runs and have several high target slots for packed storages.
// 2. 'should_shrink' so we make progress on shrinking ancient storages
// 3. smallest capacity to largest so that we remove the most slots possible
self.all_infos.sort_unstable_by(|l, r| {
r.should_shrink
.cmp(&l.should_shrink)
r.is_high_slot
.cmp(&l.is_high_slot)
.then_with(|| r.should_shrink.cmp(&l.should_shrink))
.then_with(|| l.capacity.cmp(&r.capacity))
});

Expand Down Expand Up @@ -498,10 +516,20 @@ impl AccountsDb {
..AncientSlotInfos::default()
};
let mut randoms = 0;
let max_slot = slots.iter().max().cloned().unwrap_or_default();
// heuristic to include some # of newly eligible ancient slots so that the pack algorithm always makes progress
let high_slot_boundary = max_slot.saturating_sub(HIGH_SLOT_OFFSET);
let is_high_slot = |slot| slot >= high_slot_boundary;

for slot in &slots {
if let Some(storage) = self.storage.get_slot_storage_entry(*slot) {
if infos.add(*slot, storage, can_randomly_shrink, ideal_size) {
if infos.add(
*slot,
storage,
can_randomly_shrink,
ideal_size,
is_high_slot(*slot),
) {
randoms += 1;
}
}
Expand Down Expand Up @@ -1081,12 +1109,13 @@ pub mod tests {
append_vec::{aligned_stored_size, AppendVec, AppendVecStoredAccountMeta},
storable_accounts::{tests::build_accounts_from_storage, StorableAccountsBySlot},
},
rand::seq::SliceRandom as _,
solana_sdk::{
account::{AccountSharedData, ReadableAccount, WritableAccount},
hash::Hash,
pubkey::Pubkey,
},
std::ops::Range,
std::{collections::HashSet, ops::Range},
strum::IntoEnumIterator,
strum_macros::EnumIter,
};
Expand All @@ -1105,6 +1134,7 @@ pub mod tests {
let original_stores = (0..slots)
.filter_map(|slot| db.storage.get_slot_storage_entry((slot as Slot) + slot1))
.collect::<Vec<_>>();
let is_high_slot = false;
let slot_infos = original_stores
.iter()
.map(|storage| SlotInfo {
Expand All @@ -1113,6 +1143,7 @@ pub mod tests {
capacity: 0,
alive_bytes: 0,
should_shrink: false,
is_high_slot,
})
.collect();
(
Expand Down Expand Up @@ -2379,6 +2410,7 @@ pub mod tests {
let mut infos = AncientSlotInfos::default();
let storage = db.storage.get_slot_storage_entry(slot1).unwrap();
let alive_bytes_expected = storage.alive_bytes();
let high_slot = false;
match method {
TestCollectInfo::Add => {
// test lower level 'add'
Expand All @@ -2387,6 +2419,7 @@ pub mod tests {
Arc::clone(&storage),
can_randomly_shrink,
NonZeroU64::new(get_ancient_append_vec_capacity()).unwrap(),
high_slot,
);
}
TestCollectInfo::CalcAncientSlotInfo => {
Expand Down Expand Up @@ -2445,12 +2478,14 @@ pub mod tests {
let (db, slot1) = create_db_with_storages_and_index(alive, slots, None);
let mut infos = AncientSlotInfos::default();
let storage = db.storage.get_slot_storage_entry(slot1).unwrap();
let high_slot = false;
if call_add {
infos.add(
slot1,
Arc::clone(&storage),
can_randomly_shrink,
NonZeroU64::new(get_ancient_append_vec_capacity()).unwrap(),
high_slot,
);
} else {
infos = db.calc_ancient_slot_info(
Expand Down Expand Up @@ -2628,6 +2663,7 @@ pub mod tests {
capacity: 1,
alive_bytes: 1,
should_shrink: false,
is_high_slot: false,
})
.collect(),
shrink_indexes: (0..count).collect(),
Expand Down Expand Up @@ -2671,7 +2707,7 @@ pub mod tests {
}

#[test]
fn test_filter_by_smaller_capacity_sort() {
fn test_filter_by_smallest_capacity_sort() {
// max is 6
// 7 storages
// storage[last] is big enough to cause us to need another storage
Expand Down Expand Up @@ -2729,6 +2765,101 @@ pub mod tests {
}
}

/// Test that we always include the high slots when filtering which ancient infos to pack
///
/// If there are *more* high slots than `max_resulting_storages` in the tuning parameters,
/// every high slot must still be present after calling `filter_by_smallest_capacity()`.
#[test]
fn test_filter_by_smallest_capacity_high_slot_more() {
    let tuning = default_tuning();

    // Create more high-slot storages than 'max resulting storages' allows.
    let high_slot_count = tuning.max_resulting_storages.get() * 2;
    let total_storage_count = high_slot_count * 3;
    let mut infos = create_test_infos(total_storage_count as usize);

    // Mark the storages with the largest slot numbers as high slots.
    infos.all_infos.sort_unstable_by_key(|info| info.slot);
    for info in infos
        .all_infos
        .iter_mut()
        .rev()
        .take(high_slot_count as usize)
    {
        info.is_high_slot = true;
    }
    let expected_slots: Vec<_> = infos
        .all_infos
        .iter()
        .filter(|info| info.is_high_slot)
        .map(|info| info.slot)
        .collect();

    // shuffle the infos so they actually need to be sorted
    infos.all_infos.shuffle(&mut thread_rng());
    infos.filter_by_smallest_capacity(&tuning);

    // Re-sort by slot so the surviving slots can be compared directly.
    infos.all_infos.sort_unstable_by_key(|info| info.slot);
    let actual_slots: Vec<_> = infos.all_infos.iter().map(|info| info.slot).collect();
    assert_eq!(infos.all_infos.len() as u64, high_slot_count);
    assert_eq!(actual_slots, expected_slots);
}

/// Test that we always include the high slots when filtering which ancient infos to pack
///
/// If there are *fewer* high slots than `max_resulting_storages` in the tuning parameters,
/// every high slot must still be present after calling `filter_by_smallest_capacity()`.
#[test]
fn test_filter_by_smallest_capacity_high_slot_less() {
    let tuning = default_tuning();

    // Create fewer high-slot storages than 'max resulting storages' allows.
    let high_slot_count = tuning.max_resulting_storages.get() / 2;
    let total_storage_count = high_slot_count * 5;
    let mut infos = create_test_infos(total_storage_count as usize);

    // Mark the storages with the largest slot numbers as high slots.
    infos.all_infos.sort_unstable_by_key(|info| info.slot);
    for info in infos
        .all_infos
        .iter_mut()
        .rev()
        .take(high_slot_count as usize)
    {
        info.is_high_slot = true;
    }
    let high_slots: Vec<_> = infos
        .all_infos
        .iter()
        .filter(|info| info.is_high_slot)
        .map(|info| info.slot)
        .collect();

    // shuffle the infos so they actually need to be sorted
    infos.all_infos.shuffle(&mut thread_rng());
    infos.filter_by_smallest_capacity(&tuning);

    // Re-sort by slot; use a set for O(1) membership checks of the survivors.
    infos.all_infos.sort_unstable_by_key(|info| info.slot);
    let actual_slots: HashSet<_> = infos.all_infos.iter().map(|info| info.slot).collect();
    assert_eq!(
        infos.all_infos.len() as u64,
        tuning.max_resulting_storages.get() - 1,
    );
    assert!(high_slots
        .iter()
        .all(|high_slot| actual_slots.contains(high_slot)));
}

fn test(filter: bool, infos: &mut AncientSlotInfos, tuning: &PackedAncientStorageTuning) {
if filter {
infos.filter_by_smallest_capacity(tuning);
Expand Down Expand Up @@ -3212,13 +3343,15 @@ pub mod tests {
capacity: info1_capacity,
alive_bytes: 0,
should_shrink: false,
is_high_slot: false,
};
let info2 = SlotInfo {
storage: storage.clone(),
slot,
capacity: 2,
alive_bytes: 1,
should_shrink: false,
is_high_slot: false,
};
let mut infos = AncientSlotInfos {
all_infos: vec![info1, info2],
Expand Down

0 comments on commit d2b3476

Please sign in to comment.