From 89354f4b7e56979bd47aaee11336172730ec4c8c Mon Sep 17 00:00:00 2001 From: jeff washington Date: Thu, 30 Mar 2023 08:32:01 -0500 Subject: [PATCH] disk index: store single slot list in index entry --- bucket_map/src/bucket.rs | 180 ++++++++++------ bucket_map/src/bucket_map.rs | 2 +- bucket_map/src/bucket_storage.rs | 9 +- bucket_map/src/index_entry.rs | 353 +++++++++++++++++++++---------- 4 files changed, 373 insertions(+), 171 deletions(-) diff --git a/bucket_map/src/bucket.rs b/bucket_map/src/bucket.rs index ef27b609643a1d..8c505e1d1ec5ed 100644 --- a/bucket_map/src/bucket.rs +++ b/bucket_map/src/bucket.rs @@ -4,7 +4,10 @@ use { bucket_map::BucketMapError, bucket_stats::BucketMapStats, bucket_storage::{BucketOccupied, BucketStorage, DEFAULT_CAPACITY_POW2}, - index_entry::{DataBucket, IndexBucket, IndexEntry, IndexEntryPlaceInBucket}, + index_entry::{ + DataBucket, IndexBucket, IndexEntry, IndexEntryPlaceInBucket, MultipleSlots, + SlotCountEnum, + }, MaxSearch, RefCount, }, rand::{thread_rng, Rng}, @@ -77,8 +80,14 @@ impl Reallocated { } } +/// when updating the index, this keeps track of the previous data entry which will need to be freed +struct DataFileEntryToFree { + bucket_ix: usize, + location: u64, +} + // >= 2 instances of BucketStorage per 'bucket' in the bucket map. 
1 for index, >= 1 for data -pub struct Bucket { +pub struct Bucket { drives: Arc>, //index pub index: BucketStorage>, @@ -264,7 +273,7 @@ impl<'b, T: Clone + Copy + 'static> Bucket { pub fn try_write( &mut self, key: &Pubkey, - data: impl Iterator, + mut data: impl Iterator, data_len: usize, ref_count: RefCount, ) -> Result<(), BucketMapError> { @@ -288,72 +297,125 @@ impl<'b, T: Clone + Copy + 'static> Bucket { }; elem.set_ref_count(&mut self.index, ref_count); - let bucket_ix = elem.data_bucket_ix(&self.index); let num_slots = data_len as u64; - if best_fit_bucket == bucket_ix && elem.num_slots(&self.index) > 0 { - let current_bucket = &mut self.data[bucket_ix as usize]; - // in place update - let elem_loc = elem.data_loc(&self.index, current_bucket); - assert!(!current_bucket.is_free(elem_loc)); - let slice: &mut [T] = current_bucket.get_mut_cell_slice(elem_loc, data_len as u64); - elem.set_num_slots(&mut self.index, num_slots); - - slice.iter_mut().zip(data).for_each(|(dest, src)| { - *dest = *src; + let use_data_storage = num_slots > 1; + + if !use_data_storage { + // new data stored should be stored in elem.`first_element` + // new data len is 0 or 1 + let mut free_info = None; + if let SlotCountEnum::MultipleSlots(multiple_slots) = + elem.get_slot_count_enum(&self.index) + { + // free old data location + let bucket_ix = + IndexEntry::::data_bucket_from_num_slots(multiple_slots.num_slots); + free_info = Some(( + bucket_ix as usize, + IndexEntryPlaceInBucket::::data_loc( + &self.data[bucket_ix as usize], + multiple_slots, + ), + )); + } + elem.set_slot_count_enum_value( + &mut self.index, + if let Some(single_element) = data.next() { + SlotCountEnum::OneSlotInIndex(single_element) + } else { + SlotCountEnum::ZeroSlots + }, + ); + if let Some((bucket_ix, elem_loc)) = free_info { + // free the entry in the data bucket the data was previously stored in + self.data[bucket_ix].free(elem_loc); + } + return Ok(()); + } + // storing the slot list requires using the 
data file + + let mut old_data_entry_to_free = None; + // see if old elements were in a data file + if let Some(multiple_slots) = elem.get_multiple_slots_mut(&mut self.index) { + let bucket_ix = + IndexEntry::::data_bucket_from_num_slots(multiple_slots.num_slots) as usize; + let current_bucket = &mut self.data[bucket_ix]; + let elem_loc = IndexEntryPlaceInBucket::::data_loc(current_bucket, multiple_slots); + + if best_fit_bucket == bucket_ix as u64 { + // in place update in same data file + assert!(!current_bucket.is_free(elem_loc)); + let slice: &mut [T] = current_bucket.get_mut_cell_slice(elem_loc, data_len as u64); + multiple_slots.num_slots = num_slots; + + slice.iter_mut().zip(data).for_each(|(dest, src)| { + *dest = *src; + }); + return Ok(()); + } + + old_data_entry_to_free = Some(DataFileEntryToFree { + bucket_ix, + location: elem_loc, }); - Ok(()) - } else { - // need to move the allocation to a best fit spot - let best_bucket = &self.data[best_fit_bucket as usize]; - let current_bucket = &self.data[bucket_ix as usize]; - let cap_power = best_bucket.capacity_pow2; - let cap = best_bucket.capacity(); - let pos = thread_rng().gen_range(0, cap); - // max search is increased here by a lot for this search. The idea is that we just have to find an empty bucket somewhere. - // We don't mind waiting on a new write (by searching longer). Writing is done in the background only. - // Wasting space by doubling the bucket size is worse behavior. We expect more - // updates and fewer inserts, so we optimize for more compact data. - // We can accomplish this by increasing how many locations we're willing to search for an empty data cell. - // For the index bucket, it is more like a hash table and we have to exhaustively search 'max_search' to prove an item does not exist. - // And we do have to support the 'does not exist' case with good performance. So, it makes sense to grow the index bucket when it is too large. 
- // For data buckets, the offset is stored in the index, so it is directly looked up. So, the only search is on INSERT or update to a new sized value. - for i in pos..pos + (max_search * 10).min(cap) { - let ix = i % cap; - if best_bucket.is_free(ix) { - let elem_loc = elem.data_loc(&self.index, current_bucket); - let old_slots = elem.num_slots(&self.index); - elem.set_storage_offset(&mut self.index, ix); - elem.set_storage_capacity_when_created_pow2( - &mut self.index, - best_bucket.capacity_pow2, - ); - elem.set_num_slots(&mut self.index, num_slots); - if old_slots > 0 { - let current_bucket = &mut self.data[bucket_ix as usize]; - current_bucket.free(elem_loc); - } - //debug!( "DATA ALLOC {:?} {} {} {}", key, elem.data_location, best_bucket.capacity, elem_uid ); - if num_slots > 0 { - let best_bucket = &mut self.data[best_fit_bucket as usize]; - best_bucket.occupy(ix, false).unwrap(); - let slice = best_bucket.get_mut_cell_slice(ix, num_slots); - slice.iter_mut().zip(data).for_each(|(dest, src)| { - *dest = *src; - }); - } - return Ok(()); + } + + // need to move the allocation to a best fit spot + let best_bucket = &self.data[best_fit_bucket as usize]; + let cap_power = best_bucket.capacity_pow2; + let cap = best_bucket.capacity(); + let pos = thread_rng().gen_range(0, cap); + // max search is increased here by a lot for this search. The idea is that we just have to find an empty bucket somewhere. + // We don't mind waiting on a new write (by searching longer). Writing is done in the background only. + // Wasting space by doubling the bucket size is worse behavior. We expect more + // updates and fewer inserts, so we optimize for more compact data. + // We can accomplish this by increasing how many locations we're willing to search for an empty data cell. + // For the index bucket, it is more like a hash table and we have to exhaustively search 'max_search' to prove an item does not exist. 
+ // And we do have to support the 'does not exist' case with good performance. So, it makes sense to grow the index bucket when it is too large. + // For data buckets, the offset is stored in the index, so it is directly looked up. So, the only search is on INSERT or update to a new sized value. + for i in pos..pos + (max_search * 10).min(cap) { + let ix = i % cap; + if best_bucket.is_free(ix) { + let mut multiple_slots = MultipleSlots::default(); + multiple_slots.set_storage_offset(ix); + multiple_slots.set_storage_capacity_when_created_pow2(best_bucket.capacity_pow2); + multiple_slots.num_slots = num_slots; + elem.set_slot_count_enum_value( + &mut self.index, + SlotCountEnum::MultipleSlots(&multiple_slots), + ); + //debug!( "DATA ALLOC {:?} {} {} {}", key, elem.data_location, best_bucket.capacity, elem_uid ); + if num_slots > 0 { + let best_bucket = &mut self.data[best_fit_bucket as usize]; + best_bucket.occupy(ix, false).unwrap(); + let slice = best_bucket.get_mut_cell_slice(ix, num_slots); + slice.iter_mut().zip(data).for_each(|(dest, src)| { + *dest = *src; + }); + } + if let Some(DataFileEntryToFree { + bucket_ix, + location, + }) = old_data_entry_to_free + { + // free the entry in the data bucket the data was previously stored in + self.data[bucket_ix].free(location); } + return Ok(()); } - Err(BucketMapError::DataNoSpace((best_fit_bucket, cap_power))) } + Err(BucketMapError::DataNoSpace((best_fit_bucket, cap_power))) } pub fn delete_key(&mut self, key: &Pubkey) { if let Some((elem, elem_ix)) = self.find_index_entry(key) { - if elem.num_slots(&self.index) > 0 { - let ix = elem.data_bucket_ix(&self.index) as usize; + if let SlotCountEnum::MultipleSlots(multiple_slots) = + elem.get_slot_count_enum(&self.index) + { + let ix = + IndexEntry::::data_bucket_from_num_slots(multiple_slots.num_slots) as usize; let data_bucket = &self.data[ix]; - let loc = elem.data_loc(&self.index, data_bucket); + let loc = IndexEntryPlaceInBucket::::data_loc(data_bucket, 
multiple_slots); let data_bucket = &mut self.data[ix]; //debug!( "DATA FREE {:?} {} {} {}", key, elem.data_location, data_bucket.capacity, elem_uid ); data_bucket.free(loc); diff --git a/bucket_map/src/bucket_map.rs b/bucket_map/src/bucket_map.rs index 0193642c408022..309eabf37d8494 100644 --- a/bucket_map/src/bucket_map.rs +++ b/bucket_map/src/bucket_map.rs @@ -365,7 +365,7 @@ mod tests { let v = (0..count) .map(|x| (x as usize, x as usize /*thread_rng().gen::()*/)) .collect::>(); - let rc = thread_rng().gen::(); + let rc = thread_rng().gen_range(0, RefCount::MAX >> 2); (v, rc) }; diff --git a/bucket_map/src/bucket_storage.rs b/bucket_map/src/bucket_storage.rs index 1fd63a51999052..34b2ff8f03d3e7 100644 --- a/bucket_map/src/bucket_storage.rs +++ b/bucket_map/src/bucket_storage.rs @@ -205,6 +205,13 @@ impl BucketStorage { unsafe { &mut *item } } + pub(crate) fn get_from_parts(item_slice: &[u8]) -> &T { + unsafe { + let item = item_slice.as_ptr() as *const T; + &*item + } + } + pub fn get_mut(&mut self, ix: u64) -> &mut T { let start = self.get_start_offset_no_header(ix); let item_slice = &mut self.mmap[start..]; @@ -373,7 +380,7 @@ mod test { let mut storage = BucketStorage::>::new( Arc::new(paths), 1, - 1, + std::mem::size_of::>() as u64, 1, Arc::default(), Arc::default(), diff --git a/bucket_map/src/index_entry.rs b/bucket_map/src/index_entry.rs index 0845905eb0f956..9fe6088fa41d5f 100644 --- a/bucket_map/src/index_entry.rs +++ b/bucket_map/src/index_entry.rs @@ -25,12 +25,11 @@ struct OccupiedHeader { } /// allocated in `contents` in a BucketStorage -pub struct BucketWithBitVec { +pub struct BucketWithBitVec { pub occupied: BitVec, - _phantom: PhantomData<&'static T>, } -impl BucketOccupied for BucketWithBitVec { +impl BucketOccupied for BucketWithBitVec { fn occupy(&mut self, element: &mut [u8], ix: usize) { assert!(self.is_free(element, ix)); self.occupied.set(ix as u64, true); @@ -49,13 +48,46 @@ impl BucketOccupied for BucketWithBitVec { fn new(num_elements: 
usize) -> Self { Self { occupied: BitVec::new_fill(false, num_elements as u64), + } + } +} + +/// allocated in `contents` in a BucketStorage +#[derive(Debug, Default)] +pub struct IndexBucketUsingRefCountBits { + _phantom: PhantomData, +} + +impl BucketOccupied for IndexBucketUsingRefCountBits { + fn occupy(&mut self, element: &mut [u8], _ix: usize) { + let entry: &mut IndexEntry = + BucketStorage::>::get_mut_from_parts(element); + assert!(matches!(entry.get_slot_count_enum(), SlotCountEnum::Free)); + entry.set_slot_count_enum_value(SlotCountEnum::ZeroSlots); + } + fn free(&mut self, element: &mut [u8], _ix: usize) { + let entry: &mut IndexEntry = + BucketStorage::>::get_mut_from_parts(element); + assert!(!matches!(entry.get_slot_count_enum(), SlotCountEnum::Free)); + entry.set_slot_count_enum_value(SlotCountEnum::Free); + } + fn is_free(&self, element: &[u8], _ix: usize) -> bool { + let entry: &IndexEntry = + BucketStorage::>::get_from_parts(element); + matches!(entry.get_slot_count_enum(), SlotCountEnum::Free) + } + fn offset_to_first_data() -> usize { + 0 + } + fn new(_num_elements: usize) -> Self { + Self { _phantom: PhantomData, } } } -pub type DataBucket = BucketWithBitVec<()>; -pub type IndexBucket = BucketWithBitVec; +pub type DataBucket = BucketWithBitVec; +pub type IndexBucket = IndexBucketUsingRefCountBits; /// contains the index of an entry in the index bucket. /// This type allows us to call methods to interact with the index entry on this type. @@ -65,28 +97,131 @@ pub struct IndexEntryPlaceInBucket { } #[repr(C)] -#[derive(Debug, Copy, Clone, PartialEq, Eq)] +#[derive(Copy, Clone)] // one instance of this per item in the index // stored in the index bucket -pub struct IndexEntry { - pub key: Pubkey, // can this be smaller if we have reduced the keys into buckets already? - ref_count: RefCount, // can this be smaller? Do we ever need more than 4B refcounts? 
- storage_cap_and_offset: PackedStorage, - // if the bucket doubled, the index can be recomputed using create_bucket_capacity_pow2 - num_slots: Slot, // can this be smaller? epoch size should ~ be the max len. this is the num elements in the slot list - _phantom: PhantomData<&'static T>, +pub struct IndexEntry { + pub(crate) key: Pubkey, // can this be smaller if we have reduced the keys into buckets already? + packed_ref_count: PackedRefCount, + /// depends on the contents of packed_ref_count.slot_count_enum + pub(crate) contents: SingleElementOrMultipleSlots, +} + +#[bitfield(bits = 64)] +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Eq, PartialEq)] +pub(crate) struct PackedRefCount { + /// tag for `SlotCountEnum` + pub(crate) slot_count_enum: B2, + /// ref_count of this entry. We don't need anywhere near 62 bits for this value + pub(crate) ref_count: B62, +} + +/// required fields when an index element references the data file +#[repr(C)] +#[derive(Debug, Default, Copy, Clone, Eq, PartialEq)] +pub(crate) struct MultipleSlots { + // if the bucket doubled, the index can be recomputed using storage_cap_and_offset.create_bucket_capacity_pow2 + pub(crate) storage_cap_and_offset: PackedStorage, + /// num elements in the slot list + pub num_slots: Slot, +} + +impl MultipleSlots { + pub(crate) fn set_storage_capacity_when_created_pow2( + &mut self, + storage_capacity_when_created_pow2: u8, + ) { + self.storage_cap_and_offset + .set_capacity_when_created_pow2(storage_capacity_when_created_pow2) + } + + pub(crate) fn set_storage_offset(&mut self, storage_offset: u64) { + self.storage_cap_and_offset + .set_offset_checked(storage_offset) + .expect("New storage offset must fit into 7 bytes!") + } + + pub(crate) fn storage_capacity_when_created_pow2(&self) -> u8 { + self.storage_cap_and_offset.capacity_when_created_pow2() + } + + fn storage_offset(&self) -> u64 { + self.storage_cap_and_offset.offset() + } +} + +#[repr(C)] +#[derive(Copy, Clone)] +pub(crate) union
SingleElementOrMultipleSlots { + /// the slot list contains a single element. No need for an entry in the data file. + /// The element itself is stored in place in the index entry + pub(crate) single_element: T, + /// the slot list contains more than one element. This contains the reference to the data file. + pub(crate) multiple_slots: MultipleSlots, +} + +#[repr(u8)] +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum SlotCountEnum<'a, T> { + /// this spot is not allocated + Free = 0, + /// zero slots in the slot list + ZeroSlots = 1, + /// one slot in the slot list, it is stored in the index + OneSlotInIndex(&'a T) = 2, + /// > 1 slots, slots are stored in data file + MultipleSlots(&'a MultipleSlots) = 3, } /// Pack the storage offset and capacity-when-crated-pow2 fields into a single u64 #[bitfield(bits = 64)] #[repr(C)] #[derive(Debug, Default, Copy, Clone, Eq, PartialEq)] -struct PackedStorage { +pub(crate) struct PackedStorage { capacity_when_created_pow2: B8, offset: B56, } -impl IndexEntry { +impl IndexEntry { + pub(crate) fn get_slot_count_enum(&self) -> SlotCountEnum<'_, T> { + unsafe { + match self.packed_ref_count.slot_count_enum() { + 0 => SlotCountEnum::Free, + 1 => SlotCountEnum::ZeroSlots, + 2 => SlotCountEnum::OneSlotInIndex(&self.contents.single_element), + 3 => SlotCountEnum::MultipleSlots(&self.contents.multiple_slots), + _ => { + panic!("unexpected value"); + } + } + } + } + + pub(crate) fn get_multiple_slots_mut(&mut self) -> Option<&mut MultipleSlots> { + unsafe { + match self.packed_ref_count.slot_count_enum() { + 3 => Some(&mut self.contents.multiple_slots), + _ => None, + } + } + } + + pub(crate) fn set_slot_count_enum_value<'a>(&'a mut self, value: SlotCountEnum<'a, T>) { + self.packed_ref_count.set_slot_count_enum(match value { + SlotCountEnum::Free => 0, + SlotCountEnum::ZeroSlots => 1, + SlotCountEnum::OneSlotInIndex(single_element) => { + self.contents.single_element = *single_element; + 2 + } +
SlotCountEnum::MultipleSlots(multiple_slots) => { + self.contents.multiple_slots = *multiple_slots; + 3 + } + }); + } + /// return closest bucket index fit for the slot slice. /// Since bucket size is 2^index, the return value is /// min index, such that 2^index >= num_slots @@ -102,96 +237,83 @@ impl IndexEntry { } } -impl IndexEntryPlaceInBucket { - pub fn init(&self, index_bucket: &mut BucketStorage>, pubkey: &Pubkey) { - let index_entry = index_bucket.get_mut::>(self.ix); - index_entry.key = *pubkey; - index_entry.ref_count = 0; - index_entry.storage_cap_and_offset = PackedStorage::default(); - index_entry.num_slots = 0; +impl IndexEntryPlaceInBucket { + pub(crate) fn get_slot_count_enum<'a>( + &self, + index_bucket: &'a BucketStorage>, + ) -> SlotCountEnum<'a, T> { + let index_entry = index_bucket.get::>(self.ix); + index_entry.get_slot_count_enum() } - pub fn set_storage_capacity_when_created_pow2( + pub(crate) fn get_multiple_slots_mut<'a>( &self, - index_bucket: &mut BucketStorage>, - storage_capacity_when_created_pow2: u8, - ) { - index_bucket - .get_mut::>(self.ix) - .storage_cap_and_offset - .set_capacity_when_created_pow2(storage_capacity_when_created_pow2) + index_bucket: &'a mut BucketStorage>, + ) -> Option<&'a mut MultipleSlots> { + let index_entry = index_bucket.get_mut::>(self.ix); + index_entry.get_multiple_slots_mut() } - pub fn set_storage_offset( + pub(crate) fn set_slot_count_enum_value<'a>( &self, - index_bucket: &mut BucketStorage>, - storage_offset: u64, + index_bucket: &'a mut BucketStorage>, + value: SlotCountEnum<'a, T>, ) { - index_bucket - .get_mut::>(self.ix) - .storage_cap_and_offset - .set_offset_checked(storage_offset) - .expect("New storage offset must fit into 7 bytes!"); + let index_entry = index_bucket.get_mut::>(self.ix); + index_entry.set_slot_count_enum_value(value); } - pub fn data_bucket_ix(&self, index_bucket: &BucketStorage>) -> u64 { - IndexEntry::::data_bucket_from_num_slots(self.num_slots(index_bucket)) + pub fn 
init(&self, index_bucket: &mut BucketStorage>, pubkey: &Pubkey) { + self.set_slot_count_enum_value(index_bucket, SlotCountEnum::ZeroSlots); + let index_entry = index_bucket.get_mut::>(self.ix); + index_entry.key = *pubkey; + index_entry.packed_ref_count.set_ref_count(0); } pub fn ref_count(&self, index_bucket: &BucketStorage>) -> RefCount { let index_entry = index_bucket.get::>(self.ix); - index_entry.ref_count - } - - fn storage_capacity_when_created_pow2( - &self, - index_bucket: &BucketStorage>, - ) -> u8 { - let index_entry = index_bucket.get::>(self.ix); - index_entry - .storage_cap_and_offset - .capacity_when_created_pow2() - } - - pub fn storage_offset(&self, index_bucket: &BucketStorage>) -> u64 { - index_bucket - .get::>(self.ix) - .storage_cap_and_offset - .offset() + index_entry.packed_ref_count.ref_count() } /// This function maps the original data location into an index in the current bucket storage. /// This is coupled with how we resize bucket storages. - pub fn data_loc( - &self, - index_bucket: &BucketStorage>, + pub(crate) fn data_loc( storage: &BucketStorage, + multiple_slots: &MultipleSlots, ) -> u64 { - let index_entry = index_bucket.get::>(self.ix); - self.storage_offset(index_bucket) - << (storage.capacity_pow2 - - index_entry - .storage_cap_and_offset - .capacity_when_created_pow2()) + multiple_slots.storage_offset() + << (storage.capacity_pow2 - multiple_slots.storage_capacity_when_created_pow2()) } pub fn read_value<'a>( &self, - index_bucket: &BucketStorage>, + index_bucket: &'a BucketStorage>, data_buckets: &'a [BucketStorage], ) -> Option<(&'a [T], RefCount)> { - let num_slots = self.num_slots(index_bucket); - let slice = if num_slots > 0 { - let data_bucket_ix = self.data_bucket_ix(index_bucket); - let data_bucket = &data_buckets[data_bucket_ix as usize]; - let loc = self.data_loc(index_bucket, data_bucket); - assert!(!data_bucket.is_free(loc)); - data_bucket.get_cell_slice(loc, num_slots) - } else { - // num_slots is 0. 
This means we don't have an actual allocation. - &[] - }; - Some((slice, self.ref_count(index_bucket))) + Some(( + match self.get_slot_count_enum(index_bucket) { + SlotCountEnum::ZeroSlots => { + // num_slots is 0. This means we don't have an actual allocation. + &[] + } + SlotCountEnum::OneSlotInIndex(single_element) => { + // only element is stored in the index entry + std::slice::from_ref(single_element) + } + SlotCountEnum::MultipleSlots(multiple_slots) => { + let data_bucket_ix = + IndexEntry::::data_bucket_from_num_slots(multiple_slots.num_slots); + let data_bucket = &data_buckets[data_bucket_ix as usize]; + let loc = Self::data_loc(data_bucket, multiple_slots); + assert!(!data_bucket.is_free(loc)); + data_bucket.get_cell_slice::(loc, multiple_slots.num_slots) + } + _ => { + unimplemented!(); + } + }, + self.ref_count(index_bucket), + )) } pub fn new(ix: u64) -> Self { @@ -212,15 +334,10 @@ impl IndexEntryPlaceInBucket { ref_count: RefCount, ) { let index_entry = index_bucket.get_mut::>(self.ix); - index_entry.ref_count = ref_count; - } - - pub fn num_slots(&self, index_bucket: &BucketStorage>) -> Slot { - index_bucket.get::>(self.ix).num_slots - } - - pub fn set_num_slots(&self, index_bucket: &mut BucketStorage>, num_slots: Slot) { - index_bucket.get_mut::>(self.ix).num_slots = num_slots; + index_entry + .packed_ref_count + .set_ref_count_checked(ref_count) + .expect("ref count must fit into 62 bits!") } } @@ -232,13 +349,17 @@ mod tests { tempfile::tempdir, }; - impl IndexEntry { + impl IndexEntry { pub fn new(key: Pubkey) -> Self { IndexEntry { key, - ref_count: 0, - storage_cap_and_offset: PackedStorage::default(), - num_slots: 0, + packed_ref_count: PackedRefCount::default(), + contents: SingleElementOrMultipleSlots { + multiple_slots: MultipleSlots { + storage_cap_and_offset: PackedStorage::default(), + num_slots: 0, + }, + }, } } } @@ -248,19 +369,17 @@ mod tests { #[test] fn test_api() { for offset in [0, 1, u32::MAX as u64] { - let (mut index_bucket, 
index) = index_entry_for_testing(); + let mut multiple_slots = MultipleSlots::default(); + if offset != 0 { - index.set_storage_offset(&mut index_bucket, offset); + multiple_slots.set_storage_offset(offset); } - assert_eq!(index.storage_offset(&index_bucket,), offset); - assert_eq!(index.storage_capacity_when_created_pow2(&index_bucket,), 0); + assert_eq!(multiple_slots.storage_offset(), offset); + assert_eq!(multiple_slots.storage_capacity_when_created_pow2(), 0); for pow in [1, 255, 0] { - index.set_storage_capacity_when_created_pow2(&mut index_bucket, pow); - assert_eq!(index.storage_offset(&index_bucket,), offset); - assert_eq!( - index.storage_capacity_when_created_pow2(&index_bucket,), - pow - ); + multiple_slots.set_storage_capacity_when_created_pow2(pow); + assert_eq!(multiple_slots.storage_offset(), offset); + assert_eq!(multiple_slots.storage_capacity_when_created_pow2(), pow); } } } @@ -268,7 +387,10 @@ mod tests { #[test] fn test_size() { assert_eq!(std::mem::size_of::(), 1 + 7); - assert_eq!(std::mem::size_of::>(), 32 + 8 + 8 + 8); + assert_eq!( + std::mem::size_of::>(), + 32 + 8 + (8 + 8).max(std::mem::size_of::()) + ); } fn index_bucket_for_testing() -> BucketStorage> { @@ -280,7 +402,7 @@ mod tests { BucketStorage::>::new( Arc::new(paths), 1, - std::mem::size_of::>() as u64, + std::mem::size_of::>() as u64, 1, Arc::default(), Arc::default(), @@ -298,25 +420,36 @@ mod tests { #[should_panic(expected = "New storage offset must fit into 7 bytes!")] fn test_set_storage_offset_value_too_large() { let too_big = 1 << 56; + let mut multiple_slots = MultipleSlots::default(); + multiple_slots.set_storage_offset(too_big); + } + + #[test] + #[should_panic(expected = "ref count must fit into 62 bits!")] + fn test_set_ref_count_too_large() { + let too_big = 1 << 62; let (mut index_bucket, index) = index_entry_for_testing(); - index.set_storage_offset(&mut index_bucket, too_big); + index.set_ref_count(&mut index_bucket, too_big); } #[test] fn 
test_data_bucket_from_num_slots() { for n in 0..512 { assert_eq!( - IndexEntry::data_bucket_from_num_slots(n), + IndexEntry::::data_bucket_from_num_slots(n), (n as f64).log2().ceil() as u64 ); } - assert_eq!(IndexEntry::data_bucket_from_num_slots(u32::MAX as u64), 32); assert_eq!( - IndexEntry::data_bucket_from_num_slots(u32::MAX as u64 + 1), + IndexEntry::::data_bucket_from_num_slots(u32::MAX as u64), + 32 + ); + assert_eq!( + IndexEntry::::data_bucket_from_num_slots(u32::MAX as u64 + 1), 32 ); assert_eq!( - IndexEntry::data_bucket_from_num_slots(u32::MAX as u64 + 2), + IndexEntry::::data_bucket_from_num_slots(u32::MAX as u64 + 2), 33 ); }