From 27110275e64ff17528bb4d56debae1f285a9ace8 Mon Sep 17 00:00:00 2001 From: jeff washington Date: Fri, 31 Mar 2023 10:37:26 -0500 Subject: [PATCH] disk index: use bits in ref count to store occupied --- bucket_map/src/bucket_storage.rs | 12 ++++ bucket_map/src/index_entry.rs | 116 +++++++++++++++++++++++-------- 2 files changed, 100 insertions(+), 28 deletions(-) diff --git a/bucket_map/src/bucket_storage.rs b/bucket_map/src/bucket_storage.rs index effda2cc97ab20..a8e9b953a33281 100644 --- a/bucket_map/src/bucket_storage.rs +++ b/bucket_map/src/bucket_storage.rs @@ -190,6 +190,18 @@ impl BucketStorage { unsafe { slice.get_unchecked_mut(0) } } + pub(crate) fn get_mut_from_parts(item_slice: &mut [u8]) -> &mut T { + debug_assert!(std::mem::size_of::() <= item_slice.len()); + let item = item_slice.as_mut_ptr() as *mut T; + unsafe { &mut *item } + } + + pub(crate) fn get_from_parts(item_slice: &[u8]) -> &T { + debug_assert!(std::mem::size_of::() <= item_slice.len()); + let item = item_slice.as_ptr() as *const T; + unsafe { &*item } + } + pub fn get_cell_slice(&self, ix: u64, len: u64) -> &[T] { let start = self.get_start_offset_no_header(ix); let slice = { diff --git a/bucket_map/src/index_entry.rs b/bucket_map/src/index_entry.rs index 34dd4ad827de01..7b8f3654a0b9f6 100644 --- a/bucket_map/src/index_entry.rs +++ b/bucket_map/src/index_entry.rs @@ -25,12 +25,11 @@ struct OccupiedHeader { } /// allocated in `contents` in a BucketStorage -pub struct BucketWithBitVec { +pub struct BucketWithBitVec { pub occupied: BitVec, - _phantom: PhantomData<&'static T>, } -impl BucketOccupied for BucketWithBitVec { +impl BucketOccupied for BucketWithBitVec { fn occupy(&mut self, element: &mut [u8], ix: usize) { assert!(self.is_free(element, ix)); self.occupied.set(ix as u64, true); @@ -49,13 +48,45 @@ impl BucketOccupied for BucketWithBitVec { fn new(num_elements: usize) -> Self { Self { occupied: BitVec::new_fill(false, num_elements as u64), + } + } +} + +#[derive(Debug, Default)] +pub struct IndexBucketUsingRefCountBits { + _phantom: PhantomData<&'static T>, +} + +impl BucketOccupied for IndexBucketUsingRefCountBits { + fn occupy(&mut self, element: &mut [u8], ix: usize) { + assert!(self.is_free(element, ix)); + let entry: &mut IndexEntry = + BucketStorage::>::get_mut_from_parts(element); + entry.set_slot_count_enum_value(OccupiedEnum::Occupied); + } + fn free(&mut self, element: &mut [u8], ix: usize) { + assert!(!self.is_free(element, ix)); + let entry: &mut IndexEntry = + BucketStorage::>::get_mut_from_parts(element); + entry.set_slot_count_enum_value(OccupiedEnum::Free); + } + fn is_free(&self, element: &[u8], _ix: usize) -> bool { + let entry: &IndexEntry = + BucketStorage::>::get_from_parts(element); + matches!(entry.get_slot_count_enum(), OccupiedEnum::Free) + } + fn offset_to_first_data() -> usize { + 0 + } + fn new(_num_elements: usize) -> Self { + Self { _phantom: PhantomData, } } } -pub type DataBucket = BucketWithBitVec<()>; -pub type IndexBucket = BucketWithBitVec; +pub type DataBucket = BucketWithBitVec; +pub type IndexBucket = IndexBucketUsingRefCountBits; /// contains the index of an entry in the index bucket. /// This type allows us to call methods to interact with the index entry on this type. @@ -80,15 +111,15 @@ pub struct IndexEntry { #[repr(C)] #[derive(Debug, Default, Copy, Clone, Eq, PartialEq)] struct PackedRefCount { - /// reserved for future use - unused: B2, + /// tag for Enum + slot_count_enum: B2, /// ref_count of this entry. We don't need any where near 62 bits for this value ref_count: B62, } /// required fields when an index element references the data file #[repr(C)] -#[derive(Debug, Default, Copy, Clone)] +#[derive(Debug, Default, Copy, Clone, Eq, PartialEq)] pub(crate) struct MultipleSlots { // if the bucket doubled, the index can be recomputed using storage_cap_and_offset.create_bucket_capacity_pow2 storage_cap_and_offset: PackedStorage, @@ -152,6 +183,36 @@ impl MultipleSlots { } } +#[repr(u8)] +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum OccupiedEnum { + /// this spot is free (ie. not occupied) + Free = 0, + /// this spot is occupied + Occupied = 1, +} + +impl IndexEntry { + /// enum value stored in 2 spare bits taken from ref_count + fn get_slot_count_enum(&self) -> OccupiedEnum { + match self.packed_ref_count.slot_count_enum() { + 0 => OccupiedEnum::Free, + 1 => OccupiedEnum::Occupied, + _ => { + panic!("unexpected value"); + } + } + } + + /// enum value stored in 2 spare bits taken from ref_count + fn set_slot_count_enum_value<'a>(&'a mut self, value: OccupiedEnum) { + self.packed_ref_count.set_slot_count_enum(match value { + OccupiedEnum::Free => 0, + OccupiedEnum::Occupied => 1, + }); + } +} + /// Pack the storage offset and capacity-when-crated-pow2 fields into a single u64 #[bitfield(bits = 64)] #[repr(C)] @@ -161,7 +222,7 @@ struct PackedStorage { offset: B56, } -impl IndexEntryPlaceInBucket { +impl IndexEntryPlaceInBucket { pub fn init(&self, index_bucket: &mut BucketStorage>, pubkey: &Pubkey) { let index_entry = index_bucket.get_mut::>(self.ix); index_entry.key = *pubkey; @@ -169,24 +230,6 @@ impl IndexEntryPlaceInBucket { index_entry.multiple_slots = MultipleSlots::default(); } - pub fn set_storage_capacity_when_created_pow2( - &self, - index_bucket: &mut BucketStorage>, - storage_capacity_when_created_pow2: u8, - ) { - self.get_multiple_slots_mut(index_bucket) - .set_storage_capacity_when_created_pow2(storage_capacity_when_created_pow2); - } - - pub fn set_storage_offset( - &self, - index_bucket: &mut BucketStorage>, - storage_offset: u64, - ) { - self.get_multiple_slots_mut(index_bucket) - .set_storage_offset(storage_offset); - } - pub(crate) fn get_multiple_slots<'a>( &self, index_bucket: &'a BucketStorage>, @@ -203,6 +246,23 @@ impl IndexEntryPlaceInBucket { .multiple_slots } + pub(crate) fn get_slot_count_enum<'a>( + &self, + index_bucket: &'a BucketStorage>, + ) -> OccupiedEnum { + let index_entry = index_bucket.get::>(self.ix); + index_entry.get_slot_count_enum() + } + + pub(crate) fn set_slot_count_enum_value<'a>( + &self, + index_bucket: &'a mut BucketStorage>, + value: OccupiedEnum, + ) { + let index_entry = index_bucket.get_mut::>(self.ix); + index_entry.set_slot_count_enum_value(value); + } + pub fn ref_count(&self, index_bucket: &BucketStorage>) -> RefCount { let index_entry = index_bucket.get::>(self.ix); index_entry.packed_ref_count.ref_count() @@ -261,7 +321,7 @@ mod tests { tempfile::tempdir, }; - impl IndexEntry { + impl IndexEntry { pub fn new(key: Pubkey) -> Self { IndexEntry { key,