Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

disk index: use bits in ref count to store occupied #31004

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions bucket_map/src/bucket_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,18 @@ impl<O: BucketOccupied> BucketStorage<O> {
unsafe { slice.get_unchecked_mut(0) }
}

pub(crate) fn get_mut_from_parts<T: Sized>(item_slice: &mut [u8]) -> &mut T {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: We can remove the Sized bound from these two functions now. (Can be done in a different PR.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I fixed this up in the next one.

debug_assert!(std::mem::size_of::<T>() <= item_slice.len());
let item = item_slice.as_mut_ptr() as *mut T;
unsafe { &mut *item }
}

/// Reinterprets the leading bytes of `item_slice` as a shared reference to a `T`.
///
/// Nothing is copied: the returned reference points into `item_slice` and borrows it.
///
/// # Panics
///
/// In builds with debug assertions enabled, panics when `item_slice` is shorter than
/// `size_of::<T>()` or does not start at an address aligned for `T`. Release builds skip
/// both checks, so callers must uphold them.
pub(crate) fn get_from_parts<T>(item_slice: &[u8]) -> &T {
    debug_assert!(std::mem::size_of::<T>() <= item_slice.len());
    let item = item_slice.as_ptr() as *const T;
    // A reference to a misaligned `T` is undefined behavior, so catch it in debug builds.
    debug_assert!(item as usize % std::mem::align_of::<T>() == 0);
    // SAFETY: `item` points at the start of `item_slice`, which spans at least
    // `size_of::<T>()` bytes at an address aligned for `T` (both asserted above in debug
    // builds), and the result borrows `item_slice`. The caller is responsible for the bytes
    // being a valid `T`.
    unsafe { &*item }
}

pub fn get_cell_slice<T>(&self, ix: u64, len: u64) -> &[T] {
let start = self.get_start_offset_no_header(ix);
let slice = {
Expand Down
96 changes: 87 additions & 9 deletions bucket_map/src/index_entry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,11 @@ use {
};

/// allocated in `contents` in a BucketStorage
pub struct BucketWithBitVec<T: 'static> {
pub struct BucketWithBitVec {
pub occupied: BitVec,
_phantom: PhantomData<&'static T>,
}

impl<T> BucketOccupied for BucketWithBitVec<T> {
impl BucketOccupied for BucketWithBitVec {
fn occupy(&mut self, element: &mut [u8], ix: usize) {
assert!(self.is_free(element, ix));
self.occupied.set(ix as u64, true);
Expand All @@ -36,13 +35,45 @@ impl<T> BucketOccupied for BucketWithBitVec<T> {
fn new(num_elements: usize) -> Self {
Self {
occupied: BitVec::new_fill(false, num_elements as u64),
}
}
}

/// Occupancy tracker for index buckets that keeps no state of its own.
///
/// The only field is a `PhantomData`, so the type is zero-sized; `T` is the value type of the
/// `IndexEntry<T>` elements this tracker is used with.
#[derive(Debug, Default)]
pub struct IndexBucketUsingRefCountBits<T>
where
    T: 'static,
{
    /// Ties the tracker to `T` without storing a `T`.
    _phantom: PhantomData<&'static T>,
}

/// Occupancy is recorded in the slot-count enum tag of each `IndexEntry<T>`, so every method
/// works on the element bytes it is handed; the element index is only forwarded to `is_free`,
/// which ignores it.
impl<T: 'static> BucketOccupied for IndexBucketUsingRefCountBits<T> {
    /// Marks `element` as occupied.
    ///
    /// Panics if `element` is not currently free.
    fn occupy(&mut self, element: &mut [u8], ix: usize) {
        assert!(self.is_free(element, ix));
        BucketStorage::<Self>::get_mut_from_parts::<IndexEntry<T>>(element)
            .set_slot_count_enum_value(OccupiedEnum::Occupied);
    }

    /// Marks `element` as free.
    ///
    /// Panics if `element` is already free.
    fn free(&mut self, element: &mut [u8], ix: usize) {
        assert!(!self.is_free(element, ix));
        BucketStorage::<Self>::get_mut_from_parts::<IndexEntry<T>>(element)
            .set_slot_count_enum_value(OccupiedEnum::Free);
    }

    /// Returns `true` when the tag stored in `element` is `OccupiedEnum::Free`.
    fn is_free(&self, element: &[u8], _ix: usize) -> bool {
        let tag =
            BucketStorage::<Self>::get_from_parts::<IndexEntry<T>>(element).get_slot_count_enum();
        tag == OccupiedEnum::Free
    }

    /// Always 0: this tracker asks for no bytes ahead of the first data element.
    fn offset_to_first_data() -> usize {
        0
    }

    /// Builds the tracker; the element count is ignored because nothing is stored per element.
    fn new(_num_elements: usize) -> Self {
        Self {
            _phantom: PhantomData,
        }
    }
}

pub type DataBucket = BucketWithBitVec<()>;
pub type IndexBucket<T> = BucketWithBitVec<T>;
pub type DataBucket = BucketWithBitVec;
pub type IndexBucket<T> = IndexBucketUsingRefCountBits<T>;

/// contains the index of an entry in the index bucket.
/// This type allows us to call methods to interact with the index entry on this type.
Expand All @@ -67,15 +98,15 @@ pub struct IndexEntry<T: 'static> {
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, Eq, PartialEq)]
struct PackedRefCount {
/// reserved for future use
unused: B2,
/// tag for Enum
slot_count_enum: B2,
/// ref_count of this entry. We don't need anywhere near 62 bits for this value
ref_count: B62,
}

/// required fields when an index element references the data file
#[repr(C)]
#[derive(Debug, Default, Copy, Clone)]
#[derive(Debug, Default, Copy, Clone, Eq, PartialEq)]
pub(crate) struct MultipleSlots {
// if the bucket doubled, the index can be recomputed using storage_cap_and_offset.create_bucket_capacity_pow2
storage_cap_and_offset: PackedStorage,
Expand Down Expand Up @@ -139,6 +170,36 @@ impl MultipleSlots {
}
}

/// Occupancy state of a spot, with explicit `u8` discriminants.
#[repr(u8)]
#[derive(Debug, PartialEq, Eq)]
pub(crate) enum OccupiedEnum {
    /// the spot is not occupied and is available
    Free = 0,
    /// the spot is in use
    Occupied = 1,
}

impl<T: 'static> IndexEntry<T> {
    /// Decodes the occupancy tag kept in the 2 spare bits taken from the packed ref count.
    ///
    /// Panics with "unexpected value" if those bits hold anything other than 0 or 1.
    fn get_slot_count_enum(&self) -> OccupiedEnum {
        let tag = self.packed_ref_count.slot_count_enum();
        match tag {
            0 => OccupiedEnum::Free,
            1 => OccupiedEnum::Occupied,
            _ => panic!("unexpected value"),
        }
    }

    /// Encodes `value` into the 2 spare bits taken from the packed ref count.
    fn set_slot_count_enum_value(&mut self, value: OccupiedEnum) {
        let tag = match value {
            OccupiedEnum::Free => 0,
            OccupiedEnum::Occupied => 1,
        };
        self.packed_ref_count.set_slot_count_enum(tag);
    }
}

/// Pack the storage offset and capacity-when-created-pow2 fields into a single u64
#[bitfield(bits = 64)]
#[repr(C)]
Expand All @@ -148,7 +209,7 @@ struct PackedStorage {
offset: B56,
}

impl<T> IndexEntryPlaceInBucket<T> {
impl<T: 'static> IndexEntryPlaceInBucket<T> {
pub fn init(&self, index_bucket: &mut BucketStorage<IndexBucket<T>>, pubkey: &Pubkey) {
let index_entry = index_bucket.get_mut::<IndexEntry<T>>(self.ix);
index_entry.key = *pubkey;
Expand All @@ -172,6 +233,23 @@ impl<T> IndexEntryPlaceInBucket<T> {
.multiple_slots
}

/// Reads the occupancy tag of the index entry at `self.ix` in `index_bucket`.
pub(crate) fn get_slot_count_enum(
    &self,
    index_bucket: &BucketStorage<IndexBucket<T>>,
) -> OccupiedEnum {
    index_bucket
        .get::<IndexEntry<T>>(self.ix)
        .get_slot_count_enum()
}

/// Writes `value` as the occupancy tag of the index entry at `self.ix` in `index_bucket`.
pub(crate) fn set_slot_count_enum_value(
    &self,
    index_bucket: &mut BucketStorage<IndexBucket<T>>,
    value: OccupiedEnum,
) {
    index_bucket
        .get_mut::<IndexEntry<T>>(self.ix)
        .set_slot_count_enum_value(value);
}

pub fn ref_count(&self, index_bucket: &BucketStorage<IndexBucket<T>>) -> RefCount {
let index_entry = index_bucket.get::<IndexEntry<T>>(self.ix);
index_entry.packed_ref_count.ref_count()
Expand Down