-
Notifications
You must be signed in to change notification settings - Fork 4.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Limit accounts data size #21719
Limit accounts data size #21719
Changes from all commits
dac3b2a
dfd136c
7ebf1d5
c6a1985
238a6e9
5a60296
7fc7b6e
166bfc1
9ec6dab
18e4638
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -108,6 +108,67 @@ impl ComputeMeter { | |
} | ||
} | ||
|
||
// bprumo TODO: doc where 128 GB came from. Move where the const is, also | ||
// 128 GB | ||
pub const DEFAULT_MAX_ACCOUNTS_DATA_LEN: usize = 128_000_000_000; | ||
|
||
/// bprumo TODO: doc, and/or move | ||
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)] | ||
pub struct AccountsDataBudget { | ||
/// The global maximum size for accounts data (i.e. DEFAULT_MAX_ACCOUNTS_DATA_LEN) | ||
maximum: usize, | ||
/// The current accounts data size (for a Bank) | ||
current: usize, | ||
} | ||
impl AccountsDataBudget { | ||
pub fn new(maximum: usize, current: usize) -> Self { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is maximum something that is going to change dynamically? If not could encapsulate that in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure if the maximum will change dynamically yet. This is still to make prototyping easiest, and to allow tests to set a different maximum as well. I agree that the max could be in |
||
Self { maximum, current } | ||
} | ||
pub fn maximum(&self) -> usize { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Needed? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nope. Just here for convenience while prototyping. |
||
self.maximum | ||
} | ||
pub fn current(&self) -> usize { | ||
self.current | ||
} | ||
pub fn remaining(&self) -> usize { | ||
self.maximum().saturating_sub(self.current()) | ||
} | ||
} | ||
|
||
/// bprumo TODO: doc, and/or move | ||
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)] | ||
pub struct AccountsDataMeter { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably just need this, don't need the AccountDataBudget There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yah. At the moment |
||
/// The amount of available accounts data space (i.e. AccountsDataBudget::remaining()) | ||
capacity: usize, | ||
/// The amount available of accounts data space consumed. This value is used to update the | ||
/// Bank after transactions are successfully processed. | ||
consumed: usize, | ||
} | ||
impl AccountsDataMeter { | ||
pub fn new_ref(capacity: usize) -> Rc<RefCell<Self>> { | ||
Rc::new(RefCell::new(Self { | ||
capacity, | ||
consumed: 0, | ||
})) | ||
} | ||
pub fn capacity(&self) -> usize { | ||
self.capacity | ||
} | ||
pub fn consumed(&self) -> usize { | ||
self.consumed | ||
} | ||
pub fn remaining(&self) -> usize { | ||
self.capacity().saturating_sub(self.consumed()) | ||
} | ||
pub fn consume(&mut self, amount: usize) -> Result<(), InstructionError> { | ||
if amount > self.remaining() { | ||
return Err(InstructionError::AccountsDataBudgetExceeded); | ||
} | ||
self.consumed = self.consumed.saturating_add(amount); | ||
Ok(()) | ||
} | ||
} | ||
|
||
pub struct StackFrame<'a> { | ||
pub number_of_program_accounts: usize, | ||
pub keyed_accounts: Vec<KeyedAccount<'a>>, | ||
|
@@ -134,6 +195,7 @@ impl<'a> StackFrame<'a> { | |
} | ||
} | ||
|
||
// bprumo NOTE: here's the invoke context | ||
pub struct InvokeContext<'a> { | ||
invoke_stack: Vec<StackFrame<'a>>, | ||
rent: Rent, | ||
|
@@ -145,6 +207,7 @@ pub struct InvokeContext<'a> { | |
compute_budget: ComputeBudget, | ||
current_compute_budget: ComputeBudget, | ||
compute_meter: Rc<RefCell<ComputeMeter>>, | ||
accounts_data_meter: Rc<RefCell<AccountsDataMeter>>, | ||
executors: Rc<RefCell<Executors>>, | ||
pub instruction_recorder: Option<&'a InstructionRecorder>, | ||
pub feature_set: Arc<FeatureSet>, | ||
|
@@ -163,6 +226,7 @@ impl<'a> InvokeContext<'a> { | |
sysvars: &'a [(Pubkey, Vec<u8>)], | ||
log_collector: Option<Rc<RefCell<LogCollector>>>, | ||
compute_budget: ComputeBudget, | ||
accounts_data_budget: AccountsDataBudget, | ||
executors: Rc<RefCell<Executors>>, | ||
feature_set: Arc<FeatureSet>, | ||
blockhash: Hash, | ||
|
@@ -179,6 +243,7 @@ impl<'a> InvokeContext<'a> { | |
current_compute_budget: compute_budget, | ||
compute_budget, | ||
compute_meter: ComputeMeter::new_ref(compute_budget.max_units), | ||
accounts_data_meter: AccountsDataMeter::new_ref(accounts_data_budget.remaining()), | ||
executors, | ||
instruction_recorder: None, | ||
feature_set, | ||
|
@@ -200,6 +265,7 @@ impl<'a> InvokeContext<'a> { | |
&[], | ||
Some(LogCollector::new_ref()), | ||
ComputeBudget::default(), | ||
AccountsDataBudget::new(DEFAULT_MAX_ACCOUNTS_DATA_LEN, 0), | ||
Rc::new(RefCell::new(Executors::default())), | ||
Arc::new(FeatureSet::all_enabled()), | ||
Hash::default(), | ||
|
@@ -508,6 +574,7 @@ impl<'a> InvokeContext<'a> { | |
// Verify the called program has not misbehaved | ||
let do_support_realloc = self.feature_set.is_active(&do_support_realloc::id()); | ||
for (account, prev_size) in prev_account_sizes.iter() { | ||
// bprumo NOTE: looks like this checks for size changes... | ||
if !do_support_realloc && *prev_size != account.borrow().data().len() && *prev_size != 0 | ||
{ | ||
// Only support for `CreateAccount` at this time. | ||
|
@@ -636,6 +703,7 @@ impl<'a> InvokeContext<'a> { | |
Ok((message, caller_write_privileges, program_indices)) | ||
} | ||
|
||
// bprumo NOTE: here's where the CPI instructions are processed | ||
/// Process a cross-program instruction | ||
pub fn process_instruction( | ||
&mut self, | ||
|
@@ -783,6 +851,11 @@ impl<'a> InvokeContext<'a> { | |
self.compute_meter.clone() | ||
} | ||
|
||
/// bprumo TODO: doc | ||
pub fn accounts_data_meter(&self) -> Rc<RefCell<AccountsDataMeter>> { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. our convention is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will do! |
||
Rc::clone(&self.accounts_data_meter) | ||
} | ||
|
||
/// Loaders may need to do work in order to execute a program. Cache | ||
/// the work that can be re-used across executions | ||
pub fn add_executor(&self, pubkey: &Pubkey, executor: Arc<dyn Executor>) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1518,6 +1518,14 @@ struct IndexAccountMapEntry<'a> { | |
|
||
type GenerateIndexAccountsMap<'a> = HashMap<Pubkey, IndexAccountMapEntry<'a>>; | ||
|
||
#[derive(Debug, Default, Clone, Copy)] | ||
struct GenerateIndexForSlotResult { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe a better name here, something more descriptive of what it actually represents? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, is for getting the accounts data len during deserialization of a snapshot. |
||
insert_time_us: u64, | ||
num_accounts: u64, | ||
num_accounts_rent_exempt: u64, | ||
accounts_data_len: usize, | ||
} | ||
|
||
impl AccountsDb { | ||
pub fn default_for_tests() -> Self { | ||
Self::default_with_accounts_index(AccountInfoAccountsIndex::default_for_tests(), None, None) | ||
|
@@ -6657,21 +6665,21 @@ impl AccountsDb { | |
accounts_map | ||
} | ||
|
||
/// return time_us, # accts rent exempt, total # accts | ||
fn generate_index_for_slot<'a>( | ||
&self, | ||
accounts_map: GenerateIndexAccountsMap<'a>, | ||
slot: &Slot, | ||
rent_collector: &RentCollector, | ||
) -> (u64, u64, u64) { | ||
) -> GenerateIndexForSlotResult { | ||
if accounts_map.is_empty() { | ||
return (0, 0, 0); | ||
return GenerateIndexForSlotResult::default(); | ||
} | ||
|
||
let secondary = !self.account_indexes.is_empty(); | ||
|
||
let mut rent_exempt = 0; | ||
let len = accounts_map.len(); | ||
let mut num_accounts_rent_exempt = 0; | ||
let mut accounts_data_len = 0; | ||
let num_accounts = accounts_map.len(); | ||
let items = accounts_map.into_iter().map( | ||
|( | ||
pubkey, | ||
|
@@ -6689,12 +6697,15 @@ impl AccountsDb { | |
&self.account_indexes, | ||
); | ||
} | ||
if !stored_account.is_zero_lamport() { | ||
accounts_data_len += stored_account.data().len(); | ||
} | ||
|
||
if !rent_collector.should_collect_rent(&pubkey, &stored_account, false) || { | ||
let (_rent_due, exempt) = rent_collector.get_rent_due(&stored_account); | ||
exempt | ||
} { | ||
rent_exempt += 1; | ||
num_accounts_rent_exempt += 1; | ||
} | ||
|
||
( | ||
|
@@ -6709,17 +6720,22 @@ impl AccountsDb { | |
}, | ||
); | ||
|
||
let (dirty_pubkeys, insert_us) = self | ||
let (dirty_pubkeys, insert_time_us) = self | ||
.accounts_index | ||
.insert_new_if_missing_into_primary_index(*slot, len, items); | ||
.insert_new_if_missing_into_primary_index(*slot, num_accounts, items); | ||
|
||
// dirty_pubkeys will contain a pubkey if an item has multiple rooted entries for | ||
// a given pubkey. If there is just a single item, there is no cleaning to | ||
// be done on that pubkey. Use only those pubkeys with multiple updates. | ||
if !dirty_pubkeys.is_empty() { | ||
self.uncleaned_pubkeys.insert(*slot, dirty_pubkeys); | ||
} | ||
(insert_us, rent_exempt, len as u64) | ||
GenerateIndexForSlotResult { | ||
insert_time_us, | ||
num_accounts: num_accounts as u64, | ||
num_accounts_rent_exempt, | ||
accounts_data_len, | ||
} | ||
} | ||
|
||
fn filler_unique_id_bytes() -> usize { | ||
|
@@ -6855,7 +6871,7 @@ impl AccountsDb { | |
limit_load_slot_count_from_snapshot: Option<usize>, | ||
verify: bool, | ||
genesis_config: &GenesisConfig, | ||
) { | ||
) -> usize { | ||
let mut slots = self.storage.all_slots(); | ||
#[allow(clippy::stable_sort_primitive)] | ||
slots.sort(); | ||
|
@@ -6870,6 +6886,7 @@ impl AccountsDb { | |
genesis_config.slots_per_year(), | ||
&genesis_config.rent, | ||
); | ||
let accounts_data_len = AtomicUsize::new(0); | ||
|
||
// pass == 0 always runs and generates the index | ||
// pass == 1 only runs if verify == true. | ||
|
@@ -6915,10 +6932,16 @@ impl AccountsDb { | |
|
||
let insert_us = if pass == 0 { | ||
// generate index | ||
let (insert_us, rent_exempt_this_slot, total_this_slot) = | ||
self.generate_index_for_slot(accounts_map, slot, &rent_collector); | ||
let GenerateIndexForSlotResult { | ||
insert_time_us: insert_us, | ||
num_accounts: total_this_slot, | ||
num_accounts_rent_exempt: rent_exempt_this_slot, | ||
accounts_data_len: accounts_data_len_this_slot, | ||
} = self.generate_index_for_slot(accounts_map, slot, &rent_collector); | ||
rent_exempt.fetch_add(rent_exempt_this_slot, Ordering::Relaxed); | ||
total_duplicates.fetch_add(total_this_slot, Ordering::Relaxed); | ||
accounts_data_len | ||
.fetch_add(accounts_data_len_this_slot, Ordering::Relaxed); | ||
insert_us | ||
} else { | ||
// verify index matches expected and measure the time to get all items | ||
|
@@ -6972,6 +6995,69 @@ impl AccountsDb { | |
}) | ||
.sum(); | ||
|
||
if pass == 0 { | ||
// subtract data.len() from accounts_data_len for all old accounts which are in the index twice | ||
let mut timer = Measure::start("handle accounts data len duplicates"); | ||
let mut unique_pubkeys = HashSet::<Pubkey>::default(); | ||
self.uncleaned_pubkeys.iter().for_each(|entry| { | ||
entry.value().iter().for_each(|pubkey| { | ||
unique_pubkeys.insert(*pubkey); | ||
}) | ||
}); | ||
unique_pubkeys | ||
.into_iter() | ||
.collect::<Vec<_>>() | ||
.par_chunks(4096) | ||
.for_each(|pubkeys| { | ||
// subract out older data.len() for each pubkey | ||
let mut accounts_data_len_from_duplicates = 0; | ||
pubkeys.into_iter().for_each(|pubkey| { | ||
if let Some(entry) = self.accounts_index.get_account_read_entry(pubkey) | ||
{ | ||
let list = entry.slot_list(); | ||
if list.len() < 2 { | ||
return; | ||
} | ||
let mut list = list.clone(); | ||
list.sort_unstable_by(|a, b| a.0.cmp(&b.0)); | ||
assert!(list[0].0 < list[1].0); // greatest to least | ||
list.into_iter() | ||
.rev() | ||
.skip(1) | ||
.for_each(|(slot, account_info)| { | ||
let maybe_storage_entry = self | ||
.storage | ||
.get_account_storage_entry(slot, account_info.store_id); | ||
let mut accessor = LoadedAccountAccessor::Stored( | ||
maybe_storage_entry | ||
.map(|entry| (entry, account_info.offset)), | ||
); | ||
let loaded_account = | ||
accessor.check_and_get_loaded_account(); | ||
let account = loaded_account.take_account(); | ||
if !account.is_zero_lamport() { | ||
accounts_data_len_from_duplicates += | ||
account.data().len(); | ||
} | ||
}); | ||
} | ||
}); | ||
accounts_data_len | ||
.fetch_sub(accounts_data_len_from_duplicates, Ordering::Relaxed); | ||
}); | ||
timer.stop(); | ||
info!( | ||
"accounts data len: {}, {}", | ||
accounts_data_len.load(Ordering::Relaxed), | ||
timer | ||
); | ||
error!( | ||
"bprumo DEBUG: generate_index(), accounts data len: {}, {}", | ||
accounts_data_len.load(Ordering::Relaxed), | ||
timer | ||
); | ||
} | ||
|
||
let storage_info_timings = storage_info_timings.into_inner().unwrap(); | ||
|
||
let mut index_flush_us = 0; | ||
|
@@ -7009,6 +7095,7 @@ impl AccountsDb { | |
} | ||
timings.report(); | ||
} | ||
accounts_data_len.load(Ordering::Relaxed) | ||
} | ||
|
||
fn update_storage_info( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since this is local to a tx, could just store remaining
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I agree. I'm still prototyping the tracking of the accounts data len, so was trying to keep around more information for now. Once I have a good idea on how this all fits together I was planning to reduce the impl.