Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Limit accounts data size #21719

Closed
73 changes: 73 additions & 0 deletions program-runtime/src/invoke_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,67 @@ impl ComputeMeter {
}
}

// bprumo TODO: document where the 128 GB figure came from, and move this const to a better home.
// 128 GB
pub const DEFAULT_MAX_ACCOUNTS_DATA_LEN: usize = 128_000_000_000;

/// bprumo TODO: doc, and/or move
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)]
pub struct AccountsDataBudget {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this is local to a tx, could just store remaining

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I agree. I'm still prototyping the tracking of the accounts data len, so was trying to keep around more information for now. Once I have a good idea on how this all fits together I was planning to reduce the impl.

/// The global maximum size for accounts data (i.e. DEFAULT_MAX_ACCOUNTS_DATA_LEN)
maximum: usize,
/// The current accounts data size (for a Bank)
current: usize,
}
impl AccountsDataBudget {
pub fn new(maximum: usize, current: usize) -> Self {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is maximum something that is going to change dynamically? If not, it could be encapsulated in AccountsDataBudget.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure if the maximum will change dynamically yet. This is still to make prototyping easiest, and to allow tests to set a different maximum as well. I agree that the max could be in AccountsDataBudget directly.

Self { maximum, current }
}
pub fn maximum(&self) -> usize {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Needed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope. Just here for convenience while prototyping.

self.maximum
}
/// Returns the `current` field: the accounts data size currently in use.
pub fn current(&self) -> usize {
self.current
}
/// Returns how much accounts data space is left: `maximum - current`,
/// clamped at zero if `current` already exceeds `maximum`.
pub fn remaining(&self) -> usize {
    let in_use = self.current;
    self.maximum.saturating_sub(in_use)
}
}

/// bprumo TODO: doc, and/or move
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)]
pub struct AccountsDataMeter {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably just need this; don't need the AccountsDataBudget.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yah. At the moment AccountsDataBudget is not really used for anything.

/// The amount of available accounts data space (i.e. AccountsDataBudget::remaining())
capacity: usize,
/// The amount of accounts data space consumed. This value is used to update the
/// Bank after transactions are successfully processed.
consumed: usize,
}
impl AccountsDataMeter {
    /// Builds a meter with the given `capacity` and nothing consumed yet,
    /// wrapped in `Rc<RefCell<_>>` so it can be shared and mutated.
    pub fn new_ref(capacity: usize) -> Rc<RefCell<Self>> {
        let meter = Self {
            capacity,
            consumed: 0,
        };
        Rc::new(RefCell::new(meter))
    }

    /// Returns the total accounts data space this meter started with.
    pub fn capacity(&self) -> usize {
        self.capacity
    }

    /// Returns the accounts data space consumed so far.
    pub fn consumed(&self) -> usize {
        self.consumed
    }

    /// Returns the space still available: `capacity - consumed`, clamped at zero.
    pub fn remaining(&self) -> usize {
        self.capacity.saturating_sub(self.consumed)
    }

    /// Records `amount` of accounts data space as consumed.
    ///
    /// # Errors
    ///
    /// Returns `InstructionError::AccountsDataBudgetExceeded`, leaving the
    /// meter untouched, when `amount` is larger than `remaining()`.
    pub fn consume(&mut self, amount: usize) -> Result<(), InstructionError> {
        if self.remaining() < amount {
            Err(InstructionError::AccountsDataBudgetExceeded)
        } else {
            self.consumed = self.consumed.saturating_add(amount);
            Ok(())
        }
    }
}

pub struct StackFrame<'a> {
pub number_of_program_accounts: usize,
pub keyed_accounts: Vec<KeyedAccount<'a>>,
Expand All @@ -134,6 +195,7 @@ impl<'a> StackFrame<'a> {
}
}

// bprumo NOTE: here's the invoke context
pub struct InvokeContext<'a> {
invoke_stack: Vec<StackFrame<'a>>,
rent: Rent,
Expand All @@ -145,6 +207,7 @@ pub struct InvokeContext<'a> {
compute_budget: ComputeBudget,
current_compute_budget: ComputeBudget,
compute_meter: Rc<RefCell<ComputeMeter>>,
accounts_data_meter: Rc<RefCell<AccountsDataMeter>>,
executors: Rc<RefCell<Executors>>,
pub instruction_recorder: Option<&'a InstructionRecorder>,
pub feature_set: Arc<FeatureSet>,
Expand All @@ -163,6 +226,7 @@ impl<'a> InvokeContext<'a> {
sysvars: &'a [(Pubkey, Vec<u8>)],
log_collector: Option<Rc<RefCell<LogCollector>>>,
compute_budget: ComputeBudget,
accounts_data_budget: AccountsDataBudget,
executors: Rc<RefCell<Executors>>,
feature_set: Arc<FeatureSet>,
blockhash: Hash,
Expand All @@ -179,6 +243,7 @@ impl<'a> InvokeContext<'a> {
current_compute_budget: compute_budget,
compute_budget,
compute_meter: ComputeMeter::new_ref(compute_budget.max_units),
accounts_data_meter: AccountsDataMeter::new_ref(accounts_data_budget.remaining()),
executors,
instruction_recorder: None,
feature_set,
Expand All @@ -200,6 +265,7 @@ impl<'a> InvokeContext<'a> {
&[],
Some(LogCollector::new_ref()),
ComputeBudget::default(),
AccountsDataBudget::new(DEFAULT_MAX_ACCOUNTS_DATA_LEN, 0),
Rc::new(RefCell::new(Executors::default())),
Arc::new(FeatureSet::all_enabled()),
Hash::default(),
Expand Down Expand Up @@ -508,6 +574,7 @@ impl<'a> InvokeContext<'a> {
// Verify the called program has not misbehaved
let do_support_realloc = self.feature_set.is_active(&do_support_realloc::id());
for (account, prev_size) in prev_account_sizes.iter() {
// bprumo NOTE: looks like this checks for size changes...
if !do_support_realloc && *prev_size != account.borrow().data().len() && *prev_size != 0
{
// Only support for `CreateAccount` at this time.
Expand Down Expand Up @@ -636,6 +703,7 @@ impl<'a> InvokeContext<'a> {
Ok((message, caller_write_privileges, program_indices))
}

// bprumo NOTE: here's where the CPI instructions are processed
/// Process a cross-program instruction
pub fn process_instruction(
&mut self,
Expand Down Expand Up @@ -783,6 +851,11 @@ impl<'a> InvokeContext<'a> {
self.compute_meter.clone()
}

/// bprumo TODO: doc
pub fn accounts_data_meter(&self) -> Rc<RefCell<AccountsDataMeter>> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

our convention is get_account_data_meter

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do!

Rc::clone(&self.accounts_data_meter)
}

/// Loaders may need to do work in order to execute a program. Cache
/// the work that can be re-used across executions
pub fn add_executor(&self, pubkey: &Pubkey, executor: Arc<dyn Executor>) {
Expand Down
111 changes: 99 additions & 12 deletions runtime/src/accounts_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1518,6 +1518,14 @@ struct IndexAccountMapEntry<'a> {

type GenerateIndexAccountsMap<'a> = HashMap<Pubkey, IndexAccountMapEntry<'a>>;

#[derive(Debug, Default, Clone, Copy)]
struct GenerateIndexForSlotResult {
Copy link
Contributor

@jackcmay jackcmay Dec 9, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe a better name here, something more descriptive of what it actually represents?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it is for getting the accounts data len during deserialization of a snapshot.

insert_time_us: u64,
num_accounts: u64,
num_accounts_rent_exempt: u64,
accounts_data_len: usize,
}

impl AccountsDb {
pub fn default_for_tests() -> Self {
Self::default_with_accounts_index(AccountInfoAccountsIndex::default_for_tests(), None, None)
Expand Down Expand Up @@ -6657,21 +6665,21 @@ impl AccountsDb {
accounts_map
}

/// return a GenerateIndexForSlotResult: insert time_us, total # accts, # accts rent exempt, accounts data len
fn generate_index_for_slot<'a>(
&self,
accounts_map: GenerateIndexAccountsMap<'a>,
slot: &Slot,
rent_collector: &RentCollector,
) -> (u64, u64, u64) {
) -> GenerateIndexForSlotResult {
if accounts_map.is_empty() {
return (0, 0, 0);
return GenerateIndexForSlotResult::default();
}

let secondary = !self.account_indexes.is_empty();

let mut rent_exempt = 0;
let len = accounts_map.len();
let mut num_accounts_rent_exempt = 0;
let mut accounts_data_len = 0;
let num_accounts = accounts_map.len();
let items = accounts_map.into_iter().map(
|(
pubkey,
Expand All @@ -6689,12 +6697,15 @@ impl AccountsDb {
&self.account_indexes,
);
}
if !stored_account.is_zero_lamport() {
accounts_data_len += stored_account.data().len();
}

if !rent_collector.should_collect_rent(&pubkey, &stored_account, false) || {
let (_rent_due, exempt) = rent_collector.get_rent_due(&stored_account);
exempt
} {
rent_exempt += 1;
num_accounts_rent_exempt += 1;
}

(
Expand All @@ -6709,17 +6720,22 @@ impl AccountsDb {
},
);

let (dirty_pubkeys, insert_us) = self
let (dirty_pubkeys, insert_time_us) = self
.accounts_index
.insert_new_if_missing_into_primary_index(*slot, len, items);
.insert_new_if_missing_into_primary_index(*slot, num_accounts, items);

// dirty_pubkeys will contain a pubkey if an item has multiple rooted entries for
// a given pubkey. If there is just a single item, there is no cleaning to
// be done on that pubkey. Use only those pubkeys with multiple updates.
if !dirty_pubkeys.is_empty() {
self.uncleaned_pubkeys.insert(*slot, dirty_pubkeys);
}
(insert_us, rent_exempt, len as u64)
GenerateIndexForSlotResult {
insert_time_us,
num_accounts: num_accounts as u64,
num_accounts_rent_exempt,
accounts_data_len,
}
}

fn filler_unique_id_bytes() -> usize {
Expand Down Expand Up @@ -6855,7 +6871,7 @@ impl AccountsDb {
limit_load_slot_count_from_snapshot: Option<usize>,
verify: bool,
genesis_config: &GenesisConfig,
) {
) -> usize {
let mut slots = self.storage.all_slots();
#[allow(clippy::stable_sort_primitive)]
slots.sort();
Expand All @@ -6870,6 +6886,7 @@ impl AccountsDb {
genesis_config.slots_per_year(),
&genesis_config.rent,
);
let accounts_data_len = AtomicUsize::new(0);

// pass == 0 always runs and generates the index
// pass == 1 only runs if verify == true.
Expand Down Expand Up @@ -6915,10 +6932,16 @@ impl AccountsDb {

let insert_us = if pass == 0 {
// generate index
let (insert_us, rent_exempt_this_slot, total_this_slot) =
self.generate_index_for_slot(accounts_map, slot, &rent_collector);
let GenerateIndexForSlotResult {
insert_time_us: insert_us,
num_accounts: total_this_slot,
num_accounts_rent_exempt: rent_exempt_this_slot,
accounts_data_len: accounts_data_len_this_slot,
} = self.generate_index_for_slot(accounts_map, slot, &rent_collector);
rent_exempt.fetch_add(rent_exempt_this_slot, Ordering::Relaxed);
total_duplicates.fetch_add(total_this_slot, Ordering::Relaxed);
accounts_data_len
.fetch_add(accounts_data_len_this_slot, Ordering::Relaxed);
insert_us
} else {
// verify index matches expected and measure the time to get all items
Expand Down Expand Up @@ -6972,6 +6995,69 @@ impl AccountsDb {
})
.sum();

if pass == 0 {
// subtract data.len() from accounts_data_len for all old accounts which are in the index twice
let mut timer = Measure::start("handle accounts data len duplicates");
let mut unique_pubkeys = HashSet::<Pubkey>::default();
self.uncleaned_pubkeys.iter().for_each(|entry| {
entry.value().iter().for_each(|pubkey| {
unique_pubkeys.insert(*pubkey);
})
});
unique_pubkeys
.into_iter()
.collect::<Vec<_>>()
.par_chunks(4096)
.for_each(|pubkeys| {
// subtract out older data.len() for each pubkey
let mut accounts_data_len_from_duplicates = 0;
pubkeys.into_iter().for_each(|pubkey| {
if let Some(entry) = self.accounts_index.get_account_read_entry(pubkey)
{
let list = entry.slot_list();
if list.len() < 2 {
return;
}
let mut list = list.clone();
list.sort_unstable_by(|a, b| a.0.cmp(&b.0));
assert!(list[0].0 < list[1].0); // sorted least to greatest by slot; reversed below
list.into_iter()
.rev()
.skip(1)
.for_each(|(slot, account_info)| {
let maybe_storage_entry = self
.storage
.get_account_storage_entry(slot, account_info.store_id);
let mut accessor = LoadedAccountAccessor::Stored(
maybe_storage_entry
.map(|entry| (entry, account_info.offset)),
);
let loaded_account =
accessor.check_and_get_loaded_account();
let account = loaded_account.take_account();
if !account.is_zero_lamport() {
accounts_data_len_from_duplicates +=
account.data().len();
}
});
}
});
accounts_data_len
.fetch_sub(accounts_data_len_from_duplicates, Ordering::Relaxed);
});
timer.stop();
info!(
"accounts data len: {}, {}",
accounts_data_len.load(Ordering::Relaxed),
timer
);
error!(
"bprumo DEBUG: generate_index(), accounts data len: {}, {}",
accounts_data_len.load(Ordering::Relaxed),
timer
);
}

let storage_info_timings = storage_info_timings.into_inner().unwrap();

let mut index_flush_us = 0;
Expand Down Expand Up @@ -7009,6 +7095,7 @@ impl AccountsDb {
}
timings.report();
}
accounts_data_len.load(Ordering::Relaxed)
}

fn update_storage_info(
Expand Down
Loading