Add benchmark for execute_batch #34717

Merged Jan 13, 2024 (4 commits).
Changes from 1 commit.
ledger/benches/blockstore_processor.rs (176 additions, 0 deletions)
@@ -0,0 +1,176 @@
#![allow(clippy::arithmetic_side_effects)]
#![feature(test)]

use {
rayon::{
iter::IndexedParallelIterator,
prelude::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator},
},
solana_ledger::{
blockstore_processor::{execute_batch, TransactionBatchWithIndexes},
genesis_utils::{create_genesis_config, GenesisConfigInfo},
},
solana_program_runtime::timings::ExecuteTimings,
solana_runtime::{
bank::Bank, prioritization_fee_cache::PrioritizationFeeCache,
transaction_batch::TransactionBatch,
},
solana_sdk::{
account::Account, feature_set::apply_cost_tracker_during_replay, signature::Keypair,
signer::Signer, stake_history::Epoch, system_program, system_transaction,
transaction::SanitizedTransaction,
},
std::{borrow::Cow, sync::Arc},
test::Bencher,
};

extern crate test;

fn create_accounts(num: usize) -> Vec<Keypair> {
(0..num).into_par_iter().map(|_| Keypair::new()).collect()
}

fn create_funded_accounts(bank: &Bank, num: usize) -> Vec<Keypair> {
assert!(
num.is_power_of_two(),
"must be power of 2 for parallel funding tree"
);
let accounts = create_accounts(num);

accounts.par_iter().for_each(|account| {
bank.store_account(
&account.pubkey(),
&Account {
lamports: 5100,
data: vec![],
owner: system_program::id(),
executable: false,
rent_epoch: Epoch::MAX,
},
);
});

accounts
}

fn create_transactions(bank: &Bank, num: usize) -> Vec<SanitizedTransaction> {
let funded_accounts = create_funded_accounts(bank, 2 * num);
funded_accounts
.into_par_iter()
.chunks(2)
.map(|chunk| {
let from = &chunk[0];
let to = &chunk[1];
system_transaction::transfer(from, &to.pubkey(), 1, bank.last_blockhash())
})
.map(SanitizedTransaction::from_transaction_for_tests)
.collect()
}

struct BenchFrame {
bank: Arc<Bank>,
prioritization_fee_cache: PrioritizationFeeCache,
}

fn setup(apply_cost_tracker_during_replay: bool) -> BenchFrame {
let mint_total = u64::MAX;
let GenesisConfigInfo {
mut genesis_config, ..
} = create_genesis_config(mint_total);

// Set a high ticks_per_slot so we don't run out of ticks
// during the benchmark
genesis_config.ticks_per_slot = 10_000;

let mut bank = Bank::new_for_benches(&genesis_config);

if !apply_cost_tracker_during_replay {
bank.deactivate_feature(&apply_cost_tracker_during_replay::id());
}

// Allow arbitrary transaction processing time for the purposes of this bench
bank.ns_per_slot = u128::MAX;

// set cost tracker limits to MAX so it will not filter out TXs
bank.write_cost_tracker()
.unwrap()
.set_limits(std::u64::MAX, std::u64::MAX, std::u64::MAX);
let bank = bank.wrap_with_bank_forks_for_tests().0;
let prioritization_fee_cache = PrioritizationFeeCache::default();
BenchFrame {
bank,
prioritization_fee_cache,
}
}

fn bench_execute_batch(
bencher: &mut Bencher,
batch_size: usize,
apply_cost_tracker_during_replay: bool,
) {
let BenchFrame {
bank,
prioritization_fee_cache,
} = setup(apply_cost_tracker_during_replay);
let transactions = create_transactions(&bank, 2_usize.pow(20));
let batches: Vec<_> = transactions
.chunks(batch_size)
.map(|txs| {
let mut batch =
TransactionBatch::new(vec![Ok(()); txs.len()], &bank, Cow::Borrowed(txs));
batch.set_needs_unlock(false);
TransactionBatchWithIndexes {
batch,
transaction_indexes: (0..batch_size).collect(),
}
})
.collect();
let mut batches_iter = batches.into_iter();

let mut timing = ExecuteTimings::default();
bencher.iter({
let bank = bank.clone();
Member commented:

nit: a single Arc::clone() won't skew the results, but I'd like to remove the .clone() here like this, mainly for code simplicity:

$ git diff apfitzge/bench_execute_batch
diff --git a/ledger/benches/blockstore_processor.rs b/ledger/benches/blockstore_processor.rs
index b5d83144a6..e0d19853fe 100644
--- a/ledger/benches/blockstore_processor.rs
+++ b/ledger/benches/blockstore_processor.rs
@@ -113,11 +113,12 @@ fn bench_execute_batch(
         prioritization_fee_cache,
     } = setup(apply_cost_tracker_during_replay);
     let transactions = create_transactions(&bank, 2_usize.pow(20));
+    let bank2 = bank.clone();
     let batches: Vec<_> = transactions
         .chunks(batch_size)
         .map(|txs| {
             let mut batch =
-                TransactionBatch::new(vec![Ok(()); txs.len()], &bank, Cow::Borrowed(txs));
+                TransactionBatch::new(vec![Ok(()); txs.len()], &bank2, Cow::Borrowed(txs));
             batch.set_needs_unlock(false);
             TransactionBatchWithIndexes {
                 batch,
@@ -128,20 +129,17 @@ fn bench_execute_batch(
     let mut batches_iter = batches.into_iter();
 
     let mut timing = ExecuteTimings::default();
-    bencher.iter({
-        let bank = bank.clone();
-        move || {
-            let batch = batches_iter.next().unwrap();
-            execute_batch(
-                &batch,
-                &bank,
-                None,
-                None,
-                &mut timing,
-                None,
-                &prioritization_fee_cache,
-            )
-        }
+    bencher.iter(|| {
+        let batch = batches_iter.next().unwrap();
+        execute_batch(
+            &batch,
+            &bank,
+            None,
+            None,
+            &mut timing,
+            None,
+            &prioritization_fee_cache,
+        )
     });
 }

Contributor Author replied:

The Arc::clone wasn't actually happening per iteration, but only once when the closure was created, because I marked the closure with move... which I'm not sure was necessary.

It wasn't! d2dfb14
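
For context, a minimal standalone sketch of the point (an illustration, not the PR's code): with `move`, the Arc is cloned exactly once when the closure is created; without `move`, the closure simply borrows the existing Arc and no clone is needed at all.

use std::sync::Arc;

fn main() {
    let bank = Arc::new(42u64);

    // With `move`: the closure needs its own handle, so the Arc is cloned once,
    // at closure creation time, and that clone is captured by value.
    let cloned = Arc::clone(&bank);
    let with_move = move || *cloned + 1;

    // Without `move`: the closure just borrows `bank`; no clone at all.
    let without_move = || *bank + 1;

    assert_eq!(with_move(), 43);
    assert_eq!(without_move(), 43);

    // Exactly one extra strong reference exists: the clone made at closure
    // creation, not one per call.
    assert_eq!(Arc::strong_count(&bank), 2);
}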

move || {
let batch = batches_iter.next().unwrap();
execute_batch(
@ryoqun (Member) commented on Jan 11, 2024:

I'd prefer to process the same total number of transactions regardless of batch_size, to show the overhead more clearly:

diff --git a/ledger/benches/blockstore_processor.rs b/ledger/benches/blockstore_processor.rs
index e0d19853fe..7ec2b17d97 100644
--- a/ledger/benches/blockstore_processor.rs
+++ b/ledger/benches/blockstore_processor.rs
@@ -130,16 +130,18 @@ fn bench_execute_batch(
 
     let mut timing = ExecuteTimings::default();
     bencher.iter(|| {
-        let batch = batches_iter.next().unwrap();
-        execute_batch(
-            &batch,
-            &bank,
-            None,
-            None,
-            &mut timing,
-            None,
-            &prioritization_fee_cache,
-        )
+        for _ in 0..(64/batch_size) {  // EDIT: well, using `.take()` is preferred...
+            let batch = batches_iter.next().unwrap();
+            execute_batch(
+                &batch,
+                &bank,
+                None,
+                None,
+                &mut timing,
+                None,
+                &prioritization_fee_cache,
+            ).unwrap();
+        }
     });
 }

result:

running 6 tests
test bench_execute_batch_full_batch                        ... bench:     740,169 ns/iter (+/- 39,800)
test bench_execute_batch_full_batch_disable_tx_cost_update ... bench:     774,346 ns/iter (+/- 28,495)
test bench_execute_batch_half_batch                        ... bench:     824,189 ns/iter (+/- 29,661)
test bench_execute_batch_half_batch_disable_tx_cost_update ... bench:     811,608 ns/iter (+/- 20,936)
test bench_execute_batch_unbatched                         ... bench:   1,381,782 ns/iter (+/- 49,475)
test bench_execute_batch_unbatched_disable_tx_cost_update  ... bench:   1,334,157 ns/iter (+/- 88,541)

test result: ok. 0 passed; 0 failed; 0 ignored; 6 measured; 0 filtered out; finished in 42.13s
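
(With this change each bench iteration processes 64 transactions in total regardless of batch size, so the ns/iter figures are directly comparable: the unbatched run at ~1.38 ms/iter costs roughly twice the full 64-transaction batch at ~0.74 ms/iter, and the difference is per-batch overhead.)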

Contributor Author replied:

Yeah that's a great idea!

Contributor Author added:

Didn't use take since that would consume the iterator and not let us use it on the next iteration of the benchmark.
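
A minimal standalone sketch of that point (an illustration, not the benchmark code): `Iterator::take(self, n)` takes the iterator by value, so the original binding can no longer be advanced afterwards, whereas a counted loop of `next()` calls leaves it usable for the next bench iteration.

fn main() {
    // A stand-in for `batches_iter`: an iterator we want to keep reusing.
    let mut items = (0..10).collect::<Vec<u32>>().into_iter();

    // Pulling a fixed number of elements with `next()` leaves `items` usable
    // afterwards, which is what the benchmark loop needs.
    for _ in 0..3 {
        let _item = items.next().unwrap();
    }
    assert_eq!(items.next(), Some(3)); // continues where it left off

    // By contrast, `take` moves the iterator into the adapter:
    // let first_three = items.take(3); // `items` is moved here
    // items.next();                    // error[E0382]: use of moved value: `items`
}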

Contributor Author followed up:

I made a similar change for the consumer benchmarks: #34752

Thanks for the suggestion; no more math needed to compare the throughput across batch sizes.

&batch,
&bank,
None,
None,
&mut timing,
None,
&prioritization_fee_cache,
)
}
});
}

#[bench]
fn bench_execute_batch_unbatched(bencher: &mut Bencher) {
bench_execute_batch(bencher, 1, true);
}

#[bench]
fn bench_execute_batch_half_batch(bencher: &mut Bencher) {
bench_execute_batch(bencher, 32, true);
}

#[bench]
fn bench_execute_batch_full_batch(bencher: &mut Bencher) {
bench_execute_batch(bencher, 64, true);
}

#[bench]
fn bench_execute_batch_unbatched_disable_tx_cost_update(bencher: &mut Bencher) {
bench_execute_batch(bencher, 1, false);
}

#[bench]
fn bench_execute_batch_half_batch_disable_tx_cost_update(bencher: &mut Bencher) {
bench_execute_batch(bencher, 32, false);
}

#[bench]
fn bench_execute_batch_full_batch_disable_tx_cost_update(bencher: &mut Bencher) {
bench_execute_batch(bencher, 64, false);
}
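
(A hedged note on running these, not part of the PR: the #![feature(test)] attribute requires a nightly toolchain, so something like `cargo +nightly bench --bench blockstore_processor` from the ledger crate should run the six benches above; the exact package and target names are assumptions based on the file path.)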