diff --git a/core/src/sigverify_stage.rs b/core/src/sigverify_stage.rs index 9aa7e6871d5434..d45adefb877c55 100644 --- a/core/src/sigverify_stage.rs +++ b/core/src/sigverify_stage.rs @@ -228,19 +228,18 @@ impl SigVerifyStage { num_packets, ); - //50ns per packet with a single core + let mut dedup_time = Measure::start("sigverify_dedup_time"); + let dedup_fail = deduper.dedup_packets(&mut batches) as usize; + dedup_time.stop(); + let num_unique = num_packets.saturating_sub(dedup_fail); + let mut discard_time = Measure::start("sigverify_discard_time"); - if num_packets > MAX_SIGVERIFY_BATCH { + if num_unique > MAX_SIGVERIFY_BATCH { Self::discard_excess_packets(&mut batches, MAX_SIGVERIFY_BATCH) }; - let excess_fail = num_packets.saturating_sub(MAX_SIGVERIFY_BATCH); + let excess_fail = num_unique.saturating_sub(MAX_SIGVERIFY_BATCH); discard_time.stop(); - //100ns per packet with N cores - let mut dedup_time = Measure::start("sigverify_dedup_time"); - let dedup_fail = deduper.dedup_packets(&mut batches) as usize; - dedup_time.stop(); - let mut verify_batch_time = Measure::start("sigverify_batch_time"); let batches = verifier.verify_batches(batches); sendr.send(batches)?; diff --git a/perf/benches/dedup.rs b/perf/benches/dedup.rs index c50b827500c2d8..8009bf2804bcfe 100644 --- a/perf/benches/dedup.rs +++ b/perf/benches/dedup.rs @@ -12,6 +12,8 @@ use { test::Bencher, }; +const NUM: usize = 4096; + fn test_packet_with_size(size: usize, rng: &mut ThreadRng) -> Vec { // subtract 8 bytes because the length will get serialized as well (0..size.checked_sub(8).unwrap()) @@ -39,7 +41,7 @@ fn bench_dedup_same_small_packets(bencher: &mut Bencher) { let batches = to_packet_batches( &std::iter::repeat(small_packet) - .take(4096) + .take(NUM) .collect::>(), 128, ); @@ -54,7 +56,7 @@ fn bench_dedup_same_big_packets(bencher: &mut Bencher) { let big_packet = test_packet_with_size(1024, &mut rng); let batches = to_packet_batches( - &std::iter::repeat(big_packet).take(4096).collect::>(), + &std::iter::repeat(big_packet).take(NUM).collect::>(), 128, ); @@ -67,7 +69,7 @@ fn bench_dedup_diff_small_packets(bencher: &mut Bencher) { let mut rng = rand::thread_rng(); let batches = to_packet_batches( - &(0..4096) + &(0..NUM) .map(|_| test_packet_with_size(128, &mut rng)) .collect::>(), 128, @@ -82,7 +84,7 @@ fn bench_dedup_diff_big_packets(bencher: &mut Bencher) { let mut rng = rand::thread_rng(); let batches = to_packet_batches( - &(0..4096) + &(0..NUM) .map(|_| test_packet_with_size(1024, &mut rng)) .collect::>(), 128, @@ -91,6 +93,21 @@ fn bench_dedup_diff_big_packets(bencher: &mut Bencher) { do_bench_dedup_packets(bencher, batches); } +#[bench] +#[ignore] +fn bench_dedup_baseline(bencher: &mut Bencher) { + let mut rng = rand::thread_rng(); + + let batches = to_packet_batches( + &(0..0) + .map(|_| test_packet_with_size(128, &mut rng)) + .collect::>(), + 128, + ); + + do_bench_dedup_packets(bencher, batches); +} + #[bench] #[ignore] fn bench_dedup_reset(bencher: &mut Bencher) { diff --git a/perf/src/sigverify.rs b/perf/src/sigverify.rs index c20c557f6b1ea9..192e8cad09b3e2 100644 --- a/perf/src/sigverify.rs +++ b/perf/src/sigverify.rs @@ -454,16 +454,12 @@ impl Deduper { } pub fn dedup_packets(&self, batches: &mut [PacketBatch]) -> u64 { - use rayon::prelude::*; - // machine specific random offset to read the u64 from the packet signature let count = AtomicU64::new(0); - PAR_THREAD_POOL.install(|| { - batches.into_par_iter().for_each(|batch| { - batch - .packets - .par_iter_mut() - .for_each(|p| self.dedup_packet(&count, p)) - }) + batches.iter_mut().for_each(|batch| { + batch + .packets + .iter_mut() + .for_each(|p| self.dedup_packet(&count, p)) }); count.load(Ordering::Relaxed) } @@ -479,7 +475,7 @@ pub fn ed25519_verify_cpu(batches: &mut [PacketBatch], reject_non_vote: bool) { .packets .par_iter_mut() .for_each(|p| verify_packet(p, reject_non_vote)) - }) + }); }); inc_new_counter_debug!("ed25519_verify_cpu", packet_count); }