From 64dd941fdb398dcb98d39138754ff8d9bc85a5a0 Mon Sep 17 00:00:00 2001 From: Tomasz Andrzejak Date: Wed, 31 Jul 2024 11:18:32 +0200 Subject: [PATCH] Optimize frame stats collection (#151) ### Checklist * [x] I have read the [Contributor Guide](../CONTRIBUTING.md) * [x] I have read and agree to the [Code of Conduct](../CODE_OF_CONDUCT.md) * [x] I have added a description of my changes and why I'd like them included in the section below ### Description of Changes Optimize UI performance by calculating frame collection statistics dynamically. Rather than repeatedly traversing each frame, we now update the stats upon adding or removing frames. This change mitigates the significant overhead caused by constantly accessing frame data protected by `RwLock`. Also, this update modifies the data containers used for storing frames to adopt binary tree structures. As a result, all frames are sorted at the time of insertion, eliminating the need for subsequent sorting during vector collection. Moreover, instead of returning vectors, the binary tree structure enables iteration over elements in a sorted order, providing iterators rather than vectors. This functionality gives the API caller the flexibility to either clone frames or simply inspect them in an efficient manner. Before: ![before](https://github.com/EmbarkStudios/puffin/assets/7009786/e1472c1e-77e8-4845-beff-2d5a9bca0e1a) After: ![after](https://github.com/EmbarkStudios/puffin/assets/7009786/ea100b5a-ad21-4b7d-a0bd-45d918c656f5) --- Cargo.lock | 1 + puffin/CHANGELOG.md | 2 + puffin/Cargo.toml | 1 + puffin/benches/benchmark.rs | 9 ++ puffin/src/frame_data.rs | 226 +++++++++++++++++++++---------- puffin/src/lib.rs | 2 +- puffin/src/profile_view.rs | 256 +++++++++++++++++++++++++++++------- puffin_egui/CHANGELOG.md | 1 + puffin_egui/src/lib.rs | 94 ++++++------- 9 files changed, 424 insertions(+), 168 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a8d32977..e02cfa0a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1730,6 +1730,7 @@ dependencies = [ "byteorder", "cfg-if", "criterion", + "itertools", "js-sys", "lz4_flex", "once_cell", diff --git a/puffin/CHANGELOG.md b/puffin/CHANGELOG.md index 9249fa27..3ff86708 100644 --- a/puffin/CHANGELOG.md +++ b/puffin/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] - ReleaseDate +- [PR#151](https://github.com/EmbarkStudios/puffin/pull/151) Optimize frame statistics collection. + ## [0.19.0] - 2024-01-17 - [PR#169](https://github.com/EmbarkStudios/puffin/pull/169) Stream scope information only once, drastically reduce bandwidth and increased performance. Allow better usage of static strings in profile scopes. Breaking change! See PR for migration guide. diff --git a/puffin/Cargo.toml b/puffin/Cargo.toml index 3c7fee2f..33633851 100644 --- a/puffin/Cargo.toml +++ b/puffin/Cargo.toml @@ -37,6 +37,7 @@ web = ["dep:js-sys", "dep:web-time"] [dependencies] byteorder = { version = "1.0" } cfg-if = "1.0" +itertools = "0.10" once_cell = "1.0" parking_lot = { version = "0.12"} diff --git a/puffin/benches/benchmark.rs b/puffin/benches/benchmark.rs index 33f20317..b5d8eeb7 100644 --- a/puffin/benches/benchmark.rs +++ b/puffin/benches/benchmark.rs @@ -31,6 +31,15 @@ pub fn criterion_benchmark(c: &mut Criterion) { puffin::profile_scope!("my longish scope name", "my_mesh.obj"); }) }); + c.bench_function("flush_frames", |b| { + puffin::GlobalProfiler::lock().new_frame(); + let _fv = puffin::GlobalFrameView::default(); + + b.iter(|| { + puffin::profile_function!(); + puffin::GlobalProfiler::lock().new_frame(); + }) + }); puffin::set_scopes_on(false); c.bench_function("profile_function_off", |b| { diff --git a/puffin/src/frame_data.rs b/puffin/src/frame_data.rs index 6a076f2a..3393f20d 100644 --- a/puffin/src/frame_data.rs +++ b/puffin/src/frame_data.rs @@ -12,7 +12,7 @@ pub type ThreadStreams = BTreeMap>; /// Meta-information about a frame. #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] -#[derive(Clone, Debug)] +#[derive(Clone, Copy, Debug)] pub struct FrameMeta { /// What frame this is (counting from 0 at application startup). pub frame_index: FrameIndex, @@ -160,6 +160,13 @@ impl FrameData { self.unpacked_frame.meta.num_bytes } + /// Returns the packing information for the frame. + pub fn packing_info(&self) -> PackingInfo { + PackingInfo { + unpacked_size: Some(self.unpacked_frame.meta.num_bytes), + packed_size: None, + } + } /// Always returns `false`. pub fn has_packed(&self) -> bool { false @@ -320,14 +327,10 @@ impl PackedStreams { #[cfg(feature = "packing")] pub struct FrameData { meta: FrameMeta, - /// * [`None`] if still compressed. - /// * `Some(Err(…))` if there was a problem during unpacking. - /// * `Some(Ok(…))` if unpacked. - unpacked_frame: RwLock>>>, - /// [`UnpackedFrameData::thread_streams`], compressed. - /// [`None`] if not yet compressed. - packed_streams: RwLock>, + /// Encapsulates the frame data in its current state, which can be + /// uncompressed, compressed, or a combination of both + data: RwLock, /// Scopes that were registered during this frame. pub scope_delta: Vec>, @@ -337,6 +340,113 @@ pub struct FrameData { pub full_delta: bool, } +#[derive(Clone, Copy, Debug)] +pub struct PackingInfo { + /// Number of bytes used when unpacked, if has unpacked. + pub unpacked_size: Option, + /// Number of bytes used by the packed data, if has packed. + pub packed_size: Option, +} + +#[cfg(feature = "packing")] +enum FrameDataState { + /// Unpacked data. + Unpacked(Arc), + + /// [`UnpackedFrameData::thread_streams`], compressed. + Packed(PackedStreams), + + /// Both compressed and uncompressed. + Both(Arc, PackedStreams), +} + +#[cfg(feature = "packing")] +impl FrameDataState { + fn unpacked_size(&self) -> Option { + match self { + FrameDataState::Packed(_) => None, + FrameDataState::Unpacked(unpacked) | FrameDataState::Both(unpacked, _) => { + Some(unpacked.meta.num_bytes) + } + } + } + + fn unpacked(&self) -> Option> { + match self { + FrameDataState::Packed(_) => None, + FrameDataState::Unpacked(unpacked) | FrameDataState::Both(unpacked, _) => { + Some(unpacked.clone()) + } + } + } + + fn unpack(&mut self, unpacked: Arc) { + let temp = std::mem::replace( + self, + FrameDataState::Packed(PackedStreams::new(CompressionKind::Uncompressed, vec![])), + ); + + if let FrameDataState::Packed(packed) = temp { + // Transform only if we don't have unpacked already + *self = FrameDataState::Both(unpacked, packed); + } else { + // Restore the original value if it was not Inner::Packed + *self = temp; + } + } + + fn packed_size(&self) -> Option { + match self { + FrameDataState::Unpacked(_) => None, + FrameDataState::Packed(packed) | FrameDataState::Both(_, packed) => { + Some(packed.num_bytes()) + } + } + } + + fn packed(&self) -> Option<&PackedStreams> { + match self { + FrameDataState::Unpacked(_) => None, + FrameDataState::Packed(packed) | FrameDataState::Both(_, packed) => Some(packed), + } + } + + fn pack_and_remove(&mut self) { + if let FrameDataState::Unpacked(ref unpacked) | FrameDataState::Both(ref unpacked, _) = + *self + { + let packed = PackedStreams::pack(&unpacked.thread_streams); + *self = Self::Packed(packed); + } + } + + fn pack_and_keep(&mut self) { + if let FrameDataState::Unpacked(ref unpacked) = *self { + let packed = PackedStreams::pack(&unpacked.thread_streams); + *self = Self::Packed(packed); + } + } + + fn bytes_of_ram_used(&self) -> usize { + self.unpacked_size().unwrap_or(0) + self.packed_size().unwrap_or(0) + } + + fn has_packed(&self) -> bool { + matches!(self, FrameDataState::Packed(_) | FrameDataState::Both(..)) + } + + fn has_unpacked(&self) -> bool { + matches!(self, FrameDataState::Unpacked(_) | FrameDataState::Both(..)) + } + + fn packing_info(&self) -> PackingInfo { + PackingInfo { + unpacked_size: self.unpacked_size(), + packed_size: self.packed_size(), + } + } +} + #[cfg(feature = "packing")] impl FrameData { /// Create a new [`FrameData`]. @@ -359,9 +469,8 @@ impl FrameData { full_delta: bool, ) -> Self { Self { - meta: unpacked_frame.meta.clone(), - unpacked_frame: RwLock::new(Some(Ok(unpacked_frame))), - packed_streams: RwLock::new(None), + meta: unpacked_frame.meta, + data: RwLock::new(FrameDataState::Unpacked(unpacked_frame)), scope_delta, full_delta, } @@ -375,31 +484,37 @@ impl FrameData { /// Number of bytes used by the packed data, if packed. pub fn packed_size(&self) -> Option { - self.packed_streams.read().as_ref().map(|c| c.num_bytes()) + self.data.read().packed_size() } /// Number of bytes used when unpacked, if known. pub fn unpacked_size(&self) -> Option { - if self.has_unpacked() { - Some(self.meta.num_bytes) - } else { - None - } + self.data.read().unpacked_size() } /// bytes currently used by the unpacked and packed data. pub fn bytes_of_ram_used(&self) -> usize { - self.unpacked_size().unwrap_or(0) + self.packed_size().unwrap_or(0) + self.data.read().bytes_of_ram_used() } /// Do we have a packed version stored internally? pub fn has_packed(&self) -> bool { - self.packed_streams.read().is_some() + self.data.read().has_packed() } /// Do we have a unpacked version stored internally? pub fn has_unpacked(&self) -> bool { - self.unpacked_frame.read().is_some() + self.data.read().has_unpacked() + } + + /// Provides an overview of the frame's packing status. + /// + /// The function retrieves both the sizes of the unpacked and packed frames, as well as whether + /// packed/unpacked versions of the frame exist internally. The goal of this function is to + /// minimize the number of lock accesses by consolidating information retrieval into a single + /// operation. + pub fn packing_info(&self) -> PackingInfo { + self.data.read().packing_info() } /// Return the unpacked data. @@ -408,60 +523,34 @@ impl FrameData { /// /// Returns `Err` if failing to decode the packed data. pub fn unpacked(&self) -> anyhow::Result> { - fn unpack_frame_data( - meta: FrameMeta, - packed: &PackedStreams, - ) -> anyhow::Result { - Ok(UnpackedFrameData { - meta, - thread_streams: packed.unpack()?, - }) - } + let unpacked = { + let inner_guard = self.data.read(); + let FrameDataState::Packed(ref packed) = *inner_guard else { + // Safe to unwrap, variant has to contain unpacked if NOT `Packed` + return Ok(self.data.read().unpacked().unwrap()); + }; - let has_unpacked = self.unpacked_frame.read().is_some(); - if !has_unpacked { crate::profile_scope!("unpack_puffin_frame"); - let packed_lock = self.packed_streams.read(); - let packed = packed_lock - .as_ref() - .expect("FrameData is neither packed or unpacked"); - - let frame_data_result = unpack_frame_data(self.meta.clone(), packed); - let frame_data_result = frame_data_result.map(Arc::new); - *self.unpacked_frame.write() = Some(frame_data_result); - } - match self.unpacked_frame.read().as_ref().unwrap() { - Ok(frame) => Ok(frame.clone()), - Err(err) => Err(anyhow::format_err!("{}", err)), // can't clone `anyhow::Error` - } + Arc::new(UnpackedFrameData { + meta: self.meta, + thread_streams: packed.unpack()?, + }) + }; + + self.data.write().unpack(unpacked.clone()); + Ok(unpacked) } /// Make the [`FrameData`] use up less memory. /// Idempotent. pub fn pack(&self) { - self.create_packed(); - *self.unpacked_frame.write() = None; + self.data.write().pack_and_remove(); } /// Create a packed storage without freeing the unpacked storage. fn create_packed(&self) { - let has_packed = self.packed_streams.read().is_some(); - if !has_packed { - // crate::profile_scope!("pack_puffin_frame"); // we get called from `GlobalProfiler::new_frame`, so avoid recursiveness! - let unpacked_frame = self - .unpacked_frame - .read() - .as_ref() - .expect("We should have an unpacked frame if we don't have a packed one") - .as_ref() - .expect("The unpacked frame should be error free, since it doesn't come from packed source") - .clone(); - - let packed = PackedStreams::pack(&unpacked_frame.thread_streams); - - *self.packed_streams.write() = Some(packed); - } + self.data.write().pack_and_keep(); } /// Writes one [`FrameData`] into a stream, prefixed by its length ([`u32`] le). @@ -483,8 +572,8 @@ impl FrameData { write.write_all(&meta_serialized)?; self.create_packed(); - let packed_streams_lock = self.packed_streams.read(); - let packed_streams = packed_streams_lock.as_ref().unwrap(); // We just called create_packed + let packed_streams_lock = self.data.read(); + let packed_streams = packed_streams_lock.packed().unwrap(); // We just called create_packed write.write_all(&(packed_streams.num_bytes() as u32).to_le_bytes())?; write.write_u8(packed_streams.compression_kind as u8)?; @@ -613,8 +702,7 @@ impl FrameData { Ok(Some(Self { meta, - unpacked_frame: RwLock::new(None), - packed_streams: RwLock::new(Some(packed_streams)), + data: RwLock::new(FrameDataState::Packed(packed_streams)), scope_delta: Default::default(), full_delta: false, })) @@ -645,8 +733,7 @@ impl FrameData { Ok(Some(Self { meta, - unpacked_frame: RwLock::new(None), - packed_streams: RwLock::new(Some(packed_streams)), + data: RwLock::new(FrameDataState::Packed(packed_streams)), scope_delta: Default::default(), full_delta: false, })) @@ -685,8 +772,7 @@ impl FrameData { Ok(Some(Self { meta, - unpacked_frame: RwLock::new(None), - packed_streams: RwLock::new(Some(streams_compressed)), + data: RwLock::new(FrameDataState::Packed(streams_compressed)), scope_delta: new_scopes, full_delta: false, })) diff --git a/puffin/src/lib.rs b/puffin/src/lib.rs index 769b7757..4687bc06 100644 --- a/puffin/src/lib.rs +++ b/puffin/src/lib.rs @@ -39,7 +39,7 @@ pub use data::{Error, Reader, Result, Scope, ScopeRecord, Stream, StreamInfo, St pub use frame_data::{FrameData, FrameMeta, UnpackedFrameData}; pub use global_profiler::{FrameSink, GlobalProfiler}; pub use merge::{merge_scopes_for_thread, MergeScope}; -pub use profile_view::{select_slowest, FrameView, GlobalFrameView}; +pub use profile_view::{select_slowest, FrameStats, FrameView, GlobalFrameView}; pub use scope_details::{ScopeCollection, ScopeDetails, ScopeType}; pub use thread_profiler::{ThreadInfo, ThreadProfiler}; pub use utils::{clean_function_name, short_file_name, shorten_rust_function_name, type_name_of}; diff --git a/puffin/src/profile_view.rs b/puffin/src/profile_view.rs index df283a5e..605c610d 100644 --- a/puffin/src/profile_view.rs +++ b/puffin/src/profile_view.rs @@ -1,4 +1,9 @@ -use std::sync::Arc; +use itertools::Itertools; +use std::{ + cmp::Ordering, + collections::{BTreeSet, VecDeque}, + sync::Arc, +}; use crate::{FrameData, FrameSinkId, ScopeCollection}; @@ -6,16 +11,21 @@ use crate::{FrameData, FrameSinkId, ScopeCollection}; #[derive(Clone)] pub struct FrameView { /// newest first - recent: std::collections::VecDeque>, + recent: VecDeque, max_recent: usize, - slowest: std::collections::BinaryHeap, + slowest_by_index: BTreeSet, + slowest_by_duration: BTreeSet, max_slow: usize, /// Minimizes memory usage at the expense of CPU time. /// /// Only recommended if you set a large max_recent size. pack_frames: bool, + + /// Maintain stats as we add/remove frames + stats: FrameStats, + scope_collection: ScopeCollection, } @@ -25,11 +35,13 @@ impl Default for FrameView { let max_slow = 256; Self { - recent: std::collections::VecDeque::with_capacity(max_recent), + recent: VecDeque::with_capacity(max_recent), max_recent, - slowest: std::collections::BinaryHeap::with_capacity(max_slow), + slowest_by_index: BTreeSet::new(), + slowest_by_duration: BTreeSet::new(), max_slow, pack_frames: true, + stats: Default::default(), scope_collection: Default::default(), } } @@ -38,7 +50,7 @@ impl Default for FrameView { impl FrameView { /// Returns `true` if there are no recent or slowest frames. pub fn is_empty(&self) -> bool { - self.recent.is_empty() && self.slowest.is_empty() + self.recent.is_empty() && self.slowest_by_duration.is_empty() } /// Returns the collection of scope details. @@ -54,84 +66,117 @@ impl FrameView { self.scope_collection.insert(new_scope.clone()); } - if let Some(last) = self.recent.back() { - if new_frame.frame_index() <= last.frame_index() { + if let Some(last) = self.recent.iter().last() { + if new_frame.frame_index() <= last.0.frame_index() { // A frame from the past!? // Likely we are `puffin_viewer`, and the server restarted. // The safe choice is to clear everything: + self.stats.clear(); self.recent.clear(); - self.slowest.clear(); + self.slowest_by_index.clear(); + self.slowest_by_duration.clear(); + } + } + + if let Some(last) = self.recent.iter().last() { + // Assume there is a viewer viewing the newest frame, + // and compress the previously newest frame to save RAM: + if self.pack_frames { + last.0.pack(); } + + self.stats.add(&last.0); } - let add_to_slowest = if self.slowest.len() < self.max_slow { + let add_to_slowest = if self.slowest_by_duration.len() < self.max_slow { true - } else if let Some(fastest_of_the_slow) = self.slowest.peek() { + } else if let Some(fastest_of_the_slow) = self.slowest_by_duration.iter().last() { new_frame.duration_ns() > fastest_of_the_slow.0.duration_ns() } else { false }; if add_to_slowest { - self.slowest.push(OrderedByDuration(new_frame.clone())); - while self.slowest.len() > self.max_slow { - self.slowest.pop(); - } + self.add_slow_frame(&new_frame); } - if let Some(last) = self.recent.back() { - // Assume there is a viewer viewing the newest frame, - // and compress the previously newest frame to save RAM: - if self.pack_frames { - last.pack(); + self.add_recent_frame(&new_frame); + } + + fn add_slow_frame(&mut self, new_frame: &Arc) { + assert_eq!(self.slowest_by_duration.len(), self.slowest_by_index.len()); + + self.slowest_by_duration + .insert(OrderedByDuration(new_frame.clone())); + self.slowest_by_index + .insert(OrderedByIndex(new_frame.clone())); + + while self.slowest_by_duration.len() > self.max_slow { + if let Some(removed_frame) = self.slowest_by_duration.pop_last() { + let removed_by_index = OrderedByIndex(removed_frame.0.clone()); + self.slowest_by_index.remove(&removed_by_index); + + // Only remove from stats if the frame is not present in recent + if self.recent.binary_search(&removed_by_index).is_err() { + self.stats.remove(&removed_frame.0); + } } } + } + + fn add_recent_frame(&mut self, new_frame: &Arc) { + self.recent.push_back(OrderedByIndex(new_frame.clone())); - self.recent.push_back(new_frame); while self.recent.len() > self.max_recent { - self.recent.pop_front(); + if let Some(removed_frame) = self.recent.pop_front() { + // Only remove from stats if the frame is not present in slowest + if !self.slowest_by_index.contains(&removed_frame) { + self.stats.remove(&removed_frame.0); + } + } } } /// The latest fully captured frame of data. pub fn latest_frame(&self) -> Option> { - self.recent.back().cloned() + self.recent.back().map(|f| f.0.clone()) } /// Returns up to `n` latest fully captured frames of data. - pub fn latest_frames(&self, n: usize) -> Vec> { + pub fn latest_frames(&self, n: usize) -> impl Iterator> { // Probably not the best way to do this, but since // [`self.recent`] is immutable in this context and // working with deque slices is complicated, we'll do // it this way for now. - self.recent.iter().rev().take(n).rev().cloned().collect() + self.recent.iter().rev().take(n).rev().map(|f| &f.0) } /// Oldest first pub fn recent_frames(&self) -> impl Iterator> { - self.recent.iter() + self.recent.iter().map(|f| &f.0) } /// The slowest frames so far (or since last call to [`Self::clear_slowest()`]) /// in chronological order. - pub fn slowest_frames_chronological(&self) -> Vec> { - let mut frames: Vec<_> = self.slowest.iter().map(|f| f.0.clone()).collect(); - frames.sort_by_key(|frame| frame.frame_index()); - frames + pub fn slowest_frames_chronological(&self) -> impl Iterator> { + self.slowest_by_index.iter().map(|f| &f.0) } /// All frames sorted chronologically. - pub fn all_uniq(&self) -> Vec> { - let mut all: Vec<_> = self.slowest.iter().map(|f| f.0.clone()).collect(); - all.extend(self.recent.iter().cloned()); - all.sort_by_key(|frame| frame.frame_index()); - all.dedup_by_key(|frame| frame.frame_index()); - all + pub fn all_uniq(&self) -> impl Iterator> { + Itertools::merge(self.recent.iter(), self.slowest_by_index.iter()) + .dedup() + .map(|f| &f.0) } /// Clean history of the slowest frames. pub fn clear_slowest(&mut self) { - self.slowest.clear(); + for frame in self.slowest_by_index.iter() { + self.stats.remove(&frame.0); + } + + self.slowest_by_duration.clear(); + self.slowest_by_index.clear(); } /// How many frames of recent history to store. @@ -165,18 +210,25 @@ impl FrameView { self.pack_frames = pack_frames; } + /// Retrieve statistics for added frames. This operation is efficient and suitable when + /// frames have not been manipulated outside of `ProfileView`, such as being unpacked. For + /// comprehensive statistics, refer to [`Self::stats_full()`] + pub fn stats(&self) -> FrameStats { + self.stats + } + + /// Retrieve detailed statistics by performing a full computation on all the added frames. + pub fn stats_full(&self) -> FrameStats { + FrameStats::from_frames(self.all_uniq().map(Arc::as_ref)) + } + /// Export profile data as a `.puffin` file/stream. #[cfg(feature = "serialization")] #[cfg(not(target_arch = "wasm32"))] // compression not supported on wasm pub fn write(&self, write: &mut impl std::io::Write) -> anyhow::Result<()> { write.write_all(b"PUF0")?; - let slowest_frames = self.slowest.iter().map(|f| &f.0); - let mut frames: Vec<_> = slowest_frames.chain(self.recent.iter()).collect(); - frames.sort_by_key(|frame| frame.frame_index()); - frames.dedup_by_key(|frame| frame.frame_index()); - - for frame in frames { + for frame in self.all_uniq() { frame.write_into(&self.scope_collection, false, write)?; } Ok(()) @@ -224,22 +276,55 @@ pub fn select_slowest(frames: &[Arc], max: usize) -> Vec); +impl Ord for OrderedByDuration { + fn cmp(&self, other: &Self) -> Ordering { + match self.0.duration_ns().cmp(&other.0.duration_ns()).reverse() { + Ordering::Equal => self.0.frame_index().cmp(&other.0.frame_index()), + res => res, + } + } +} + +impl PartialOrd for OrderedByDuration { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Eq for OrderedByDuration {} + impl PartialEq for OrderedByDuration { fn eq(&self, other: &Self) -> bool { - self.0.duration_ns().eq(&other.0.duration_ns()) + self.0.duration_ns() == other.0.duration_ns() + && self.0.frame_index() == other.0.frame_index() } } -impl Eq for OrderedByDuration {} -impl PartialOrd for OrderedByDuration { - fn partial_cmp(&self, other: &Self) -> Option { +// ---------------------------------------------------------------------------- +#[derive(Clone)] +struct OrderedByIndex(Arc); + +impl Ord for OrderedByIndex { + fn cmp(&self, other: &Self) -> Ordering { + match self.0.frame_index().cmp(&other.0.frame_index()) { + Ordering::Equal => self.0.duration_ns().cmp(&other.0.duration_ns()), + res => res, + } + } +} + +impl PartialOrd for OrderedByIndex { + fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } -impl Ord for OrderedByDuration { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.0.duration_ns().cmp(&other.0.duration_ns()).reverse() +impl Eq for OrderedByIndex {} + +impl PartialEq for OrderedByIndex { + fn eq(&self, other: &Self) -> bool { + self.0.frame_index() == other.0.frame_index() + && self.0.duration_ns() == other.0.duration_ns() } } @@ -279,3 +364,74 @@ impl GlobalFrameView { self.view.lock() } } + +// ---------------------------------------------------------------------------- + +fn stats_entry(frame: &FrameData) -> (usize, usize) { + let info = frame.packing_info(); + ( + info.packed_size.unwrap_or(0) + info.unpacked_size.unwrap_or(0), + info.unpacked_size.is_some() as usize, + ) +} + +/// Collect statistics for maintained frames +#[derive(Clone, Copy, Debug, Default)] +pub struct FrameStats { + unique_frames: usize, + total_ram_used: usize, + unpacked_frames: usize, +} + +impl FrameStats { + /// Creates a `FrameStats` instance from an iterator of frames. + pub fn from_frames<'a>(frames: impl Iterator) -> Self { + let mut stats = FrameStats::default(); + + for frame in frames { + stats.add(frame); + } + + stats + } + + /// Adds a frame's statistics to the `FrameStats`. + fn add(&mut self, frame: &FrameData) { + let (total, unpacked) = stats_entry(frame); + + self.total_ram_used = self.total_ram_used.saturating_add(total); + self.unpacked_frames = self.unpacked_frames.saturating_add(unpacked); + self.unique_frames = self.unique_frames.saturating_add(1); + } + + /// Removes a frame's statistics from the `FrameStats`. + fn remove(&mut self, frame: &FrameData) { + let (total, unpacked) = stats_entry(frame); + + self.total_ram_used = self.total_ram_used.saturating_sub(total); + self.unpacked_frames = self.unpacked_frames.saturating_sub(unpacked); + self.unique_frames = self.unique_frames.saturating_sub(1); + } + + /// Returns the number of unique frames. + pub fn frames(&self) -> usize { + self.unique_frames + } + + /// Returns the number of unpacked frames. + pub fn unpacked_frames(&self) -> usize { + self.unpacked_frames + } + + /// Returns the total bytes of RAM used. + pub fn bytes_of_ram_used(&self) -> usize { + self.total_ram_used + } + + /// Clears all statistics in `FrameStats`. + pub fn clear(&mut self) { + self.unique_frames = 0; + self.unpacked_frames = 0; + self.total_ram_used = 0; + } +} diff --git a/puffin_egui/CHANGELOG.md b/puffin_egui/CHANGELOG.md index 1c11621b..d1d6b5e3 100644 --- a/puffin_egui/CHANGELOG.md +++ b/puffin_egui/CHANGELOG.md @@ -9,6 +9,7 @@ All notable changes to the egui crate will be documented in this file. - [PR#214](https://github.com/EmbarkStudios/puffin/pull/214) Fix frame selection input handling - [PR#218](https://github.com/EmbarkStudios/puffin/pull/218) Fix flamegraph click intersection +- [PR#151](https://github.com/EmbarkStudios/puffin/pull/151) Optimize frame statistics collection. ## [0.27.1] - 2024-06-16 diff --git a/puffin_egui/src/lib.rs b/puffin_egui/src/lib.rs index 1542d320..26337a8e 100644 --- a/puffin_egui/src/lib.rs +++ b/puffin_egui/src/lib.rs @@ -23,6 +23,7 @@ use puffin::*; use std::{ collections::{BTreeMap, BTreeSet}, fmt::Write as _, + iter, sync::Arc, }; use time::OffsetDateTime; @@ -142,23 +143,19 @@ impl GlobalProfilerUi { pub struct AvailableFrames { pub recent: Vec>, pub slowest: Vec>, + pub uniq: Vec>, + pub stats: FrameStats, } impl AvailableFrames { fn latest(frame_view: &FrameView) -> Self { Self { recent: frame_view.recent_frames().cloned().collect(), - slowest: frame_view.slowest_frames_chronological(), + slowest: frame_view.slowest_frames_chronological().cloned().collect(), + uniq: frame_view.all_uniq().cloned().collect(), + stats: Default::default(), } } - - fn all_uniq(&self) -> Vec> { - let mut all = self.slowest.clone(); - all.extend(self.recent.iter().cloned()); - all.sort_by_key(|frame| frame.frame_index()); - all.dedup_by_key(|frame| frame.frame_index()); - all - } } /// Multiple streams for one thread. @@ -215,11 +212,15 @@ pub struct SelectedFrames { } impl SelectedFrames { - fn try_from_vec( + fn try_from_iter( scope_collection: &ScopeCollection, - frames: Vec>, + frames: impl Iterator>, ) -> Option { - let frames = vec1::Vec1::try_from_vec(frames).ok()?; + let mut it = frames; + let first = it.next()?; + let mut frames = vec1::Vec1::new(first); + frames.extend(it); + Some(Self::from_vec1(scope_collection, frames)) } @@ -360,8 +361,16 @@ impl ProfilerUi { /// The frames we can select between fn frames(&self, frame_view: &FrameView) -> AvailableFrames { self.paused.as_ref().map_or_else( - || AvailableFrames::latest(frame_view), - |paused| paused.frames.clone(), + || { + let mut frames = AvailableFrames::latest(frame_view); + frames.stats = frame_view.stats(); + frames + }, + |paused| { + let mut frames = paused.frames.clone(); + frames.stats = FrameStats::from_frames(paused.frames.uniq.iter().map(Arc::as_ref)); + frames + }, ) } @@ -387,14 +396,14 @@ impl ProfilerUi { } } - fn all_known_frames(&self, frame_view: &FrameView) -> Vec> { - let mut all = frame_view.all_uniq(); - if let Some(paused) = &self.paused { - all.append(&mut paused.frames.all_uniq()); + fn all_known_frames<'a>( + &'a self, + frame_view: &'a FrameView, + ) -> Box> + '_> { + match &self.paused { + Some(paused) => Box::new(frame_view.all_uniq().chain(paused.frames.uniq.iter())), + None => Box::new(frame_view.all_uniq()), } - all.sort_by_key(|frame| frame.frame_index()); - all.dedup_by_key(|frame| frame.frame_index()); - all } fn run_pack_pass_if_needed(&mut self, frame_view: &FrameView) { @@ -446,7 +455,7 @@ impl ProfilerUi { let frames = if let Some(frame) = hovered_frame { match frame.unpacked() { Ok(frame) => { - SelectedFrames::try_from_vec(frame_view.scope_collection(), vec![frame]) + SelectedFrames::try_from_iter(frame_view.scope_collection(), iter::once(frame)) } Err(err) => { ui.colored_label(ERROR_COLOR, format!("Failed to load hovered frame: {err}")); @@ -457,13 +466,12 @@ impl ProfilerUi { Some(paused.selected.clone()) } else { puffin::profile_scope!("select_latest_frames"); - let latest = frame_view.latest_frames(self.max_num_latest); - let unpacked: Vec> = latest - .into_iter() + let latest = frame_view + .latest_frames(self.max_num_latest) .map(|frame| frame.unpacked()) - .filter_map(|unpacked| unpacked.ok()) - .collect(); - SelectedFrames::try_from_vec(frame_view.scope_collection(), unpacked) + .filter_map(|unpacked| unpacked.ok()); + + SelectedFrames::try_from_iter(frame_view.scope_collection(), latest) }; let frames = if let Some(frames) = frames { @@ -618,22 +626,18 @@ impl ProfilerUi { }); { - let uniq = frames.all_uniq(); - let mut bytes = 0; - let mut unpacked = 0; - for frame in &uniq { - bytes += frame.bytes_of_ram_used(); - unpacked += frame.has_unpacked() as usize; - } + let uniq = &frames.uniq; + let stats = &frames.stats; + ui.label(format!( "{} frames ({} unpacked) using approximately {:.1} MB.", - uniq.len(), - unpacked, - bytes as f64 * 1e-6 + stats.frames(), + stats.unpacked_frames(), + stats.bytes_of_ram_used() as f64 * 1e-6 )); if let Some(frame_view) = frame_view.as_mut() { - max_frames_ui(ui, frame_view); + max_frames_ui(ui, frame_view, uniq); if self.paused.is_none() { max_num_latest_ui(ui, &mut self.max_num_latest); } @@ -761,7 +765,7 @@ impl ProfilerUi { } if let Some(new_selection) = - SelectedFrames::try_from_vec(frame_view.scope_collection(), new_selection) + SelectedFrames::try_from_iter(frame_view.scope_collection(), new_selection.into_iter()) { self.pause_and_select(frame_view, new_selection); } @@ -826,13 +830,9 @@ fn format_time(nanos: NanoSecond) -> Option { } } -fn max_frames_ui(ui: &mut egui::Ui, frame_view: &mut FrameView) { - let uniq = frame_view.all_uniq(); - - let mut bytes = 0; - for frame in &uniq { - bytes += frame.bytes_of_ram_used(); - } +fn max_frames_ui(ui: &mut egui::Ui, frame_view: &mut FrameView, uniq: &[Arc]) { + let stats = frame_view.stats(); + let bytes = stats.bytes_of_ram_used(); let frames_per_second = if let (Some(first), Some(last)) = (uniq.first(), uniq.last()) { let nanos = last.range_ns().1 - first.range_ns().0;