diff --git a/Cargo.lock b/Cargo.lock index b5d3195e..c9202a95 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2138,6 +2138,7 @@ dependencies = [ "cfg-if", "criterion", "instant", + "itertools", "js-sys", "lz4_flex", "once_cell", diff --git a/puffin-imgui/src/ui.rs b/puffin-imgui/src/ui.rs index 28fbf36f..99f2cc51 100644 --- a/puffin-imgui/src/ui.rs +++ b/puffin-imgui/src/ui.rs @@ -357,7 +357,7 @@ impl ProfilerUi { let view = self.frame_view.lock(); Frames { recent: view.recent_frames().cloned().collect(), - slowest: view.slowest_frames_chronological(), + slowest: view.slowest_frames_chronological().cloned().collect(), } } @@ -392,10 +392,17 @@ impl ProfilerUi { } fn all_known_frames(&self) -> Vec> { - let mut all = self.frame_view.lock().all_uniq(); + let mut all = self + .frame_view + .lock() + .all_uniq() + .cloned() + .collect::>(); + if let Some(paused) = &self.paused { all.append(&mut paused.frames.all_uniq()); } + all.sort_by_key(|frame| frame.frame_index()); all.dedup_by_key(|frame| frame.frame_index()); all diff --git a/puffin/Cargo.toml b/puffin/Cargo.toml index 9e1b0848..3376ccd9 100644 --- a/puffin/Cargo.toml +++ b/puffin/Cargo.toml @@ -38,6 +38,7 @@ web = ["instant/wasm-bindgen", "dep:js-sys"] byteorder = { version = "1.0" } cfg-if = "1.0" instant = { version = "0.1" } +itertools = "0.10" once_cell = "1.0" # Optional: diff --git a/puffin/src/profile_view.rs b/puffin/src/profile_view.rs index db32e4db..321a2b6f 100644 --- a/puffin/src/profile_view.rs +++ b/puffin/src/profile_view.rs @@ -1,16 +1,20 @@ -use std::collections::HashSet; +use std::cmp::Ordering; + use std::sync::{Arc, Mutex}; -use crate::{FrameData, FrameIndex, FrameSinkId}; +use itertools::Itertools; + +use crate::{FrameData, FrameSinkId}; /// A view of recent and slowest frames, used by GUIs. #[derive(Clone)] pub struct FrameView { /// newest first - recent: std::collections::VecDeque>, + recent: std::collections::VecDeque, max_recent: usize, - slowest: std::collections::BinaryHeap, + slowest_by_index: std::collections::BTreeSet, + slowest_by_duration: std::collections::BTreeSet, max_slow: usize, /// Minimizes memory usage at the expense of CPU time. @@ -31,7 +35,8 @@ impl Default for FrameView { Self { recent: std::collections::VecDeque::with_capacity(max_recent), max_recent, - slowest: std::collections::BinaryHeap::with_capacity(max_slow), + slowest_by_index: std::collections::BTreeSet::new(), + slowest_by_duration: std::collections::BTreeSet::new(), max_slow, pack_frames: true, stats, @@ -41,39 +46,40 @@ impl Default for FrameView { impl FrameView { pub fn is_empty(&self) -> bool { - self.recent.is_empty() && self.slowest.is_empty() + self.recent.is_empty() && self.slowest_by_duration.is_empty() } pub fn add_frame(&mut self, new_frame: Arc) { - if let Some(last) = self.recent.back() { - if new_frame.frame_index() <= last.frame_index() { + if let Some(last) = self.recent.iter().last() { + if new_frame.frame_index() <= last.0.frame_index() { // A frame from the past!? // Likely we are `puffin_viewer`, and the server restarted. // The safe choice is to clear everything: - self.recent.clear(); - self.slowest.clear(); self.stats.clear(); + self.recent.clear(); + self.slowest_by_index.clear(); + self.slowest_by_duration.clear(); } } - let add_to_slowest = if self.slowest.len() < self.max_slow { - true - } else if let Some(fastest_of_the_slow) = self.slowest.peek() { - new_frame.duration_ns() > fastest_of_the_slow.0.duration_ns() - } else { - false - }; - - if let Some(last) = self.recent.back() { + if let Some(last) = self.recent.iter().last() { // Assume there is a viewer viewing the newest frame, // and compress the previously newest frame to save RAM: if self.pack_frames { - last.pack(); + last.0.pack(); } - self.stats.add(last); + self.stats.add(&last.0); } + let add_to_slowest = if self.slowest_by_duration.len() < self.max_slow { + true + } else if let Some(fastest_of_the_slow) = self.slowest_by_duration.iter().last() { + new_frame.duration_ns() > fastest_of_the_slow.0.duration_ns() + } else { + false + }; + if add_to_slowest { self.add_slow_frame(&new_frame); } @@ -82,16 +88,20 @@ impl FrameView { } pub fn add_slow_frame(&mut self, new_frame: &Arc) { - self.slowest.push(OrderedByDuration(new_frame.clone())); + assert_eq!(self.slowest_by_duration.len(), self.slowest_by_index.len()); + + self.slowest_by_duration + .insert(OrderedByDuration(new_frame.clone())); + self.slowest_by_index + .insert(OrderedByIndex(new_frame.clone())); + + while self.slowest_by_duration.len() > self.max_slow { + if let Some(removed_frame) = self.slowest_by_duration.pop_last() { + let removed_by_index = OrderedByIndex(removed_frame.0.clone()); + self.slowest_by_index.remove(&removed_by_index); - while self.slowest.len() > self.max_slow { - if let Some(removed_frame) = self.slowest.pop() { // Only remove from stats if the frame is not present in recent - if self - .recent - .binary_search_by_key(&removed_frame.0.frame_index(), |f| f.frame_index()) - .is_err() - { + if self.recent.binary_search(&removed_by_index).is_err() { self.stats.remove(&removed_frame.0); } } @@ -99,17 +109,13 @@ impl FrameView { } pub fn add_recent_frame(&mut self, new_frame: &Arc) { - self.recent.push_back(new_frame.clone()); + self.recent.push_back(OrderedByIndex(new_frame.clone())); while self.recent.len() > self.max_recent { if let Some(removed_frame) = self.recent.pop_front() { // Only remove from stats if the frame is not present in slowest - if !self - .slowest - .iter() - .any(|f| removed_frame.frame_index() == f.0.frame_index()) - { - self.stats.remove(&removed_frame); + if !self.slowest_by_index.contains(&removed_frame) { + self.stats.remove(&removed_frame.0); } } } @@ -117,50 +123,44 @@ impl FrameView { /// The latest fully captured frame of data. pub fn latest_frame(&self) -> Option> { - self.recent.back().cloned() + self.recent.iter().last().map(|f| f.0.clone()) } /// Returns up to `n` latest fully captured frames of data. - pub fn latest_frames(&self, n: usize) -> Vec> { + pub fn latest_frames(&self, n: usize) -> impl Iterator> { // Probably not the best way to do this, but since // [`self.recent`] is immutable in this context and // working with deque slices is complicated, we'll do // it this way for now. - self.recent.iter().rev().take(n).rev().cloned().collect() + self.recent.iter().rev().take(n).rev().map(|f| &f.0) } /// Oldest first pub fn recent_frames(&self) -> impl Iterator> { - self.recent.iter() + self.recent.iter().map(|f| &f.0) } /// The slowest frames so far (or since last call to [`Self::clear_slowest()`]) /// in chronological order. - pub fn slowest_frames_chronological(&self) -> Vec> { - let mut frames: Vec<_> = self.slowest.iter().map(|f| f.0.clone()).collect(); - frames.sort_by_key(|frame| frame.frame_index()); - frames + pub fn slowest_frames_chronological(&self) -> impl Iterator> { + self.slowest_by_index.iter().map(|f| &f.0) } /// All frames sorted chronologically. - pub fn all_uniq(&self) -> Vec> { - let mut all: Vec<_> = self - .slowest - .iter() - .map(|f| f.0.clone()) - .chain(self.recent.iter().cloned()) - .collect(); - - all.sort_by_key(|frame| frame.frame_index()); - all.dedup_by_key(|frame| frame.frame_index()); - all + pub fn all_uniq(&self) -> impl Iterator> { + Itertools::merge(self.recent.iter(), self.slowest_by_index.iter()) + .dedup() + .map(|f| &f.0) } /// Clean history of the slowest frames. pub fn clear_slowest(&mut self) { - for frame in self.slowest.drain() { + for frame in self.slowest_by_index.iter() { self.stats.remove(&frame.0); } + + self.slowest_by_duration.clear(); + self.slowest_by_index.clear(); } /// How many frames of recent history to store. @@ -217,12 +217,7 @@ impl FrameView { pub fn save_to_writer(&self, write: &mut impl std::io::Write) -> anyhow::Result<()> { write.write_all(b"PUF0")?; - let slowest_frames = self.slowest.iter().map(|f| &f.0); - let mut frames: Vec<_> = slowest_frames.chain(self.recent.iter()).collect(); - frames.sort_by_key(|frame| frame.frame_index()); - frames.dedup_by_key(|frame| frame.frame_index()); - - for frame in frames { + for frame in self.all_uniq() { frame.write_into(write)?; } Ok(()) @@ -277,22 +272,55 @@ pub fn select_slowest(frames: &[Arc], max: usize) -> Vec); +impl Ord for OrderedByDuration { + fn cmp(&self, other: &Self) -> Ordering { + match self.0.duration_ns().cmp(&other.0.duration_ns()).reverse() { + Ordering::Equal => self.0.frame_index().cmp(&other.0.frame_index()), + res => res, + } + } +} + +impl PartialOrd for OrderedByDuration { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Eq for OrderedByDuration {} + impl PartialEq for OrderedByDuration { fn eq(&self, other: &Self) -> bool { - self.0.duration_ns().eq(&other.0.duration_ns()) + self.0.duration_ns() == other.0.duration_ns() + && self.0.frame_index() == other.0.frame_index() } } -impl Eq for OrderedByDuration {} -impl PartialOrd for OrderedByDuration { - fn partial_cmp(&self, other: &Self) -> Option { +// ---------------------------------------------------------------------------- +#[derive(Clone)] +struct OrderedByIndex(Arc); + +impl Ord for OrderedByIndex { + fn cmp(&self, other: &Self) -> Ordering { + match self.0.frame_index().cmp(&other.0.frame_index()) { + Ordering::Equal => self.0.duration_ns().cmp(&other.0.duration_ns()), + res => res, + } + } +} + +impl PartialOrd for OrderedByIndex { + fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } -impl Ord for OrderedByDuration { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.0.duration_ns().cmp(&other.0.duration_ns()).reverse() +impl Eq for OrderedByIndex {} + +impl PartialEq for OrderedByIndex { + fn eq(&self, other: &Self) -> bool { + self.0.frame_index() == other.0.frame_index() + && self.0.duration_ns() == other.0.duration_ns() } } @@ -333,7 +361,7 @@ impl GlobalFrameView { /// Collect statistics for maintained frames #[derive(Clone, Debug, Default)] pub struct FrameStats { - unique_frames: HashSet, + unique_frames: usize, total_ram_used: usize, unpacked_frames: usize, } @@ -350,47 +378,43 @@ impl FrameStats { } fn add(&mut self, frame: &FrameData) { - if self.unique_frames.insert(frame.frame_index()) { - self.total_ram_used = self - .total_ram_used - .saturating_add(frame.bytes_of_ram_used()); - - self.unpacked_frames = self - .unpacked_frames - .saturating_add(frame.has_unpacked() as usize); - } + self.total_ram_used = self + .total_ram_used + .saturating_add(frame.bytes_of_ram_used()); + + self.unpacked_frames = self + .unpacked_frames + .saturating_add(frame.has_unpacked() as usize); + + self.unique_frames = self.unique_frames.saturating_add(1); } fn remove(&mut self, frame: &FrameData) { - if self.unique_frames.remove(&frame.frame_index()) { - self.total_ram_used = self - .total_ram_used - .saturating_sub(frame.bytes_of_ram_used()); - - self.unpacked_frames = self - .unpacked_frames - .saturating_sub(frame.has_unpacked() as usize); - } + self.total_ram_used = self + .total_ram_used + .saturating_sub(frame.bytes_of_ram_used()); + + self.unpacked_frames = self + .unpacked_frames + .saturating_sub(frame.has_unpacked() as usize); + + self.unique_frames = self.unique_frames.saturating_sub(1); } pub fn frames(&self) -> usize { - assert!(self.unique_frames.len() >= self.unpacked_frames); - self.unique_frames.len() + self.unique_frames } pub fn unpacked_frames(&self) -> usize { - assert!(self.unique_frames.len() >= self.unpacked_frames); self.unpacked_frames } pub fn bytes_of_ram_used(&self) -> usize { - assert!(self.unique_frames.len() >= self.unpacked_frames); self.total_ram_used } pub fn clear(&mut self) { - self.unique_frames.clear(); - + self.unique_frames = 0; self.unpacked_frames = 0; self.total_ram_used = 0; } diff --git a/puffin_egui/src/lib.rs b/puffin_egui/src/lib.rs index a48aa93d..81f53300 100644 --- a/puffin_egui/src/lib.rs +++ b/puffin_egui/src/lib.rs @@ -185,6 +185,7 @@ impl GlobalProfilerUi { pub struct AvailableFrames { pub recent: Vec>, pub slowest: Vec>, + pub uniq: Vec>, pub stats: FrameStats, } @@ -192,27 +193,11 @@ impl AvailableFrames { fn latest(frame_view: &FrameView) -> Self { Self { recent: frame_view.recent_frames().cloned().collect(), - slowest: frame_view.slowest_frames_chronological(), + slowest: frame_view.slowest_frames_chronological().cloned().collect(), + uniq: frame_view.all_uniq().cloned().collect(), stats: Default::default(), } } - - fn all_uniq(&self) -> Vec> { - let mut all = self - .slowest - .iter() - .cloned() - .chain(self.recent.iter().cloned()) - .collect::>(); - - all.sort_by_key(|frame| frame.frame_index()); - all.dedup_by_key(|frame| frame.frame_index()); - all - } - - fn stats(&self) -> &FrameStats { - &self.stats - } } /// Multiple streams for one thread. @@ -436,14 +421,14 @@ impl ProfilerUi { } } - fn all_known_frames(&self, frame_view: &FrameView) -> Vec> { - let mut all = frame_view.all_uniq(); - if let Some(paused) = &self.paused { - all.append(&mut paused.frames.all_uniq()); + fn all_known_frames<'a>( + &'a self, + frame_view: &'a FrameView, + ) -> Box> + '_> { + match &self.paused { + Some(paused) => Box::new(frame_view.all_uniq().chain(paused.frames.uniq.iter())), + None => Box::new(frame_view.all_uniq()), } - all.sort_by_key(|frame| frame.frame_index()); - all.dedup_by_key(|frame| frame.frame_index()); - all } fn run_pack_pass_if_needed(&mut self, frame_view: &FrameView) { @@ -649,8 +634,8 @@ impl ProfilerUi { }); { - let uniq = frames.all_uniq(); - let stats = frames.stats(); + let uniq = &frames.uniq; + let stats = &frames.stats; ui.label(format!( "{} frames ({} unpacked) using approximately {:.1} MB.", @@ -660,7 +645,7 @@ impl ProfilerUi { )); if let Some(frame_view) = frame_view.as_mut() { - max_frames_ui(ui, frame_view, &uniq); + max_frames_ui(ui, frame_view, uniq); if self.paused.is_none() { max_num_latest_ui(ui, &mut self.max_num_latest); }