From da309d6b6ebe3b3879db4312e1157f62c633861c Mon Sep 17 00:00:00 2001 From: Tomasz Andrzejak Date: Mon, 19 Jun 2023 20:11:46 +0200 Subject: [PATCH] Optimize cloning/sorting/deduplication of frames This update modifies the data containers used for storing frames to adopt binary tree structures. As a result, all frames are sorted at the time of insertion, eliminating the need for subsequent sorting during vector collection. Moreover, instead of returning vectors, the binary tree structure enables iteration over elements in a sorted order, providing iterators rather than vectors. This functionality gives the API caller the flexibility to either clone frames or simply inspect them in an efficient manner. --- Cargo.lock | 1 + puffin-imgui/src/ui.rs | 11 +- puffin/Cargo.toml | 1 + puffin/src/profile_view.rs | 212 +++++++++++++++++++++---------------- puffin_egui/src/lib.rs | 41 +++---- 5 files changed, 142 insertions(+), 124 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b5d3195e..c9202a95 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2138,6 +2138,7 @@ dependencies = [ "cfg-if", "criterion", "instant", + "itertools", "js-sys", "lz4_flex", "once_cell", diff --git a/puffin-imgui/src/ui.rs b/puffin-imgui/src/ui.rs index 28fbf36f..99f2cc51 100644 --- a/puffin-imgui/src/ui.rs +++ b/puffin-imgui/src/ui.rs @@ -357,7 +357,7 @@ impl ProfilerUi { let view = self.frame_view.lock(); Frames { recent: view.recent_frames().cloned().collect(), - slowest: view.slowest_frames_chronological(), + slowest: view.slowest_frames_chronological().cloned().collect(), } } @@ -392,10 +392,17 @@ impl ProfilerUi { } fn all_known_frames(&self) -> Vec> { - let mut all = self.frame_view.lock().all_uniq(); + let mut all = self + .frame_view + .lock() + .all_uniq() + .cloned() + .collect::>(); + if let Some(paused) = &self.paused { all.append(&mut paused.frames.all_uniq()); } + all.sort_by_key(|frame| frame.frame_index()); all.dedup_by_key(|frame| frame.frame_index()); all diff --git a/puffin/Cargo.toml b/puffin/Cargo.toml index 9e1b0848..3376ccd9 100644 --- a/puffin/Cargo.toml +++ b/puffin/Cargo.toml @@ -38,6 +38,7 @@ web = ["instant/wasm-bindgen", "dep:js-sys"] byteorder = { version = "1.0" } cfg-if = "1.0" instant = { version = "0.1" } +itertools = "0.10" once_cell = "1.0" # Optional: diff --git a/puffin/src/profile_view.rs b/puffin/src/profile_view.rs index db32e4db..321a2b6f 100644 --- a/puffin/src/profile_view.rs +++ b/puffin/src/profile_view.rs @@ -1,16 +1,20 @@ -use std::collections::HashSet; +use std::cmp::Ordering; + use std::sync::{Arc, Mutex}; -use crate::{FrameData, FrameIndex, FrameSinkId}; +use itertools::Itertools; + +use crate::{FrameData, FrameSinkId}; /// A view of recent and slowest frames, used by GUIs. #[derive(Clone)] pub struct FrameView { /// newest first - recent: std::collections::VecDeque>, + recent: std::collections::VecDeque, max_recent: usize, - slowest: std::collections::BinaryHeap, + slowest_by_index: std::collections::BTreeSet, + slowest_by_duration: std::collections::BTreeSet, max_slow: usize, /// Minimizes memory usage at the expense of CPU time. @@ -31,7 +35,8 @@ impl Default for FrameView { Self { recent: std::collections::VecDeque::with_capacity(max_recent), max_recent, - slowest: std::collections::BinaryHeap::with_capacity(max_slow), + slowest_by_index: std::collections::BTreeSet::new(), + slowest_by_duration: std::collections::BTreeSet::new(), max_slow, pack_frames: true, stats, @@ -41,39 +46,40 @@ impl Default for FrameView { impl FrameView { pub fn is_empty(&self) -> bool { - self.recent.is_empty() && self.slowest.is_empty() + self.recent.is_empty() && self.slowest_by_duration.is_empty() } pub fn add_frame(&mut self, new_frame: Arc) { - if let Some(last) = self.recent.back() { - if new_frame.frame_index() <= last.frame_index() { + if let Some(last) = self.recent.iter().last() { + if new_frame.frame_index() <= last.0.frame_index() { // A frame from the past!? // Likely we are `puffin_viewer`, and the server restarted. // The safe choice is to clear everything: - self.recent.clear(); - self.slowest.clear(); self.stats.clear(); + self.recent.clear(); + self.slowest_by_index.clear(); + self.slowest_by_duration.clear(); } } - let add_to_slowest = if self.slowest.len() < self.max_slow { - true - } else if let Some(fastest_of_the_slow) = self.slowest.peek() { - new_frame.duration_ns() > fastest_of_the_slow.0.duration_ns() - } else { - false - }; - - if let Some(last) = self.recent.back() { + if let Some(last) = self.recent.iter().last() { // Assume there is a viewer viewing the newest frame, // and compress the previously newest frame to save RAM: if self.pack_frames { - last.pack(); + last.0.pack(); } - self.stats.add(last); + self.stats.add(&last.0); } + let add_to_slowest = if self.slowest_by_duration.len() < self.max_slow { + true + } else if let Some(fastest_of_the_slow) = self.slowest_by_duration.iter().last() { + new_frame.duration_ns() > fastest_of_the_slow.0.duration_ns() + } else { + false + }; + if add_to_slowest { self.add_slow_frame(&new_frame); } @@ -82,16 +88,20 @@ impl FrameView { } pub fn add_slow_frame(&mut self, new_frame: &Arc) { - self.slowest.push(OrderedByDuration(new_frame.clone())); + assert_eq!(self.slowest_by_duration.len(), self.slowest_by_index.len()); + + self.slowest_by_duration + .insert(OrderedByDuration(new_frame.clone())); + self.slowest_by_index + .insert(OrderedByIndex(new_frame.clone())); + + while self.slowest_by_duration.len() > self.max_slow { + if let Some(removed_frame) = self.slowest_by_duration.pop_last() { + let removed_by_index = OrderedByIndex(removed_frame.0.clone()); + self.slowest_by_index.remove(&removed_by_index); - while self.slowest.len() > self.max_slow { - if let Some(removed_frame) = self.slowest.pop() { // Only remove from stats if the frame is not present in recent - if self - .recent - .binary_search_by_key(&removed_frame.0.frame_index(), |f| f.frame_index()) - .is_err() - { + if self.recent.binary_search(&removed_by_index).is_err() { self.stats.remove(&removed_frame.0); } } @@ -99,17 +109,13 @@ impl FrameView { } pub fn add_recent_frame(&mut self, new_frame: &Arc) { - self.recent.push_back(new_frame.clone()); + self.recent.push_back(OrderedByIndex(new_frame.clone())); while self.recent.len() > self.max_recent { if let Some(removed_frame) = self.recent.pop_front() { // Only remove from stats if the frame is not present in slowest - if !self - .slowest - .iter() - .any(|f| removed_frame.frame_index() == f.0.frame_index()) - { - self.stats.remove(&removed_frame); + if !self.slowest_by_index.contains(&removed_frame) { + self.stats.remove(&removed_frame.0); } } } @@ -117,50 +123,44 @@ impl FrameView { /// The latest fully captured frame of data. pub fn latest_frame(&self) -> Option> { - self.recent.back().cloned() + self.recent.iter().last().map(|f| f.0.clone()) } /// Returns up to `n` latest fully captured frames of data. - pub fn latest_frames(&self, n: usize) -> Vec> { + pub fn latest_frames(&self, n: usize) -> impl Iterator> { // Probably not the best way to do this, but since // [`self.recent`] is immutable in this context and // working with deque slices is complicated, we'll do // it this way for now. - self.recent.iter().rev().take(n).rev().cloned().collect() + self.recent.iter().rev().take(n).rev().map(|f| &f.0) } /// Oldest first pub fn recent_frames(&self) -> impl Iterator> { - self.recent.iter() + self.recent.iter().map(|f| &f.0) } /// The slowest frames so far (or since last call to [`Self::clear_slowest()`]) /// in chronological order. - pub fn slowest_frames_chronological(&self) -> Vec> { - let mut frames: Vec<_> = self.slowest.iter().map(|f| f.0.clone()).collect(); - frames.sort_by_key(|frame| frame.frame_index()); - frames + pub fn slowest_frames_chronological(&self) -> impl Iterator> { + self.slowest_by_index.iter().map(|f| &f.0) } /// All frames sorted chronologically. - pub fn all_uniq(&self) -> Vec> { - let mut all: Vec<_> = self - .slowest - .iter() - .map(|f| f.0.clone()) - .chain(self.recent.iter().cloned()) - .collect(); - - all.sort_by_key(|frame| frame.frame_index()); - all.dedup_by_key(|frame| frame.frame_index()); - all + pub fn all_uniq(&self) -> impl Iterator> { + Itertools::merge(self.recent.iter(), self.slowest_by_index.iter()) + .dedup() + .map(|f| &f.0) } /// Clean history of the slowest frames. pub fn clear_slowest(&mut self) { - for frame in self.slowest.drain() { + for frame in self.slowest_by_index.iter() { self.stats.remove(&frame.0); } + + self.slowest_by_duration.clear(); + self.slowest_by_index.clear(); } /// How many frames of recent history to store. @@ -217,12 +217,7 @@ impl FrameView { pub fn save_to_writer(&self, write: &mut impl std::io::Write) -> anyhow::Result<()> { write.write_all(b"PUF0")?; - let slowest_frames = self.slowest.iter().map(|f| &f.0); - let mut frames: Vec<_> = slowest_frames.chain(self.recent.iter()).collect(); - frames.sort_by_key(|frame| frame.frame_index()); - frames.dedup_by_key(|frame| frame.frame_index()); - - for frame in frames { + for frame in self.all_uniq() { frame.write_into(write)?; } Ok(()) @@ -277,22 +272,55 @@ pub fn select_slowest(frames: &[Arc], max: usize) -> Vec); +impl Ord for OrderedByDuration { + fn cmp(&self, other: &Self) -> Ordering { + match self.0.duration_ns().cmp(&other.0.duration_ns()).reverse() { + Ordering::Equal => self.0.frame_index().cmp(&other.0.frame_index()), + res => res, + } + } +} + +impl PartialOrd for OrderedByDuration { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Eq for OrderedByDuration {} + impl PartialEq for OrderedByDuration { fn eq(&self, other: &Self) -> bool { - self.0.duration_ns().eq(&other.0.duration_ns()) + self.0.duration_ns() == other.0.duration_ns() + && self.0.frame_index() == other.0.frame_index() } } -impl Eq for OrderedByDuration {} -impl PartialOrd for OrderedByDuration { - fn partial_cmp(&self, other: &Self) -> Option { +// ---------------------------------------------------------------------------- +#[derive(Clone)] +struct OrderedByIndex(Arc); + +impl Ord for OrderedByIndex { + fn cmp(&self, other: &Self) -> Ordering { + match self.0.frame_index().cmp(&other.0.frame_index()) { + Ordering::Equal => self.0.duration_ns().cmp(&other.0.duration_ns()), + res => res, + } + } +} + +impl PartialOrd for OrderedByIndex { + fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } -impl Ord for OrderedByDuration { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.0.duration_ns().cmp(&other.0.duration_ns()).reverse() +impl Eq for OrderedByIndex {} + +impl PartialEq for OrderedByIndex { + fn eq(&self, other: &Self) -> bool { + self.0.frame_index() == other.0.frame_index() + && self.0.duration_ns() == other.0.duration_ns() } } @@ -333,7 +361,7 @@ impl GlobalFrameView { /// Collect statistics for maintained frames #[derive(Clone, Debug, Default)] pub struct FrameStats { - unique_frames: HashSet, + unique_frames: usize, total_ram_used: usize, unpacked_frames: usize, } @@ -350,47 +378,43 @@ impl FrameStats { } fn add(&mut self, frame: &FrameData) { - if self.unique_frames.insert(frame.frame_index()) { - self.total_ram_used = self - .total_ram_used - .saturating_add(frame.bytes_of_ram_used()); - - self.unpacked_frames = self - .unpacked_frames - .saturating_add(frame.has_unpacked() as usize); - } + self.total_ram_used = self + .total_ram_used + .saturating_add(frame.bytes_of_ram_used()); + + self.unpacked_frames = self + .unpacked_frames + .saturating_add(frame.has_unpacked() as usize); + + self.unique_frames = self.unique_frames.saturating_add(1); } fn remove(&mut self, frame: &FrameData) { - if self.unique_frames.remove(&frame.frame_index()) { - self.total_ram_used = self - .total_ram_used - .saturating_sub(frame.bytes_of_ram_used()); - - self.unpacked_frames = self - .unpacked_frames - .saturating_sub(frame.has_unpacked() as usize); - } + self.total_ram_used = self + .total_ram_used + .saturating_sub(frame.bytes_of_ram_used()); + + self.unpacked_frames = self + .unpacked_frames + .saturating_sub(frame.has_unpacked() as usize); + + self.unique_frames = self.unique_frames.saturating_sub(1); } pub fn frames(&self) -> usize { - assert!(self.unique_frames.len() >= self.unpacked_frames); - self.unique_frames.len() + self.unique_frames } pub fn unpacked_frames(&self) -> usize { - assert!(self.unique_frames.len() >= self.unpacked_frames); self.unpacked_frames } pub fn bytes_of_ram_used(&self) -> usize { - assert!(self.unique_frames.len() >= self.unpacked_frames); self.total_ram_used } pub fn clear(&mut self) { - self.unique_frames.clear(); - + self.unique_frames = 0; self.unpacked_frames = 0; self.total_ram_used = 0; } diff --git a/puffin_egui/src/lib.rs b/puffin_egui/src/lib.rs index a48aa93d..81f53300 100644 --- a/puffin_egui/src/lib.rs +++ b/puffin_egui/src/lib.rs @@ -185,6 +185,7 @@ impl GlobalProfilerUi { pub struct AvailableFrames { pub recent: Vec>, pub slowest: Vec>, + pub uniq: Vec>, pub stats: FrameStats, } @@ -192,27 +193,11 @@ impl AvailableFrames { fn latest(frame_view: &FrameView) -> Self { Self { recent: frame_view.recent_frames().cloned().collect(), - slowest: frame_view.slowest_frames_chronological(), + slowest: frame_view.slowest_frames_chronological().cloned().collect(), + uniq: frame_view.all_uniq().cloned().collect(), stats: Default::default(), } } - - fn all_uniq(&self) -> Vec> { - let mut all = self - .slowest - .iter() - .cloned() - .chain(self.recent.iter().cloned()) - .collect::>(); - - all.sort_by_key(|frame| frame.frame_index()); - all.dedup_by_key(|frame| frame.frame_index()); - all - } - - fn stats(&self) -> &FrameStats { - &self.stats - } } /// Multiple streams for one thread. @@ -436,14 +421,14 @@ impl ProfilerUi { } } - fn all_known_frames(&self, frame_view: &FrameView) -> Vec> { - let mut all = frame_view.all_uniq(); - if let Some(paused) = &self.paused { - all.append(&mut paused.frames.all_uniq()); + fn all_known_frames<'a>( + &'a self, + frame_view: &'a FrameView, + ) -> Box> + '_> { + match &self.paused { + Some(paused) => Box::new(frame_view.all_uniq().chain(paused.frames.uniq.iter())), + None => Box::new(frame_view.all_uniq()), } - all.sort_by_key(|frame| frame.frame_index()); - all.dedup_by_key(|frame| frame.frame_index()); - all } fn run_pack_pass_if_needed(&mut self, frame_view: &FrameView) { @@ -649,8 +634,8 @@ impl ProfilerUi { }); { - let uniq = frames.all_uniq(); - let stats = frames.stats(); + let uniq = &frames.uniq; + let stats = &frames.stats; ui.label(format!( "{} frames ({} unpacked) using approximately {:.1} MB.", @@ -660,7 +645,7 @@ impl ProfilerUi { )); if let Some(frame_view) = frame_view.as_mut() { - max_frames_ui(ui, frame_view, &uniq); + max_frames_ui(ui, frame_view, uniq); if self.paused.is_none() { max_num_latest_ui(ui, &mut self.max_num_latest); }