Skip to content

Commit

Permalink
Optimize frame stats collection
Browse files Browse the repository at this point in the history
Optimize UI performance by calculating frame collection statistics
dynamically. Rather than repeatedly traversing each frame, we now update
the stats upon adding or removing frames. This change mitigates the
significant overhead caused by constantly accessing frame data protected
by RwLock.
  • Loading branch information
andreiltd committed Jun 19, 2023
1 parent c4f6146 commit d1c6697
Show file tree
Hide file tree
Showing 3 changed files with 170 additions and 35 deletions.
2 changes: 1 addition & 1 deletion puffin/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ mod profile_view;
pub use data::*;
pub use frame_data::{FrameData, FrameMeta, UnpackedFrameData};
pub use merge::*;
pub use profile_view::{select_slowest, FrameView, GlobalFrameView};
pub use profile_view::{select_slowest, FrameStats, FrameView, GlobalFrameView};

use std::collections::BTreeMap;
use std::sync::{
Expand Down
150 changes: 137 additions & 13 deletions puffin/src/profile_view.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::collections::HashSet;
use std::sync::{Arc, Mutex};

use crate::{FrameData, FrameSinkId};
use crate::{FrameData, FrameIndex, FrameSinkId};

/// A view of recent and slowest frames, used by GUIs.
#[derive(Clone)]
Expand All @@ -16,19 +17,24 @@ pub struct FrameView {
///
/// Only recommended if you set a large max_recent size.
pack_frames: bool,

/// Maintain stats as we add/remove frames
stats: FrameStats,
}

impl Default for FrameView {
fn default() -> Self {
let max_recent = 60 * 60 * 5;
let max_slow = 256;
let stats = Default::default();

Self {
recent: std::collections::VecDeque::with_capacity(max_recent),
max_recent,
slowest: std::collections::BinaryHeap::with_capacity(max_slow),
max_slow,
pack_frames: true,
stats,
}
}
}
Expand All @@ -46,6 +52,7 @@ impl FrameView {
// The safe choice is to clear everything:
self.recent.clear();
self.slowest.clear();
self.stats.clear();
}
}

Expand All @@ -57,24 +64,54 @@ impl FrameView {
false
};

if add_to_slowest {
self.slowest.push(OrderedByDuration(new_frame.clone()));
while self.slowest.len() > self.max_slow {
self.slowest.pop();
}
}

if let Some(last) = self.recent.back() {
// Assume there is a viewer viewing the newest frame,
// and compress the previously newest frame to save RAM:
if self.pack_frames {
last.pack();
}

self.stats.add(last);
}

if add_to_slowest {
self.add_slow_frame(&new_frame);
}

self.add_recent_frame(&new_frame);
}

pub fn add_slow_frame(&mut self, new_frame: &Arc<FrameData>) {
self.slowest.push(OrderedByDuration(new_frame.clone()));

while self.slowest.len() > self.max_slow {
if let Some(removed_frame) = self.slowest.pop() {
// Only remove from stats if the frame is not present in recent
if self
.recent
.binary_search_by_key(&removed_frame.0.frame_index(), |f| f.frame_index())
.is_err()
{
self.stats.remove(&removed_frame.0);
}
}
}
}

pub fn add_recent_frame(&mut self, new_frame: &Arc<FrameData>) {
self.recent.push_back(new_frame.clone());

self.recent.push_back(new_frame);
while self.recent.len() > self.max_recent {
self.recent.pop_front();
if let Some(removed_frame) = self.recent.pop_front() {
// Only remove from stats if the frame is not present in slowest
if !self
.slowest
.iter()
.any(|f| removed_frame.frame_index() == f.0.frame_index())
{
self.stats.remove(&removed_frame);
}
}
}
}

Expand Down Expand Up @@ -107,16 +144,23 @@ impl FrameView {

/// All frames sorted chronologically.
pub fn all_uniq(&self) -> Vec<Arc<FrameData>> {
let mut all: Vec<_> = self.slowest.iter().map(|f| f.0.clone()).collect();
all.extend(self.recent.iter().cloned());
let mut all: Vec<_> = self
.slowest
.iter()
.map(|f| f.0.clone())
.chain(self.recent.iter().cloned())
.collect();

all.sort_by_key(|frame| frame.frame_index());
all.dedup_by_key(|frame| frame.frame_index());
all
}

/// Clean history of the slowest frames.
pub fn clear_slowest(&mut self) {
self.slowest.clear();
for frame in self.slowest.drain() {
self.stats.remove(&frame.0);
}
}

/// How many frames of recent history to store.
Expand Down Expand Up @@ -147,6 +191,18 @@ impl FrameView {
self.pack_frames = pack_frames;
}

/// Retrieve statistics for added frames. This operation is efficient and suitable when
/// frames have not been manipulated outside of `ProfileView`, such as being unpacked. For
/// comprehensive statistics, refer to [`Self::stats_full()`]
pub fn stats(&self) -> &FrameStats {
&self.stats
}

/// Retrieve detailed statistics by performing a full computation on all the added frames.
pub fn stats_full(&self) -> FrameStats {
FrameStats::from_frames(self.all_uniq().map(Arc::as_ref))
}

/// Export profile data as a `.puffin` file.
#[cfg(feature = "serialization")]
#[cfg(not(target_arch = "wasm32"))] // compression not supported on wasm
Expand Down Expand Up @@ -271,3 +327,71 @@ impl GlobalFrameView {
self.view.lock().unwrap()
}
}

// ----------------------------------------------------------------------------

/// Collect statistics for maintained frames
#[derive(Clone, Debug, Default)]
pub struct FrameStats {
unique_frames: HashSet<FrameIndex>,
total_ram_used: usize,
unpacked_frames: usize,
}

impl FrameStats {
pub fn from_frames<'a>(frames: impl Iterator<Item = &'a FrameData>) -> Self {
let mut stats = FrameStats::default();

for frame in frames {
stats.add(frame);
}

stats
}

fn add(&mut self, frame: &FrameData) {
if self.unique_frames.insert(frame.frame_index()) {
self.total_ram_used = self
.total_ram_used
.saturating_add(frame.bytes_of_ram_used());

self.unpacked_frames = self
.unpacked_frames
.saturating_add(frame.has_unpacked() as usize);
}
}

fn remove(&mut self, frame: &FrameData) {
if self.unique_frames.remove(&frame.frame_index()) {
self.total_ram_used = self
.total_ram_used
.saturating_sub(frame.bytes_of_ram_used());

self.unpacked_frames = self
.unpacked_frames
.saturating_sub(frame.has_unpacked() as usize);
}
}

pub fn frames(&self) -> usize {
assert!(self.unique_frames.len() >= self.unpacked_frames);
self.unique_frames.len()
}

pub fn unpacked_frames(&self) -> usize {
assert!(self.unique_frames.len() >= self.unpacked_frames);
self.unpacked_frames
}

pub fn bytes_of_ram_used(&self) -> usize {
assert!(self.unique_frames.len() >= self.unpacked_frames);
self.total_ram_used
}

pub fn clear(&mut self) {
self.unique_frames.clear();

self.unpacked_frames = 0;
self.total_ram_used = 0;
}
}
53 changes: 32 additions & 21 deletions puffin_egui/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,23 +185,34 @@ impl GlobalProfilerUi {
pub struct AvailableFrames {
pub recent: Vec<Arc<FrameData>>,
pub slowest: Vec<Arc<FrameData>>,
pub stats: FrameStats,
}

impl AvailableFrames {
fn latest(frame_view: &FrameView) -> Self {
Self {
recent: frame_view.recent_frames().cloned().collect(),
slowest: frame_view.slowest_frames_chronological(),
stats: Default::default(),
}
}

fn all_uniq(&self) -> Vec<Arc<FrameData>> {
let mut all = self.slowest.clone();
all.extend(self.recent.iter().cloned());
let mut all = self
.slowest
.iter()
.cloned()
.chain(self.recent.iter().cloned())
.collect::<Vec<_>>();

all.sort_by_key(|frame| frame.frame_index());
all.dedup_by_key(|frame| frame.frame_index());
all
}

fn stats(&self) -> &FrameStats {
&self.stats
}
}

/// Multiple streams for one thread.
Expand Down Expand Up @@ -390,8 +401,16 @@ impl ProfilerUi {
/// The frames we can select between
fn frames(&self, frame_view: &FrameView) -> AvailableFrames {
self.paused.as_ref().map_or_else(
|| AvailableFrames::latest(frame_view),
|paused| paused.frames.clone(),
|| {
let mut frames = AvailableFrames::latest(frame_view);
frames.stats = frame_view.stats().clone();
frames
},
|paused| {
let mut frames = paused.frames.clone();
frames.stats = frame_view.stats_full();
frames
},
)
}

Expand Down Expand Up @@ -631,21 +650,17 @@ impl ProfilerUi {

{
let uniq = frames.all_uniq();
let mut bytes = 0;
let mut unpacked = 0;
for frame in &uniq {
bytes += frame.bytes_of_ram_used();
unpacked += frame.has_unpacked() as usize;
}
let stats = frames.stats();

ui.label(format!(
"{} frames ({} unpacked) using approximately {:.1} MB.",
uniq.len(),
unpacked,
bytes as f64 * 1e-6
stats.frames(),
stats.unpacked_frames(),
stats.bytes_of_ram_used() as f64 * 1e-6
));

if let Some(frame_view) = frame_view.as_mut() {
max_frames_ui(ui, frame_view);
max_frames_ui(ui, frame_view, &uniq);
if self.paused.is_none() {
max_num_latest_ui(ui, &mut self.max_num_latest);
}
Expand Down Expand Up @@ -835,13 +850,9 @@ fn format_time(nanos: NanoSecond) -> Option<String> {
}
}

fn max_frames_ui(ui: &mut egui::Ui, frame_view: &mut FrameView) {
let uniq = frame_view.all_uniq();

let mut bytes = 0;
for frame in &uniq {
bytes += frame.bytes_of_ram_used();
}
fn max_frames_ui(ui: &mut egui::Ui, frame_view: &mut FrameView, uniq: &Vec<Arc<FrameData>>) {
let stats = frame_view.stats();
let bytes = stats.bytes_of_ram_used();

let frames_per_second = if let (Some(first), Some(last)) = (uniq.first(), uniq.last()) {
let nanos = last.range_ns().1 - first.range_ns().0;
Expand Down

0 comments on commit d1c6697

Please sign in to comment.