diff --git a/Cargo.lock b/Cargo.lock index 484f853..d9b7825 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -82,6 +82,12 @@ dependencies = [ "libc", ] +[[package]] +name = "anyhow" +version = "1.0.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10f00e1f6e58a40e807377c75c6a7f97bf9044fab57816f2414e6f5f4499d7b8" + [[package]] name = "arrayref" version = "0.3.7" @@ -121,6 +127,15 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + [[package]] name = "bit-set" version = "0.6.0" @@ -175,6 +190,12 @@ version = "1.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea31d69bda4949c1c1562c1e6f042a1caefac98cdc8a298260a2ff41c1e2d42b" +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "bytes" version = "1.5.0" @@ -341,6 +362,15 @@ dependencies = [ "libc", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.19" @@ -400,6 +430,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f25c0e292a7ca6d6498557ff1df68f32c99850012b6ea401cf8daf771f22ff53" +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + [[package]] name = "equivalent" version = "1.0.1" @@ -633,6 +669,15 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "jni" version = "0.21.1" @@ -764,6 +809,12 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "lz4_flex" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" + [[package]] name = "malloc_buf" version = "0.0.6" @@ -1116,6 +1167,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43d84d1d7a6ac92673717f9f6d1518374ef257669c24ebc5ac25d5033828be58" dependencies = [ "profiling-procmacros", + "puffin", "tracy-client", ] @@ -1129,6 +1181,36 @@ dependencies = [ "syn 2.0.48", ] +[[package]] +name = "puffin" +version = "0.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa9dae7b05c02ec1a6bc9bcf20d8bc64a7dcbf57934107902a872014899b741f" +dependencies = [ + "anyhow", + "bincode", + "byteorder", + "cfg-if", + "itertools", + "lz4_flex", + "once_cell", + "parking_lot", + "serde", +] + +[[package]] +name = "puffin_http" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "739a3c7f56604713b553d7addd7718c226e88d598979ae3450320800bd0e9810" +dependencies = [ + "anyhow", + "crossbeam-channel", + "log", + "parking_lot", + "puffin", +] + [[package]] name = "quick-xml" version = "0.31.0" @@ -1924,6 +2006,8 @@ dependencies = [ "futures-lite", "parking_lot", "profiling", + "puffin", + "puffin_http", "thiserror", "tracy-client", "wgpu", diff --git a/Cargo.toml b/Cargo.toml index ce1aa3a..d4a3150 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 
+14,7 @@ clippy.doc_markdown = "warn" [features] tracy = ["dep:tracy-client", "profiling/profile-with-tracy"] +puffin = ["dep:puffin", "profiling/profile-with-puffin"] [lib] @@ -23,10 +24,11 @@ thiserror = "1" wgpu = "22.1.0" tracy-client = { version = "0.17", optional = true } - +puffin = { version = "0.19.1", optional = true } [dev-dependencies] futures-lite = "2" -profiling = { version = "1" } +profiling = "1" +puffin_http = "0.16.1" tracy-client = "0.17.0" winit = "0.30" diff --git a/README.md b/README.md index 5c04879..ecc9f79 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ Simple profiler scopes for wgpu using timer queries * Easy to use profiler scopes * Allows nesting! * Can be disabled by runtime flag - * Additionally generates debug markers + * Additionally generates debug markers * Thread-safe - can profile several command encoder/buffers in parallel * Internally creates pools of timer queries automatically * Does not need to know in advance how many queries/profiling scopes are needed @@ -17,6 +17,7 @@ Simple profiler scopes for wgpu using timer queries * Many profiler instances can live side by side * chrome trace flamegraph json export * Tracy integration (behind `tracy` feature flag) +* Puffin integration (behind `puffin` feature flag) ## How to use diff --git a/examples/demo.rs b/examples/demo.rs index be108c8..8d1dba4 100644 --- a/examples/demo.rs +++ b/examples/demo.rs @@ -7,6 +7,13 @@ use winit::{ keyboard::{KeyCode, PhysicalKey}, }; +#[cfg(feature = "puffin")] +// Since the timing information we get from WGPU may be several frames behind the CPU, we can't report these frames to +// the singleton returned by `puffin::GlobalProfiler::lock`. Instead, we need our own `puffin::GlobalProfiler` that +// can be several frames behind puffin's main global profiler singleton.
+static PUFFIN_GPU_PROFILER: std::sync::LazyLock<std::sync::Mutex<puffin::GlobalProfiler>> = + std::sync::LazyLock::new(|| std::sync::Mutex::new(puffin::GlobalProfiler::default())); + fn scopes_to_console_recursive(results: &[GpuTimerQueryResult], indentation: u32) { for scope in results { if indentation > 0 { @@ -155,6 +162,7 @@ impl GfxState { panic!("Failed to create profiler: {}", err); } }); + #[cfg(not(feature = "tracy"))] let profiler = GpuProfiler::new(GpuProfilerSettings::default()).expect("Failed to create profiler"); @@ -254,6 +262,15 @@ impl ApplicationHandler<()> for State { self.latest_profiler_results = profiler.process_finished_frame(queue.get_timestamp_period()); console_output(&self.latest_profiler_results, device.features()); + #[cfg(feature = "puffin")] + { + let mut gpu_profiler = PUFFIN_GPU_PROFILER.lock().unwrap(); + wgpu_profiler::puffin::output_frame_to_puffin( + &mut gpu_profiler, + self.latest_profiler_results.as_deref().unwrap_or_default(), + ); + gpu_profiler.new_frame(); + } } WindowEvent::KeyboardInput { @@ -386,9 +403,25 @@ fn draw( } fn main() { + #[cfg(feature = "tracy")] tracy_client::Client::start(); + //env_logger::init_from_env(env_logger::Env::default().filter_or(env_logger::DEFAULT_FILTER_ENV, "warn")); let event_loop = EventLoop::new().unwrap(); event_loop.set_control_flow(winit::event_loop::ControlFlow::Poll); + + #[cfg(feature = "puffin")] + let (_cpu_server, _gpu_server) = { + puffin::set_scopes_on(true); + let cpu_server = + puffin_http::Server::new(&format!("0.0.0.0:{}", puffin_http::DEFAULT_PORT)).unwrap(); + // GPU scopes are served on the port after the default one, from `PUFFIN_GPU_PROFILER`. + let gpu_server = puffin_http::Server::new_custom( + &format!("0.0.0.0:{}", puffin_http::DEFAULT_PORT + 1), + |sink| PUFFIN_GPU_PROFILER.lock().unwrap().add_sink(sink), + |id| _ = PUFFIN_GPU_PROFILER.lock().unwrap().remove_sink(id), + ); + (cpu_server, gpu_server) + }; let _ = event_loop.run_app(&mut State::default()); } diff --git a/src/lib.rs b/src/lib.rs index 1fd6a37..708c2d9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -106,6 +106,8 @@ mod profiler;
mod profiler_command_recorder; mod profiler_query; mod profiler_settings; +#[cfg(feature = "puffin")] +pub mod puffin; mod scope; #[cfg(feature = "tracy")] mod tracy; diff --git a/src/puffin.rs b/src/puffin.rs new file mode 100644 index 0000000..97de8ee --- /dev/null +++ b/src/puffin.rs @@ -0,0 +1,54 @@ +use puffin::{GlobalProfiler, NanoSecond, ScopeDetails, StreamInfo, ThreadInfo}; + +use crate::GpuTimerQueryResult; + +/// Visualize the query results in a `puffin::GlobalProfiler`. +/// +/// Every query in `query_result` that carries timing data is written, with its nested queries, into +/// one stream that is reported to `profiler` for a thread named "GPU". A query without timing data +/// is left out together with everything nested below it. +pub fn output_frame_to_puffin(profiler: &mut GlobalProfiler, query_result: &[GpuTimerQueryResult]) { + let mut stream_info = StreamInfo::default(); + collect_stream_info_recursive(profiler, &mut stream_info, query_result, 0); + + profiler.report_user_scopes( + ThreadInfo { + start_time_ns: None, + name: "GPU".to_string(), + }, + &stream_info.as_stream_into_ref(), + ); +} + +/// Appends the scopes of `query_result` and, recursively, of their nested queries to `stream_info`. +/// +/// `depth` is the nesting level of `query_result`; `stream_info` keeps track of the deepest level, +/// the number of scopes and the overall time range that were written. +fn collect_stream_info_recursive( + profiler: &mut GlobalProfiler, + stream_info: &mut StreamInfo, + query_result: &[GpuTimerQueryResult], + depth: usize, +) { + let details: Vec<_> = query_result + .iter() + .map(|query| ScopeDetails::from_scope_name(query.label.clone())) + .collect(); + let ids = profiler.register_user_scopes(&details); + for (query, id) in query_result.iter().zip(ids) { + if let Some(time) = &query.time { + let start = (time.start * 1e9) as NanoSecond; + let end = (time.end * 1e9) as NanoSecond; + + stream_info.depth = stream_info.depth.max(depth); + stream_info.num_scopes += 1; + stream_info.range_ns.0 = stream_info.range_ns.0.min(start); + // The upper bound must grow from its own previous value, not from the lower bound. + stream_info.range_ns.1 = stream_info.range_ns.1.max(end); + + let (offset, _) = stream_info.stream.begin_scope(|| start, id, ""); + collect_stream_info_recursive(profiler, stream_info, &query.nested_queries, depth + 1); + stream_info.stream.end_scope(offset, end); + } + } +}