Skip to content

Commit

Permalink
Merge pull request #35 from cwfitzgerald/tracy-2
Browse files Browse the repository at this point in the history
Integrate with tracy-client
  • Loading branch information
Wumpf authored Aug 2, 2023
2 parents c9c3415 + 845c037 commit edc6d30
Show file tree
Hide file tree
Showing 6 changed files with 131 additions and 10 deletions.
10 changes: 9 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,22 @@ homepage = "https://github.com/Wumpf/wgpu-profiler"
repository = "https://github.com/Wumpf/wgpu-profiler"
keywords = ["graphics"]
license = "MIT OR Apache-2.0"
resolver = "2"

[features]
tracy = ["tracy-client", "profiling/profile-with-tracy"]

[lib]

[dependencies]
tracy-client = { version = "0.15", optional = true }
wgpu = "0.17"

[dev-dependencies]
profiling = { version = "1" }
tracy-client = "0.15"
winit = "0.28"
futures-lite = "1"
#env_logger = "0.8.2"

[patch.crates-io]
tracy-client = { git = "https://github.com/cwfitzgerald/rust_tracy_client.git", rev = "1be35c854a7c22e09063ca25efe4438e606c6b50" }
25 changes: 21 additions & 4 deletions examples/demo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ fn scopes_to_console_recursive(results: &[GpuTimerScopeResult], indentation: u32
}

fn console_output(results: &Option<Vec<GpuTimerScopeResult>>, enabled_features: wgpu::Features) {
profiling::scope!("console_output");
print!("\x1B[2J\x1B[1;1H"); // Clear terminal and put cursor to first row first column
println!("Welcome to wgpu_profiler demo!");
println!();
Expand Down Expand Up @@ -111,7 +112,7 @@ async fn run(event_loop: EventLoop<()>, window: Window) {
surface.configure(&device, &sc_desc);

// Create a new profiler instance
let mut profiler = GpuProfiler::new(4, queue.get_timestamp_period(), device.features());
let mut profiler = GpuProfiler::new(&adapter, &device, &queue, 4);
let mut latest_profiler_results = None;

event_loop.run(move |event, _, control_flow| {
Expand All @@ -137,11 +138,15 @@ async fn run(event_loop: EventLoop<()>, window: Window) {
window.request_redraw();
}
Event::RedrawRequested(_) => {
profiling::scope!("Redraw Requested");

let frame = surface.get_current_texture().expect("Failed to acquire next surface texture");
let frame_view = frame.texture.create_view(&wgpu::TextureViewDescriptor::default());
let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });

wgpu_profiler!("rendering", &mut profiler, &mut encoder, &device, {
profiling::scope!("Rendering");

let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
label: None,
color_attachments: &[Some(wgpu::RenderPassColorAttachment {
Expand Down Expand Up @@ -188,8 +193,16 @@ async fn run(event_loop: EventLoop<()>, window: Window) {
// Resolves any queries that might be in flight.
profiler.resolve_queries(&mut encoder);

queue.submit(Some(encoder.finish()));
frame.present();
{
profiling::scope!("Submit");
queue.submit(Some(encoder.finish()));
}
{
profiling::scope!("Present");
frame.present();
}

profiling::finish_frame!();

// Signal to the profiler that the frame is finished.
profiler.end_frame().unwrap();
Expand Down Expand Up @@ -228,8 +241,12 @@ async fn run(event_loop: EventLoop<()>, window: Window) {
}

fn main() {
tracy_client::Client::start();
//env_logger::init_from_env(env_logger::Env::default().filter_or(env_logger::DEFAULT_FILTER_ENV, "warn"));
let event_loop = EventLoop::new();
let window = winit::window::Window::new(&event_loop).unwrap();
let window = winit::window::WindowBuilder::new()
.with_fullscreen(Some(winit::window::Fullscreen::Borderless(None)))
.build(&event_loop)
.unwrap();
futures_lite::future::block_on(run(event_loop, window));
}
38 changes: 34 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ use std::{convert::TryInto, ops::Range, thread::ThreadId};
pub mod chrometrace;
pub mod macros;
pub mod scope;
#[cfg(feature = "tracy")]
pub mod tracy;

pub struct GpuTimerScopeResult {
pub label: String,
Expand Down Expand Up @@ -107,6 +109,9 @@ pub struct GpuProfiler {

max_num_pending_frames: usize,
timestamp_to_sec: f64,

#[cfg(feature = "tracy")]
tracy_context: tracy_client::GpuContext,
}

// Public interface
Expand All @@ -133,8 +138,10 @@ impl GpuProfiler {
/// (Typical values for `max_num_pending_frames` are 2~4)
///
/// The timestamp period is read from `queue` via [`wgpu::Queue::get_timestamp_period`] and the active features from `device` via [`wgpu::Device::features`].
pub fn new(max_num_pending_frames: usize, timestamp_period: f32, active_features: wgpu::Features) -> Self {
pub fn new(_adapter: &wgpu::Adapter, device: &wgpu::Device, queue: &wgpu::Queue, max_num_pending_frames: usize) -> Self {
assert!(max_num_pending_frames > 0);
let active_features = device.features();
let timestamp_period = queue.get_timestamp_period();
GpuProfiler {
enable_pass_timer: active_features.contains(wgpu::Features::TIMESTAMP_QUERY_INSIDE_PASSES),
enable_encoder_timer: active_features.contains(wgpu::Features::TIMESTAMP_QUERY),
Expand All @@ -154,6 +161,9 @@ impl GpuProfiler {

max_num_pending_frames,
timestamp_to_sec: timestamp_period as f64 / 1000.0 / 1000.0 / 1000.0,

#[cfg(feature = "tracy")]
tracy_context: tracy::create_tracy_gpu_client(_adapter.get_info().backend, device, queue, timestamp_period),
}
}

Expand All @@ -164,6 +174,7 @@ impl GpuProfiler {
/// May create new wgpu query objects (which is why it needs a [`wgpu::Device`] reference)
///
/// See also [`wgpu_profiler!`], [`GpuProfiler::end_scope`]
#[track_caller]
pub fn begin_scope<Recorder: ProfilerCommandRecorder>(&mut self, label: &str, encoder_or_pass: &mut Recorder, device: &wgpu::Device) {
if (encoder_or_pass.is_pass() && self.enable_pass_timer) || (!encoder_or_pass.is_pass() && self.enable_encoder_timer) {
let start_query = self.allocate_query_pair(device);
Expand All @@ -176,12 +187,16 @@ impl GpuProfiler {
let pid = std::process::id();
let tid = std::thread::current().id();

let _location = std::panic::Location::caller();

self.open_scopes.push(UnprocessedTimerScope {
label: String::from(label),
start_query,
nested_scopes: Vec::new(),
pid,
tid,
#[cfg(feature = "tracy")]
tracy_scope: self.tracy_context.span_alloc(label, "", _location.file(), _location.line()).ok(),
});
}
if self.enable_debug_marker {
Expand All @@ -196,11 +211,19 @@ impl GpuProfiler {
/// See also [`wgpu_profiler!`], [`GpuProfiler::begin_scope`]
pub fn end_scope<Recorder: ProfilerCommandRecorder>(&mut self, encoder_or_pass: &mut Recorder) {
if (encoder_or_pass.is_pass() && self.enable_pass_timer) || (!encoder_or_pass.is_pass() && self.enable_encoder_timer) {
let open_scope = self.open_scopes.pop().expect("No profiler GpuProfiler scope was previously opened");
let mut open_scope = self.open_scopes.pop().expect("No profiler GpuProfiler scope was previously opened");
encoder_or_pass.write_timestamp(
&self.active_frame.query_pools[open_scope.start_query.pool_idx as usize].query_set,
open_scope.start_query.query_idx + 1,
);

#[cfg(feature = "tracy")]
if let Some(ref mut tracy_scope) = open_scope.tracy_scope {
tracy_scope.end_zone();
}
#[cfg(not(feature = "tracy"))]
let _ = &mut open_scope;

if let Some(open_parent_scope) = self.open_scopes.last_mut() {
open_parent_scope.nested_scopes.push(open_scope);
} else {
Expand Down Expand Up @@ -329,8 +352,8 @@ impl GpuProfiler {
// Internals
// --------------------------------------------------------------------------------

const QUERY_SIZE: u32 = 8; // Newer wgpu version have QUERY_SIZE
const QUERY_SET_MAX_QUERIES: u32 = 8192; // Newer wgpu version have QUERY_SET_MAX_QUERIES
const QUERY_SIZE: u32 = wgpu::QUERY_SIZE;
const QUERY_SET_MAX_QUERIES: u32 = wgpu::QUERY_SET_MAX_QUERIES;

impl GpuProfiler {
fn reset_and_cache_unused_query_pools(&mut self, mut query_pools: Vec<QueryPool>) {
Expand Down Expand Up @@ -409,6 +432,11 @@ impl GpuProfiler {
.unwrap(),
);

#[cfg(feature = "tracy")]
if let Some(tracy_scope) = scope.tracy_scope {
tracy_scope.upload_timestamp(start_raw as i64, end_raw as i64);
}

GpuTimerScopeResult {
label: scope.label,
time: (start_raw as f64 * timestamp_to_sec)..(end_raw as f64 * timestamp_to_sec),
Expand All @@ -433,6 +461,8 @@ struct UnprocessedTimerScope {
nested_scopes: Vec<UnprocessedTimerScope>,
pub pid: u32,
pub tid: ThreadId,
#[cfg(feature = "tracy")]
tracy_scope: Option<tracy_client::GpuSpan>,
}

/// A pool of queries, consisting of a single queryset & buffer for query results.
Expand Down
13 changes: 13 additions & 0 deletions src/scope.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,15 @@ pub struct ManualOwningScope<'a, W: ProfilerCommandRecorder> {
impl<'a, W: ProfilerCommandRecorder> Scope<'a, W> {
/// Starts a new profiler scope. Scope is closed on drop.
///
/// Opens the scope by calling [`GpuProfiler::begin_scope`] with `label`, `recorder` and `device`,
/// then returns a `Scope` that keeps the `profiler` and `recorder` borrows.
///
/// Marked `#[track_caller]` so that `begin_scope`, which reads `std::panic::Location::caller()`,
/// sees the location of the code calling this function rather than this function itself.
#[must_use]
#[track_caller]
pub fn start(label: &str, profiler: &'a mut GpuProfiler, recorder: &'a mut W, device: &wgpu::Device) -> Self {
// Open the scope first; the `Scope` value is only built once `begin_scope` has returned.
profiler.begin_scope(label, recorder, device);
Self { profiler, recorder }
}

/// Opens a child scope inside this one.
///
/// The child reborrows this scope's profiler and recorder, so this scope cannot be
/// used again until the returned [`Scope`] is gone. `#[track_caller]` lets the
/// caller's location pass through to [`Scope::start`].
#[must_use]
#[track_caller]
pub fn scope(&mut self, label: &str, device: &wgpu::Device) -> Scope<'_, W> {
    let profiler = &mut *self.profiler;
    let recorder = &mut *self.recorder;
    Scope::start(label, profiler, recorder, device)
}
Expand All @@ -45,13 +47,15 @@ impl<'a, W: ProfilerCommandRecorder> Scope<'a, W> {
impl<'a, W: ProfilerCommandRecorder> OwningScope<'a, W> {
/// Starts a new profiler scope. Scope is closed on drop.
///
/// Takes ownership of `recorder`, opens the scope by calling [`GpuProfiler::begin_scope`]
/// with `label`, the recorder and `device`, then returns an `OwningScope` holding the
/// `profiler` borrow and the recorder.
///
/// Marked `#[track_caller]` so that `begin_scope`, which reads `std::panic::Location::caller()`,
/// sees the location of the code calling this function rather than this function itself.
#[must_use]
#[track_caller]
pub fn start(label: &str, profiler: &'a mut GpuProfiler, mut recorder: W, device: &wgpu::Device) -> Self {
// Open the scope first; the `OwningScope` value is only built once `begin_scope` has returned.
profiler.begin_scope(label, &mut recorder, device);
Self { profiler, recorder }
}

/// Opens a child scope inside this one.
///
/// The child borrows this scope's profiler and its owned recorder, so this scope
/// cannot be used again until the returned [`Scope`] is gone. `#[track_caller]`
/// lets the caller's location pass through to [`Scope::start`].
#[must_use]
#[track_caller]
pub fn scope(&mut self, label: &str, device: &wgpu::Device) -> Scope<'_, W> {
    let profiler = &mut *self.profiler;
    let recorder = &mut self.recorder;
    Scope::start(label, profiler, recorder, device)
}
Expand All @@ -60,27 +64,31 @@ impl<'a, W: ProfilerCommandRecorder> OwningScope<'a, W> {
impl<'a, W: ProfilerCommandRecorder> ManualOwningScope<'a, W> {
/// Starts a new profiler scope. Scope is NOT closed on drop and needs to be closed manually with [`ManualOwningScope::end_scope`]
#[must_use]
#[track_caller]
pub fn start(label: &str, profiler: &'a mut GpuProfiler, mut recorder: W, device: &wgpu::Device) -> Self {
profiler.begin_scope(label, &mut recorder, device);
Self { profiler, recorder }
}

/// Starts a scope nested within this one
#[must_use]
#[track_caller]
pub fn scope(&mut self, label: &str, device: &wgpu::Device) -> Scope<'_, W> {
Scope::start(label, self.profiler, &mut self.recorder, device)
}

/// Ends the scope allowing the extraction of owned the ProfilerCommandRecorder
/// and the mutable reference to the GpuProfiler.
#[must_use]
#[track_caller]
pub fn end_scope(mut self) -> (W, &'a mut GpuProfiler) {
self.profiler.end_scope(&mut self.recorder);
(self.recorder, self.profiler)
}
}
impl<'a> Scope<'a, wgpu::CommandEncoder> {
/// Start a render pass wrapped in an OwningScope.
#[track_caller]
pub fn scoped_render_pass<'b>(
&'b mut self,
label: &str,
Expand All @@ -92,6 +100,7 @@ impl<'a> Scope<'a, wgpu::CommandEncoder> {
}

/// Start a compute pass wrapped in an OwningScope.
#[track_caller]
pub fn scoped_compute_pass(
&mut self,
label: &str,
Expand All @@ -105,6 +114,7 @@ impl<'a> Scope<'a, wgpu::CommandEncoder> {

impl<'a> OwningScope<'a, wgpu::CommandEncoder> {
/// Start a render pass wrapped in an OwningScope.
#[track_caller]
pub fn scoped_render_pass<'b>(
&'b mut self,
label: &str,
Expand All @@ -116,6 +126,7 @@ impl<'a> OwningScope<'a, wgpu::CommandEncoder> {
}

/// Start a compute pass wrapped in an OwningScope.
#[track_caller]
pub fn scoped_compute_pass(
&mut self,
label: &str,
Expand All @@ -129,6 +140,7 @@ impl<'a> OwningScope<'a, wgpu::CommandEncoder> {

impl<'a> ManualOwningScope<'a, wgpu::CommandEncoder> {
/// Start a render pass wrapped in an OwningScope.
#[track_caller]
pub fn scoped_render_pass<'b>(
&'b mut self,
label: &str,
Expand All @@ -140,6 +152,7 @@ impl<'a> ManualOwningScope<'a, wgpu::CommandEncoder> {
}

/// Start a compute pass wrapped in an OwningScope.
#[track_caller]
pub fn scoped_compute_pass(
&mut self,
label: &str,
Expand Down
53 changes: 53 additions & 0 deletions src/tracy.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
pub(crate) fn create_tracy_gpu_client(
backend: wgpu::Backend,
device: &wgpu::Device,
queue: &wgpu::Queue,
timestamp_period: f32,
) -> tracy_client::GpuContext {
let query_set = device.create_query_set(&wgpu::QuerySetDescriptor {
label: Some("wgpu-profiler gpu -> cpu sync query_set"),
ty: wgpu::QueryType::Timestamp,
count: 1,
});

let resolve_buffer = device.create_buffer(&wgpu::BufferDescriptor {
label: Some("wgpu-profiler gpu -> cpu resolve buffer"),
size: crate::QUERY_SIZE as _,
usage: wgpu::BufferUsages::QUERY_RESOLVE | wgpu::BufferUsages::COPY_SRC,
mapped_at_creation: false,
});

let map_buffer = device.create_buffer(&wgpu::BufferDescriptor {
label: Some("wgpu-profiler gpu -> cpu map buffer"),
size: crate::QUERY_SIZE as _,
usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
mapped_at_creation: false,
});

let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
label: Some("wgpu-profiler gpu -> cpu sync cmd_buf"),
});
encoder.write_timestamp(&query_set, 0);
encoder.resolve_query_set(&query_set, 0..1, &resolve_buffer, 0);
encoder.copy_buffer_to_buffer(&resolve_buffer, 0, &map_buffer, 0, crate::QUERY_SIZE as _);
queue.submit(Some(encoder.finish()));

let _ = map_buffer.slice(..).map_async(wgpu::MapMode::Read, |_| ());
device.poll(wgpu::Maintain::Wait);

let view = map_buffer.slice(..).get_mapped_range();
let timestamp: i64 = i64::from_le_bytes((*view).try_into().unwrap());

let tracy_backend = match backend {
wgpu::Backend::Empty | wgpu::Backend::Metal | wgpu::Backend::BrowserWebGpu => tracy_client::GpuContextType::Invalid,
wgpu::Backend::Vulkan => tracy_client::GpuContextType::Vulkan,
wgpu::Backend::Dx12 => tracy_client::GpuContextType::Direct3D12,
wgpu::Backend::Dx11 => tracy_client::GpuContextType::Direct3D11,
wgpu::Backend::Gl => tracy_client::GpuContextType::OpenGL,
};

tracy_client::Client::running()
.expect("tracy client not running")
.new_gpu_context(Some("wgpu"), tracy_backend, timestamp, timestamp_period)
.unwrap()
}
2 changes: 1 addition & 1 deletion tests/dropped_frame_handling.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ async fn handle_dropped_frames_gracefully_async() {
.unwrap();

// max_num_pending_frames is one!
let mut profiler = wgpu_profiler::GpuProfiler::new(1, queue.get_timestamp_period(), device.features());
let mut profiler = wgpu_profiler::GpuProfiler::new(&adapter, &device, &queue, 1);

// Two frames without device poll, causing the profiler to drop a frame on the second round.
for _ in 0..2 {
Expand Down

0 comments on commit edc6d30

Please sign in to comment.