From 8d372bfbf59d3cf290ec75023100fb8fceba77f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20K=C3=A5re=20Alsaker?= Date: Wed, 11 Oct 2023 11:32:29 +0200 Subject: [PATCH] Build only needed shaders with `--cpu` --- src/cpu_dispatch.rs | 1 + src/engine.rs | 2 +- src/lib.rs | 14 +- src/shaders.rs | 438 +++++++++++++++++++------------------------- src/util.rs | 1 + src/wgpu_engine.rs | 241 +++++++++++++++--------- 6 files changed, 344 insertions(+), 353 deletions(-) diff --git a/src/cpu_dispatch.rs b/src/cpu_dispatch.rs index 2c3409c16..01c28c31b 100644 --- a/src/cpu_dispatch.rs +++ b/src/cpu_dispatch.rs @@ -24,6 +24,7 @@ pub enum TypedBufGuard<'a, T: ?Sized> { } pub enum TypedBufGuardMut<'a, T: ?Sized> { + #[allow(dead_code)] Slice(&'a mut T), Interior(RefMut<'a, T>), } diff --git a/src/engine.rs b/src/engine.rs index a1797f53e..a122e3537 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -21,7 +21,7 @@ use std::{ pub type Error = Box; -#[derive(Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Clone, Copy, PartialEq, Eq, Hash, Default)] pub struct ShaderId(pub usize); #[derive(Clone, Copy, PartialEq, Eq, Hash)] diff --git a/src/lib.rs b/src/lib.rs index 006accd3c..c68997cc3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -116,11 +116,8 @@ pub struct RendererOptions { impl Renderer { /// Creates a new renderer for the specified device. pub fn new(device: &Device, render_options: &RendererOptions) -> Result { - let mut engine = WgpuEngine::new(); - let mut shaders = shaders::full_shaders(device, &mut engine)?; - if render_options.use_cpu { - shaders.install_cpu_shaders(&mut engine); - } + let mut engine = WgpuEngine::new(render_options.use_cpu); + let shaders = shaders::full_shaders(device, &mut engine)?; let blit = render_options .surface_format .map(|surface_format| BlitPipeline::new(device, surface_format)); @@ -240,11 +237,8 @@ impl Renderer { #[cfg(feature = "hot_reload")] pub async fn reload_shaders(&mut self, device: &Device) -> Result<()> { device.push_error_scope(wgpu::ErrorFilter::Validation); - let mut engine = WgpuEngine::new(); - let mut shaders = shaders::full_shaders(device, &mut engine)?; - if self.use_cpu { - shaders.install_cpu_shaders(&mut engine); - } + let mut engine = WgpuEngine::new(self.use_cpu); + let shaders = shaders::full_shaders(device, &mut engine)?; let error = device.pop_error_scope().await; if let Some(error) = error { return Err(error.into()); diff --git a/src/shaders.rs b/src/shaders.rs index 91d56db97..de23e077d 100644 --- a/src/shaders.rs +++ b/src/shaders.rs @@ -86,7 +86,9 @@ pub struct FullShaders { #[cfg(feature = "wgpu")] pub fn full_shaders(device: &Device, engine: &mut WgpuEngine) -> Result { + use crate::wgpu_engine::CpuShaderType; use crate::ANTIALIASING; + use BindType::*; let imports = SHARED_SHADERS .iter() @@ -109,234 +111,194 @@ pub fn full_shaders(device: &Device, engine: &mut WgpuEngine) -> Result = None; + + // Uncomment this to force use of GPU shaders from the specified shader and later even + // if `engine.use_cpu` is specified. + //let force_gpu_from = Some("binning"); + + macro_rules! add_shader { + ($name:ident, $bindings:expr, $defines:expr, $cpu:expr) => {{ + if force_gpu_from == Some(stringify!($name)) { + force_gpu = true; + } + engine.add_shader( + device, + stringify!($name), + preprocess::preprocess(shader!(stringify!($name)), &$defines, &imports).into(), + &$bindings, + if force_gpu { + CpuShaderType::Missing + } else { + $cpu + }, + )? + }}; + ($name:ident, $bindings:expr, $defines:expr) => { + add_shader!( + $name, + $bindings, + &$defines, + CpuShaderType::Present(cpu_shader::$name) + ) + }; + ($name:ident, $bindings:expr) => { + add_shader!($name, $bindings, &full_config) + }; + } + + let pathtag_reduce = add_shader!(pathtag_reduce, [Uniform, BufReadOnly, Buffer]); + let pathtag_reduce2 = add_shader!( + pathtag_reduce2, + [BufReadOnly, Buffer], + &full_config, + CpuShaderType::Skipped + ); + let pathtag_scan1 = add_shader!( + pathtag_scan1, + [BufReadOnly, BufReadOnly, Buffer], + &full_config, + CpuShaderType::Skipped + ); + let pathtag_scan = add_shader!( + pathtag_scan, + [Uniform, BufReadOnly, BufReadOnly, Buffer], + &small_config + ); + let pathtag_scan_large = add_shader!( + pathtag_scan, + [Uniform, BufReadOnly, BufReadOnly, Buffer], + &full_config, + CpuShaderType::Skipped + ); + let bbox_clear = add_shader!(bbox_clear, [Uniform, Buffer], &empty); + let flatten = add_shader!( + flatten, + [Uniform, BufReadOnly, BufReadOnly, Buffer, Buffer, Buffer] + ); + let draw_reduce = add_shader!(draw_reduce, [Uniform, BufReadOnly, Buffer], &empty); + let draw_leaf = add_shader!( + draw_leaf, + [ + Uniform, + BufReadOnly, + BufReadOnly, + BufReadOnly, + Buffer, + Buffer, + Buffer, ], - )?; - let tile_alloc = engine.add_shader( - device, - "tile_alloc", - preprocess::preprocess(shader!("tile_alloc"), &empty, &imports).into(), - &[ - BindType::Uniform, - BindType::BufReadOnly, - BindType::BufReadOnly, - BindType::Buffer, - BindType::Buffer, - BindType::Buffer, + &empty + ); + let clip_reduce = add_shader!( + clip_reduce, + [BufReadOnly, BufReadOnly, Buffer, Buffer], + &empty + ); + let clip_leaf = add_shader!( + clip_leaf, + [ + Uniform, + BufReadOnly, + BufReadOnly, + BufReadOnly, + BufReadOnly, + Buffer, + Buffer, ], - )?; - let path_count_setup = engine.add_shader( - device, - "path_count_setup", - preprocess::preprocess(shader!("path_count_setup"), &empty, &imports).into(), - &[BindType::Buffer, BindType::Buffer], - )?; - let path_count = engine.add_shader( - device, - "path_count", - preprocess::preprocess(shader!("path_count"), &full_config, &imports).into(), - &[ - BindType::Uniform, - BindType::Buffer, - BindType::BufReadOnly, - BindType::BufReadOnly, - BindType::Buffer, - BindType::Buffer, + &empty + ); + let binning = add_shader!( + binning, + [ + Uniform, + BufReadOnly, + BufReadOnly, + BufReadOnly, + Buffer, + Buffer, + Buffer, + Buffer, ], - )?; - let backdrop = engine.add_shader( - device, - "backdrop_dyn", - preprocess::preprocess(shader!("backdrop_dyn"), &empty, &imports).into(), - &[BindType::Uniform, BindType::BufReadOnly, BindType::Buffer], - )?; - let coarse = engine.add_shader( - device, - "coarse", - preprocess::preprocess(shader!("coarse"), &empty, &imports).into(), - &[ - BindType::Uniform, - BindType::BufReadOnly, - BindType::BufReadOnly, - BindType::BufReadOnly, - BindType::BufReadOnly, - BindType::BufReadOnly, - BindType::Buffer, - BindType::Buffer, - BindType::Buffer, + &empty + ); + let tile_alloc = add_shader!( + tile_alloc, + [Uniform, BufReadOnly, BufReadOnly, Buffer, Buffer, Buffer], + &empty + ); + let path_count_setup = add_shader!(path_count_setup, [Buffer, Buffer], &empty); + let path_count = add_shader!( + path_count, + [Uniform, Buffer, BufReadOnly, BufReadOnly, Buffer, Buffer] + ); + let backdrop = add_shader!( + backdrop_dyn, + [Uniform, BufReadOnly, Buffer], + &empty, + CpuShaderType::Present(cpu_shader::backdrop) + ); + let coarse = add_shader!( + coarse, + [ + Uniform, + BufReadOnly, + BufReadOnly, + BufReadOnly, + BufReadOnly, + BufReadOnly, + Buffer, + Buffer, + Buffer, ], - )?; - let path_tiling_setup = engine.add_shader( - device, - "path_tiling_setup", - preprocess::preprocess(shader!("path_tiling_setup"), &empty, &imports).into(), - &[BindType::Buffer, BindType::Buffer], - )?; - let path_tiling = engine.add_shader( - device, - "path_tiling", - preprocess::preprocess(shader!("path_tiling"), &empty, &imports).into(), - &[ - BindType::Buffer, - BindType::BufReadOnly, - BindType::BufReadOnly, - BindType::BufReadOnly, - BindType::BufReadOnly, - BindType::Buffer, + &empty + ); + let path_tiling_setup = add_shader!(path_tiling_setup, [Buffer, Buffer], &empty); + let path_tiling = add_shader!( + path_tiling, + [ + Buffer, + BufReadOnly, + BufReadOnly, + BufReadOnly, + BufReadOnly, + Buffer, ], - )?; + &empty + ); let fine = match ANTIALIASING { - crate::AaConfig::Area => engine.add_shader( - device, - "fine", - preprocess::preprocess(shader!("fine"), &full_config, &imports).into(), - &[ - BindType::Uniform, - BindType::BufReadOnly, - BindType::BufReadOnly, - BindType::BufReadOnly, - BindType::Image(ImageFormat::Rgba8), - BindType::ImageRead(ImageFormat::Rgba8), - BindType::ImageRead(ImageFormat::Rgba8), + crate::AaConfig::Area => add_shader!( + fine, + [ + Uniform, + BufReadOnly, + BufReadOnly, + BufReadOnly, + Image(ImageFormat::Rgba8), + ImageRead(ImageFormat::Rgba8), + ImageRead(ImageFormat::Rgba8), ], - )?, - _ => { - engine.add_shader( - device, - "fine", - preprocess::preprocess(shader!("fine"), &full_config, &imports).into(), - &[ - BindType::Uniform, - BindType::BufReadOnly, - BindType::BufReadOnly, - BindType::BufReadOnly, - BindType::Image(ImageFormat::Rgba8), - BindType::ImageRead(ImageFormat::Rgba8), - BindType::ImageRead(ImageFormat::Rgba8), - BindType::BufReadOnly, // mask buffer - ], - )? - } + &full_config, + CpuShaderType::Missing + ), + _ => add_shader!( + fine, + [ + Uniform, + BufReadOnly, + BufReadOnly, + BufReadOnly, + Image(ImageFormat::Rgba8), + ImageRead(ImageFormat::Rgba8), + ImageRead(ImageFormat::Rgba8), + BufReadOnly, // mask buffer + ], + &full_config, + CpuShaderType::Missing + ), }; Ok(FullShaders { pathtag_reduce, @@ -359,42 +321,10 @@ pub fn full_shaders(device: &Device, engine: &mut WgpuEngine) -> Result { ( diff --git a/src/util.rs b/src/util.rs index 85235e99c..4622d413c 100644 --- a/src/util.rs +++ b/src/util.rs @@ -137,6 +137,7 @@ impl RenderContext { .await?; let features = adapter.features(); let limits = Limits::default(); + #[allow(unused_mut)] let mut maybe_features = wgpu::Features::CLEAR_TEXTURE; #[cfg(feature = "wgpu-profiler")] { diff --git a/src/wgpu_engine.rs b/src/wgpu_engine.rs index c5359c1bb..101535211 100644 --- a/src/wgpu_engine.rs +++ b/src/wgpu_engine.rs @@ -25,13 +25,46 @@ pub struct WgpuEngine { pool: ResourcePool, bind_map: BindMap, downloads: HashMap, + pub(crate) use_cpu: bool, } -struct Shader { +struct WgpuShader { pipeline: ComputePipeline, bind_group_layout: BindGroupLayout, +} + +pub enum CpuShaderType { + Present(fn(u32, &[CpuBinding])), + Missing, + Skipped, +} + +struct CpuShader { + shader: fn(u32, &[CpuBinding]), +} + +enum ShaderKind<'a> { + Wgpu(&'a WgpuShader), + Cpu(&'a CpuShader), +} + +struct Shader { + #[allow(dead_code)] label: &'static str, - cpu_shader: Option, + wgpu: Option, + cpu: Option, +} + +impl Shader { + fn select(&self) -> ShaderKind { + if let Some(cpu) = self.cpu.as_ref() { + ShaderKind::Cpu(cpu) + } else if let Some(wgpu) = self.wgpu.as_ref() { + ShaderKind::Wgpu(wgpu) + } else { + panic!("no available shader") + } + } } pub enum ExternalResource<'a> { @@ -90,8 +123,11 @@ enum TransientBuf<'a> { } impl WgpuEngine { - pub fn new() -> WgpuEngine { - Default::default() + pub fn new(use_cpu: bool) -> WgpuEngine { + Self { + use_cpu, + ..Default::default() + } } /// Add a shader. @@ -107,7 +143,36 @@ impl WgpuEngine { label: &'static str, wgsl: Cow<'static, str>, layout: &[BindType], + cpu_shader: CpuShaderType, ) -> Result { + let mut add = |shader| { + let id = self.shaders.len(); + self.shaders.push(shader); + Ok(ShaderId(id)) + }; + + if self.use_cpu { + match cpu_shader { + CpuShaderType::Present(shader) => { + return add(Shader { + wgpu: None, + cpu: Some(CpuShader { shader }), + label, + }); + } + // This shader is unused in CPU mode, create a dummy shader + CpuShaderType::Skipped => { + return add(Shader { + wgpu: None, + cpu: None, + label, + }); + } + // Create a GPU shader as we don't have a CPU shader + CpuShaderType::Missing => {} + } + } + let shader_module = device.create_shader_module(wgpu::ShaderModuleDescriptor { label: Some(label), source: wgpu::ShaderSource::Wgsl(wgsl), @@ -176,20 +241,14 @@ impl WgpuEngine { module: &shader_module, entry_point: "main", }); - let cpu_shader = None; - let shader = Shader { - pipeline, - bind_group_layout, + add(Shader { + wgpu: Some(WgpuShader { + pipeline, + bind_group_layout, + }), + cpu: None, label, - cpu_shader, - }; - let id = self.shaders.len(); - self.shaders.push(shader); - Ok(ShaderId(id)) - } - - pub fn set_cpu_shader(&mut self, id: ShaderId, f: fn(u32, &[CpuBinding])) { - self.shaders[id.0].cpu_shader = Some(f); + }) } pub fn run_recording( @@ -318,82 +377,88 @@ impl WgpuEngine { Command::Dispatch(shader_id, wg_size, bindings) => { // println!("dispatching {:?} with {} bindings", wg_size, bindings.len()); let shader = &self.shaders[shader_id.0]; - if let Some(cpu_shader) = shader.cpu_shader { - // The current strategy is to run the CPU shader synchronously. This - // works because there is currently the added constraint that data - // can only flow from CPU to GPU, not the other way around. If and - // when we implement that, we will need to defer the execution. Of - // course, we will also need to wire up more async sychronization - // mechanisms, as the CPU dispatch can't run until the preceding - // command buffer submission completes (and, in WebGPU, the async - // mapping operations on the buffers completes). - let resources = - transient_map.create_cpu_resources(&mut self.bind_map, bindings); - cpu_shader(wg_size.0, &resources); - } else { - let bind_group = transient_map.create_bind_group( - &mut self.bind_map, - &mut self.pool, - device, - queue, - &mut encoder, - &shader.bind_group_layout, - bindings, - )?; - let mut cpass = encoder.begin_compute_pass(&Default::default()); - #[cfg(feature = "wgpu-profiler")] - profiler.begin_scope(shader.label, &mut cpass, device); - cpass.set_pipeline(&shader.pipeline); - cpass.set_bind_group(0, &bind_group, &[]); - cpass.dispatch_workgroups(wg_size.0, wg_size.1, wg_size.2); - #[cfg(feature = "wgpu-profiler")] - profiler.end_scope(&mut cpass); + match shader.select() { + ShaderKind::Cpu(cpu_shader) => { + // The current strategy is to run the CPU shader synchronously. This + // works because there is currently the added constraint that data + // can only flow from CPU to GPU, not the other way around. If and + // when we implement that, we will need to defer the execution. Of + // course, we will also need to wire up more async sychronization + // mechanisms, as the CPU dispatch can't run until the preceding + // command buffer submission completes (and, in WebGPU, the async + // mapping operations on the buffers completes). + let resources = + transient_map.create_cpu_resources(&mut self.bind_map, bindings); + (cpu_shader.shader)(wg_size.0, &resources); + } + ShaderKind::Wgpu(wgpu_shader) => { + let bind_group = transient_map.create_bind_group( + &mut self.bind_map, + &mut self.pool, + device, + queue, + &mut encoder, + &wgpu_shader.bind_group_layout, + bindings, + )?; + let mut cpass = encoder.begin_compute_pass(&Default::default()); + #[cfg(feature = "wgpu-profiler")] + profiler.begin_scope(shader.label, &mut cpass, device); + cpass.set_pipeline(&wgpu_shader.pipeline); + cpass.set_bind_group(0, &bind_group, &[]); + cpass.dispatch_workgroups(wg_size.0, wg_size.1, wg_size.2); + #[cfg(feature = "wgpu-profiler")] + profiler.end_scope(&mut cpass); + } } } Command::DispatchIndirect(shader_id, proxy, offset, bindings) => { let shader = &self.shaders[shader_id.0]; - if let Some(cpu_shader) = shader.cpu_shader { - // Same consideration as above about running the CPU shader synchronously. - let n_wg; - if let CpuBinding::BufferRW(b) = self.bind_map.get_cpu_buf(proxy.id) { - let slice = b.borrow(); - let indirect: &[u32] = bytemuck::cast_slice(&slice); - n_wg = indirect[0]; - } else { - panic!("indirect buffer missing from bind map"); + match shader.select() { + ShaderKind::Cpu(cpu_shader) => { + // Same consideration as above about running the CPU shader synchronously. + let n_wg; + if let CpuBinding::BufferRW(b) = self.bind_map.get_cpu_buf(proxy.id) { + let slice = b.borrow(); + let indirect: &[u32] = bytemuck::cast_slice(&slice); + n_wg = indirect[0]; + } else { + panic!("indirect buffer missing from bind map"); + } + let resources = + transient_map.create_cpu_resources(&mut self.bind_map, bindings); + (cpu_shader.shader)(n_wg, &resources); + } + ShaderKind::Wgpu(wgpu_shader) => { + let bind_group = transient_map.create_bind_group( + &mut self.bind_map, + &mut self.pool, + device, + queue, + &mut encoder, + &wgpu_shader.bind_group_layout, + bindings, + )?; + transient_map.materialize_gpu_buf_for_indirect( + &mut self.bind_map, + &mut self.pool, + device, + queue, + proxy, + ); + let mut cpass = encoder.begin_compute_pass(&Default::default()); + #[cfg(feature = "wgpu-profiler")] + profiler.begin_scope(shader.label, &mut cpass, device); + cpass.set_pipeline(&wgpu_shader.pipeline); + cpass.set_bind_group(0, &bind_group, &[]); + let buf = self + .bind_map + .get_gpu_buf(proxy.id) + .ok_or("buffer for indirect dispatch not in map")?; + cpass.dispatch_workgroups_indirect(buf, *offset); + #[cfg(feature = "wgpu-profiler")] + profiler.end_scope(&mut cpass); } - let resources = - transient_map.create_cpu_resources(&mut self.bind_map, bindings); - cpu_shader(n_wg, &resources); - } else { - let bind_group = transient_map.create_bind_group( - &mut self.bind_map, - &mut self.pool, - device, - queue, - &mut encoder, - &shader.bind_group_layout, - bindings, - )?; - transient_map.materialize_gpu_buf_for_indirect( - &mut self.bind_map, - &mut self.pool, - device, - queue, - proxy, - ); - let mut cpass = encoder.begin_compute_pass(&Default::default()); - #[cfg(feature = "wgpu-profiler")] - profiler.begin_scope(shader.label, &mut cpass, device); - cpass.set_pipeline(&shader.pipeline); - cpass.set_bind_group(0, &bind_group, &[]); - let buf = self - .bind_map - .get_gpu_buf(proxy.id) - .ok_or("buffer for indirect dispatch not in map")?; - cpass.dispatch_workgroups_indirect(buf, *offset); - #[cfg(feature = "wgpu-profiler")] - profiler.end_scope(&mut cpass); } } Command::Download(proxy) => {