linebender · DJMcNab · May 13, 2024 · Jun 6, 2024 · Jun 6, 2024 · Jun 7, 2024
diff --git a/examples/scenes/src/mmark.rs b/examples/scenes/src/mmark.rs
@@ -74,7 +74,8 @@ impl TestScene for MMark {
         let n = if c < 10 {
             (c + 1) * 1000
         } else {
-            ((c - 8) * 10000).min(120_000)
+            // The 190_000 scene can't be uploaded due to wgpu's limits
+            ((c - 8) * 10000).min(190_000)
         };
         self.resize(n);
         let mut rng = rand::thread_rng();

diff --git a/examples/with_winit/src/stats.rs b/examples/with_winit/src/stats.rs
@@ -75,7 +75,7 @@ impl Snapshot {
             labels.push(format!("ptcl: {}", bump.ptcl));
             labels.push(format!("tile: {}", bump.tile));
             labels.push(format!("segments: {}", bump.segments));
-            labels.push(format!("blend: {}", bump.blend));
+            labels.push(format!("blend: {}", bump.blend_spill));
         }
 
         // height / 2 is dedicated to the text labels and the rest is filled by the bar graph.

diff --git a/vello/src/lib.rs b/vello/src/lib.rs
diff --git a/vello/src/render.rs b/vello/src/render.rs
@@ -13,7 +13,9 @@ use crate::{AaConfig, RenderParams};
 #[cfg(feature = "wgpu")]
 use crate::Scene;
 
-use vello_encoding::{make_mask_lut, make_mask_lut_16, Encoding, Resolver, WorkgroupSize};
+use vello_encoding::{
+    make_mask_lut, make_mask_lut_16, BumpAllocators, Encoding, Resolver, WorkgroupSize,
+};
 
 /// State for a render in progress.
 pub struct Render {
@@ -82,8 +84,9 @@ pub(crate) fn render_full(
     resolver: &mut Resolver,
     shaders: &FullShaders,
     params: &RenderParams,
-) -> (Recording, ResourceProxy) {
-    render_encoding_full(scene.encoding(), resolver, shaders, params)
+    bump_sizes: BumpAllocators,
+) -> (Recording, ImageProxy, BufferProxy) {
+    render_encoding_full(scene.encoding(), resolver, shaders, params, bump_sizes)
 }
 
 #[cfg(feature = "wgpu")]
@@ -96,12 +99,15 @@ pub(crate) fn render_encoding_full(
     resolver: &mut Resolver,
     shaders: &FullShaders,
     params: &RenderParams,
-) -> (Recording, ResourceProxy) {
+    bump_sizes: BumpAllocators,
+) -> (Recording, ImageProxy, BufferProxy) {
     let mut render = Render::new();
-    let mut recording = render.render_encoding_coarse(encoding, resolver, shaders, params, false);
+    let mut recording =
+        render.render_encoding_coarse(encoding, resolver, shaders, params, bump_sizes, false);
     let out_image = render.out_image();
+    let bump_buf = render.bump_buf();
     render.record_fine(shaders, &mut recording);
-    (recording, out_image.into())
+    (recording, out_image, bump_buf)
 }
 
 impl Default for Render {
@@ -131,7 +137,8 @@ impl Render {
         resolver: &mut Resolver,
         shaders: &FullShaders,
         params: &RenderParams,
-        robust: bool,
+        bump_sizes: BumpAllocators,
+        #[cfg_attr(not(feature = "debug_layers"), allow(unused))] robust: bool,
     ) -> Recording {
         use vello_encoding::RenderConfig;
         let mut recording = Recording::default();
@@ -166,8 +173,20 @@ impl Render {
         for image in images.images {
             recording.write_image(image_atlas, image.1, image.2, image.0.clone());
         }
-        let cpu_config =
-            RenderConfig::new(&layout, params.width, params.height, &params.base_color);
+        let cpu_config = RenderConfig::new(
+            layout,
+            params.width,
+            params.height,
+            &params.base_color,
+            bump_sizes,
+        );
+        // log::debug!("Config: {{ lines_size: {:?}, binning_size: {:?}, tiles_size: {:?}, seg_counts_size: {:?}, segments_size: {:?}, ptcl_size: {:?} }}",
+        // cpu_config.gpu.lines_size,
+        // cpu_config.gpu.binning_size,
+        // cpu_config.gpu.tiles_size,
+        // cpu_config.gpu.seg_counts_size,
+        // cpu_config.gpu.segments_size,
+        // cpu_config.gpu.ptcl_size);
         let buffer_sizes = &cpu_config.buffer_sizes;
         let wg_counts = &cpu_config.workgroup_counts;
 
@@ -183,18 +202,28 @@ impl Render {
             recording.upload_uniform("config", bytemuck::bytes_of(&cpu_config.gpu)),
         );
         let info_bin_data_buf = ResourceProxy::new_buf(
-            buffer_sizes.bin_data.size_in_bytes() as u64,
+            buffer_sizes.bump_buffers.binning.size_in_bytes() as u64,
             "info_bin_data_buf",
         );
-        let tile_buf =
-            ResourceProxy::new_buf(buffer_sizes.tiles.size_in_bytes().into(), "tile_buf");
-        let segments_buf =
-            ResourceProxy::new_buf(buffer_sizes.segments.size_in_bytes().into(), "segments_buf");
-        let ptcl_buf = ResourceProxy::new_buf(buffer_sizes.ptcl.size_in_bytes().into(), "ptcl_buf");
+        let tile_buf = ResourceProxy::new_buf(
+            buffer_sizes.bump_buffers.tile.size_in_bytes().into(),
+            "tile_buf",
+        );
+        let segments_buf = ResourceProxy::new_buf(
+            buffer_sizes.bump_buffers.segments.size_in_bytes().into(),
+            "segments_buf",
+        );
+        let ptcl_buf = ResourceProxy::new_buf(
+            buffer_sizes.bump_buffers.ptcl.size_in_bytes().into(),
+            "ptcl_buf",
+        );
         let reduced_buf = ResourceProxy::new_buf(
             buffer_sizes.path_reduced.size_in_bytes().into(),
             "reduced_buf",
         );
+        let bump_buf = BufferProxy::new(buffer_sizes.bump_alloc.size_in_bytes().into(), "bump_buf");
+        let bump_buf = ResourceProxy::Buffer(bump_buf);
+        recording.dispatch(shaders.prepare, (1, 1, 1), [config_buf, bump_buf]);
         // TODO: really only need pathtag_wgs - 1
         recording.dispatch(
             shaders.pathtag_reduce,
@@ -255,11 +284,10 @@ impl Render {
             wg_counts.bbox_clear,
             [config_buf, path_bbox_buf],
         );
-        let bump_buf = BufferProxy::new(buffer_sizes.bump_alloc.size_in_bytes().into(), "bump_buf");
-        recording.clear_all(bump_buf);
-        let bump_buf = ResourceProxy::Buffer(bump_buf);
-        let lines_buf =
-            ResourceProxy::new_buf(buffer_sizes.lines.size_in_bytes().into(), "lines_buf");
+        let lines_buf = ResourceProxy::new_buf(
+            buffer_sizes.bump_buffers.lines.size_in_bytes().into(),
+            "lines_buf",
+        );
         recording.dispatch(
             shaders.flatten,
             wg_counts.flatten,
@@ -390,7 +418,7 @@ impl Render {
             [bump_buf, indirect_count_buf.into()],
         );
         let seg_counts_buf = ResourceProxy::new_buf(
-            buffer_sizes.seg_counts.size_in_bytes().into(),
+            buffer_sizes.bump_buffers.seg_counts.size_in_bytes().into(),
             "seg_counts_buf",
         );
         recording.dispatch_indirect(
@@ -429,7 +457,7 @@ impl Render {
         recording.dispatch(
             shaders.path_tiling_setup,
             wg_counts.path_tiling_setup,
-            [bump_buf, indirect_count_buf.into(), ptcl_buf],
+            [config_buf, bump_buf, indirect_count_buf.into(), ptcl_buf],
         );
         recording.dispatch_indirect(
             shaders.path_tiling,
@@ -452,7 +480,7 @@ impl Render {
         recording.free_resource(path_buf);
         let out_image = ImageProxy::new(params.width, params.height, ImageFormat::Rgba8);
         let blend_spill_buf = BufferProxy::new(
-            buffer_sizes.blend_spill.size_in_bytes().into(),
+            buffer_sizes.bump_buffers.blend_spill.size_in_bytes().into(),
             "blend_spill",
         );
         self.fine_wg_count = Some(wg_counts.fine);
@@ -469,7 +497,8 @@ impl Render {
             image_atlas: ResourceProxy::Image(image_atlas),
             out_image,
         });
-        if robust {
+        // TODO: This check is a massive hack to disable robustness if
+        if !shaders.pathtag_is_cpu {
             recording.download(*bump_buf.as_buf().unwrap());
         }
         recording.free_resource(bump_buf);
@@ -566,6 +595,7 @@ impl Render {
         recording.free_resource(fine.gradient_image);
         recording.free_resource(fine.image_atlas);
         recording.free_resource(fine.info_bin_data_buf);
+        recording.free_resource(fine.blend_spill_buf);
         // TODO: make mask buf persistent
         if let Some(mask_buf) = self.mask_buf.take() {
             recording.free_resource(mask_buf);

diff --git a/vello/src/scene.rs b/vello/src/scene.rs
@@ -14,7 +14,7 @@ use skrifa::{
     GlyphId, MetadataProvider, OutlineGlyphCollection,
 };
 #[cfg(feature = "bump_estimate")]
-use vello_encoding::BumpAllocatorMemory;
+use vello_encoding::BumpAllocators;
 use vello_encoding::{Encoding, Glyph, GlyphRun, Patch, Transform};
 
 // TODO - Document invariants and edge cases (#470)
@@ -49,7 +49,7 @@ impl Scene {
     /// Tally up the bump allocator estimate for the current state of the encoding,
     /// taking into account an optional `transform` applied to the entire scene.
     #[cfg(feature = "bump_estimate")]
-    pub fn bump_estimate(&self, transform: Option<Affine>) -> BumpAllocatorMemory {
+    pub fn bump_estimate(&self, transform: Option<Affine>) -> BumpAllocators {
         self.estimator
             .tally(transform.as_ref().map(Transform::from_kurbo).as_ref())
     }

diff --git a/vello/src/shaders.rs b/vello/src/shaders.rs
@@ -17,6 +17,7 @@ use crate::{
 
 // Shaders for the full pipeline
 pub struct FullShaders {
+    pub prepare: ShaderId,
     pub pathtag_reduce: ShaderId,
     pub pathtag_reduce2: ShaderId,
     pub pathtag_scan1: ShaderId,
@@ -101,6 +102,7 @@ pub(crate) fn full_shaders(
         };
     }
 
+    let prepare = add_shader!(prepare, [Buffer, Buffer]);
     let pathtag_reduce = add_shader!(pathtag_reduce, [Uniform, BufReadOnly, Buffer]);
     let pathtag_reduce2 = add_shader!(
         pathtag_reduce2,
@@ -194,7 +196,7 @@ pub(crate) fn full_shaders(
             Buffer,
         ]
     );
-    let path_tiling_setup = add_shader!(path_tiling_setup, [Buffer, Buffer, Buffer]);
+    let path_tiling_setup = add_shader!(path_tiling_setup, [Uniform, Buffer, Buffer, Buffer]);
     let path_tiling = add_shader!(
         path_tiling,
         [
@@ -249,6 +251,7 @@ pub(crate) fn full_shaders(
     };
 
     Ok(FullShaders {
+        prepare,
         pathtag_reduce,
         pathtag_reduce2,
         pathtag_scan,

diff --git a/vello/src/wgpu_engine.rs b/vello/src/wgpu_engine.rs
@@ -409,7 +409,9 @@ impl WgpuEngine {
                     transient_map
                         .bufs
                         .insert(buf_proxy.id, TransientBuf::Cpu(bytes));
-                    let usage = BufferUsages::UNIFORM | BufferUsages::COPY_DST;
+                    // TODO: More principled way of working out usages
+                    let usage =
+                        BufferUsages::UNIFORM | BufferUsages::COPY_DST | BufferUsages::STORAGE;
                     // Same consideration as above
                     let buf = self
                         .pool
@@ -683,6 +685,12 @@ impl WgpuEngine {
                     let src_buf = self
                         .bind_map
                         .get_gpu_buf(proxy.id)
+                        .or_else(|| {
+                            transient_map.bufs.get(&proxy.id).and_then(|it| match it {
+                                TransientBuf::Cpu(_) => None,
+                                TransientBuf::Gpu(buf) => Some(*buf),
+                            })
+                        })
                         .ok_or(Error::UnavailableBufferUsed(proxy.name, "download"))?;
                     let usage = BufferUsages::MAP_READ | BufferUsages::COPY_DST;
                     let buf = self.pool.get_buf(proxy.size, "download", usage, device);
@@ -746,6 +754,10 @@ impl WgpuEngine {
         self.downloads.get(&buf.id)
     }
 
+    pub fn take_download(&mut self, buf: BufferProxy) -> Option<Buffer> {
+        self.downloads.remove(&buf.id)
+    }
+
     pub fn free_download(&mut self, buf: BufferProxy) {
         self.downloads.remove(&buf.id);
     }
@@ -940,6 +952,16 @@ impl ResourcePool {
         device: &Device,
     ) -> Buffer {
         let rounded_size = Self::size_class(size, SIZE_CLASS_BITS);
+        // let max_storage_buffer_binding_size =
+        //     device.limits().max_storage_buffer_binding_size.into();
+        // if rounded_size > max_storage_buffer_binding_size {
+        //     if size < max_storage_buffer_binding_size {
+        //         log::warn!("Would allocate buffer {name} to be larger than {max_storage_buffer_binding_size}. Clamped");
+        //         rounded_size = max_storage_buffer_binding_size;
+        //     } else {
+        //         log::warn!("Would allocate buffer {name} to be larger than {max_storage_buffer_binding_size}, which is not allowed");
+        //     }
+        // }
         let props = BufferProperties {
             size: rounded_size,
             usages: usage,