linebender · raphlinus · Nov 5, 2022 · Nov 5, 2022 · Nov 6, 2022
diff --git a/piet-wgsl/shader/tile_alloc.wgsl b/piet-wgsl/shader/tile_alloc.wgsl
@@ -14,101 +14,30 @@
 //
 // Also licensed under MIT license, at your choice.
 
-// Tile allocation (and zeroing of tiles)
-
-#import config
-#import bump
-#import drawtag
-#import tile
-
 @group(0) @binding(0)
-var<storage> config: Config;
+var<storage, read_write> bump: atomic<u32>;
 
 @group(0) @binding(1)
-var<storage> scene: array<u32>;
-
-@group(0) @binding(2)
-var<storage> draw_bboxes: array<vec4<f32>>;
-
-@group(0) @binding(3)
-var<storage, read_write> bump: BumpAllocators;
-
-@group(0) @binding(4)
-var<storage, read_write> paths: array<Path>;
-
-@group(0) @binding(5)
-var<storage, read_write> tiles: array<Tile>;
+var<storage, read_write> paths: array<u32>;
 
 let WG_SIZE = 256u;
 
-var<workgroup> sh_tile_count: array<u32, WG_SIZE>;
 var<workgroup> sh_tile_offset: u32;
 
 @compute @workgroup_size(256)
 fn main(
     @builtin(global_invocation_id) global_id: vec3<u32>,
     @builtin(local_invocation_id) local_id: vec3<u32>,
 ) {
-    // scale factors useful for converting coordinates to tiles
-    // TODO: make into constants
-    let SX = 1.0 / f32(TILE_WIDTH);
-    let SY = 1.0 / f32(TILE_HEIGHT);
-
     let drawobj_ix = global_id.x;
-    var drawtag = DRAWTAG_NOP;
-    if drawobj_ix < config.n_drawobj {
-        drawtag = scene[config.drawtag_base + drawobj_ix];
-    }
-    var x0 = 0;
-    var y0 = 0;
-    var x1 = 0;
-    var y1 = 0;
-    if drawtag != DRAWTAG_NOP && drawtag != DRAWTAG_END_CLIP {
-        let bbox = draw_bboxes[drawobj_ix];
-        x0 = i32(floor(bbox.x * SX));
-        y0 = i32(floor(bbox.y * SY));
-        x1 = i32(ceil(bbox.z * SX));
-        y1 = i32(ceil(bbox.w * SY));
-    }
-    let ux0 = u32(clamp(x0, 0, i32(config.width_in_tiles)));
-    let uy0 = u32(clamp(y0, 0, i32(config.height_in_tiles)));
-    let ux1 = u32(clamp(x1, 0, i32(config.width_in_tiles)));
-    let uy1 = u32(clamp(y1, 0, i32(config.height_in_tiles)));
-    let tile_count = (ux1 - ux0) * (uy1 - uy0);
-    var total_tile_count = tile_count;
-    sh_tile_count[local_id.x] = tile_count;
-    for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
-        workgroupBarrier();
-        if local_id.x >= (1u << i) {
-            total_tile_count += sh_tile_count[local_id.x - (1u << i)];
-        }
-        workgroupBarrier();
-        sh_tile_count[local_id.x] = total_tile_count;
-    }
+    let tile_count_in = local_id.x + 1u;
     if local_id.x == WG_SIZE - 1u {
-        paths[drawobj_ix].tiles = atomicAdd(&bump.tile, sh_tile_count[WG_SIZE - 1u]);
+        sh_tile_offset = 1u + atomicAdd(&bump, tile_count_in);
     }
-    // Using storage barriers is a workaround for what appears to be a miscompilation
-    // when a normal workgroup-shared variable is used to broadcast the value.
-    storageBarrier();
-    let tile_offset = paths[drawobj_ix | (WG_SIZE - 1u)].tiles;
-    storageBarrier();
-    if drawobj_ix < config.n_drawobj {
-        let tile_subix = select(0u, sh_tile_count[local_id.x - 1u], local_id.x > 0u);
-        let bbox = vec4<u32>(ux0, uy0, ux1, uy1);
-        let path = Path(bbox, tile_offset + tile_subix);
-        paths[drawobj_ix] = path;
+    workgroupBarrier();
+    let tile_offset = sh_tile_offset;
+    if drawobj_ix < 3u {
+        paths[drawobj_ix] = tile_offset;
     }
 
-    // zero allocated memory
-    // Note: if the number of draw objects is small, utilization will be poor.
-    // There are two things that can be done to improve that. One would be a
-    // separate (indirect) dispatch. Another would be to have each workgroup
-    // process fewer draw objects than the number of threads in the wg.
-    let total_count = sh_tile_count[WG_SIZE - 1u];
-    for (var i = local_id.x; i < total_count; i += WG_SIZE) {
-        // Note: could format output buffer as u32 for even better load
-        // balancing, as does piet-gpu.
-        tiles[tile_offset + i] = Tile(0, 0u);
-    }
 }
diff --git a/piet-wgsl/src/engine.rs b/piet-wgsl/src/engine.rs
@@ -139,6 +139,7 @@ impl Engine {
             label: None,
             entries: &entries,
         });
+        println!("{:?}", bind_group_layout);
         let compute_pipeline_layout =
             device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
                 label: None,

diff --git a/piet-wgsl/src/main.rs b/piet-wgsl/src/main.rs
@@ -69,28 +69,15 @@ async fn do_render(
     queue: &Queue,
     engine: &mut Engine,
 ) -> Result<(), Box<dyn std::error::Error>> {
-    #[allow(unused)]
-    let shaders = shaders::init_shaders(device, engine)?;
-    let full_shaders = shaders::full_shaders(device, engine)?;
-    let scene = test_scene::gen_test_scene();
-    //test_scene::dump_scene_info(&scene);
-    //let (recording, buf) = render::render(&scene, &shaders);
-    let (recording, buf) = render::render_full(&scene, &full_shaders);
+    let tile_alloc = shaders::reduced_shader(device, engine);
+
+    let (recording, buf) = render::render_reduced(tile_alloc);
     let downloads = engine.run_recording(&device, &queue, &recording)?;
     let mapped = downloads.map();
     device.poll(wgpu::Maintain::Wait);
     let buf = mapped.get_mapped(buf).await?;
 
-    if false {
-        dump_buf(bytemuck::cast_slice(&buf));
-    } else {
-        let file = File::create("image.png")?;
-        let w = BufWriter::new(file);
-        let mut encoder = png::Encoder::new(w, 1024, 1024);
-        encoder.set_color(png::ColorType::Rgba);
-        let mut writer = encoder.write_header()?;
-        writer.write_image_data(&buf)?;
-    }
+    dump_buf(bytemuck::cast_slice(&buf));
     Ok(())
 }