From 35fdf1e83daad47bb1271af72ff73284c309913a Mon Sep 17 00:00:00 2001
From: Arman Uguray <armansito@google.com>
Date: Wed, 8 Nov 2023 22:07:55 -0800
Subject: [PATCH 1/6] [flatten]: Implement miter join

- Miter join style is now supported by the GPU stroker
- Slightly tweaked the miter limit test cases to exercise a miter
  limit boundary condition and test inserting the miter join on both
  sides of a stroke within the same path.
---
 examples/scenes/src/test_scenes.rs |  7 +--
 shader/flatten.wgsl                | 72 +++++++++++++++++++++++++++---
 shader/shared/pathtag.wgsl         | 14 ++++++
 3 files changed, 83 insertions(+), 10 deletions(-)

diff --git a/examples/scenes/src/test_scenes.rs b/examples/scenes/src/test_scenes.rs
index 8705f626d..74e5a937b 100644
--- a/examples/scenes/src/test_scenes.rs
+++ b/examples/scenes/src/test_scenes.rs
@@ -114,8 +114,9 @@ fn stroke_styles(sb: &mut SceneBuilder, params: &mut SceneParams) {
     ];
     let miter_stroke = [
         MoveTo((0., 0.).into()),
-        LineTo((90., 21.).into()),
-        LineTo((0., 42.).into()),
+        LineTo((90., 16.).into()),
+        LineTo((0., 31.).into()),
+        LineTo((90., 46.).into()),
     ];
     let closed_strokes = [
         MoveTo((0., 0.).into()),
@@ -131,7 +132,7 @@ fn stroke_styles(sb: &mut SceneBuilder, params: &mut SceneParams) {
     ];
     let cap_styles = [Cap::Butt, Cap::Square, Cap::Round];
     let join_styles = [Join::Bevel, Join::Miter, Join::Round];
-    let miter_limits = [4., 5., 0.1, 10.];
+    let miter_limits = [4., 6., 0.1, 10.];
 
     // Simple strokes with cap combinations
     let t = Affine::translate((60., 40.)) * Affine::scale(2.);
diff --git a/shader/flatten.wgsl b/shader/flatten.wgsl
index d15dcf8c4..bdd1a55e0 100644
--- a/shader/flatten.wgsl
+++ b/shader/flatten.wgsl
@@ -228,6 +228,63 @@ fn flatten_cubic(cubic: Cubic) {
     }
 }
 
+fn draw_join(
+    stroke: vec2f, path_ix: u32, style_flags: u32, p0: vec2f,
+    tan_prev: vec2f, tan_next: vec2f,
+    n_prev: vec2f, n_next: vec2f
+) -> vec4f {
+    var miter_pt_bbox = vec4(1e31, 1e31, -1e31, -1e31);
+    switch style_flags & STYLE_FLAGS_JOIN_MASK {
+        case /*STYLE_FLAGS_JOIN_BEVEL*/0u: {
+            let line_ix = atomicAdd(&bump.lines, 2u);
+            lines[line_ix]      = LineSoup(path_ix, p0 + n_prev, p0 + n_next);
+            lines[line_ix + 1u] = LineSoup(path_ix, p0 - n_next, p0 - n_prev);
+        }
+        case /*STYLE_FLAGS_JOIN_MITER*/0x10000000u: {
+            let c = tan_prev.x * tan_next.y - tan_prev.y * tan_next.x;
+            let d = dot(tan_prev, tan_next);
+            let hypot = length(vec2f(c, d));
+            let miter_limit = unpack2x16float(style_flags & STYLE_MITER_LIMIT_MASK).x;
+
+            var front0 = p0 + n_prev;
+            let front1 = p0 + n_next;
+            var back0 = p0 - n_next;
+            let back1 = p0 - n_prev;
+
+            if 2. * hypot < (hypot + d) * miter_limit * miter_limit && c != 0. {
+                let is_backside = c > 0.;
+                let fp_last = select(front0, back1, is_backside);
+                let fp_this = select(front1, back0, is_backside);
+                let p = select(front0, back0, is_backside);
+
+                let v = fp_this - fp_last;
+                let h = (tan_prev.x * v.y - tan_prev.y * v.x) / c;
+                let miter_pt = fp_this - tan_next * h;
+
+                let line_ix = atomicAdd(&bump.lines, 1u);
+                lines[line_ix] = LineSoup(path_ix, p, miter_pt);
+                if is_backside {
+                    back0 = miter_pt;
+                } else {
+                    front0 = miter_pt;
+                }
+                miter_pt_bbox = vec4(miter_pt, miter_pt);
+            }
+            let line_ix = atomicAdd(&bump.lines, 2u);
+            lines[line_ix]      = LineSoup(path_ix, front0, front1);
+            lines[line_ix + 1u] = LineSoup(path_ix, back0, back1);
+        }
+        case /*STYLE_FLAGS_JOIN_ROUND*/0x20000000u: {
+            // TODO: round join
+            let line_ix = atomicAdd(&bump.lines, 2u);
+            lines[line_ix]      = LineSoup(path_ix, p0 + n_prev, p0 + n_next);
+            lines[line_ix + 1u] = LineSoup(path_ix, p0 - n_next, p0 - n_prev);
+        }
+        default: {}
+    }
+    return miter_pt_bbox;
+}
+
 var<private> pathdata_base: u32;
 
 fn read_f32_point(ix: u32) -> vec2f {
@@ -442,17 +499,18 @@ fn main(
 
                 // Read the neighboring segment.
                 let neighbor = read_neighboring_segment(ix + 1u);
-                let n = normalize(cubic_end_normal(pts.p0, pts.p1, pts.p2, pts.p3)) * stroke;
+                let tan_prev = cubic_end_tangent(pts.p0, pts.p1, pts.p2, pts.p3);
+                let tan_next = neighbor.tangent;
+                let n_prev = normalize(tan_prev).yx * vec2f(-1., 1.) * stroke;
+                let n_next = normalize(tan_next).yx * vec2f(-1., 1.) * stroke;
                 if neighbor.do_join {
-                    // Draw join.
-                    let nn = normalize(vec2(-neighbor.tangent.y, neighbor.tangent.x)) * stroke;
-                    let line_ix = atomicAdd(&bump.lines, 2u);
-                    lines[line_ix]      = LineSoup(path_ix, pts.p3 + n, neighbor.p0 + nn);
-                    lines[line_ix + 1u] = LineSoup(path_ix, neighbor.p0 - nn, pts.p3 - n);
+                    let miter_pt = draw_join(stroke, path_ix, style_flags, pts.p3,
+                                             tan_prev, tan_next, n_prev, n_next);
+                    bbox = vec4(min(miter_pt.xy, bbox.xy), max(miter_pt.zw, bbox.zw));
                 } else {
                     // Draw end cap.
                     let line_ix = atomicAdd(&bump.lines, 1u);
-                    lines[line_ix] = LineSoup(path_ix, pts.p3 + n, pts.p3 - n);
+                    lines[line_ix] = LineSoup(path_ix, pts.p3 + n_prev, pts.p3 - n_prev);
                 }
             }
         } else {
diff --git a/shader/shared/pathtag.wgsl b/shader/shared/pathtag.wgsl
index 86f2565ae..11979f960 100644
--- a/shader/shared/pathtag.wgsl
+++ b/shader/shared/pathtag.wgsl
@@ -25,8 +25,22 @@ let PATH_TAG_SUBPATH_END = 4u;
 
 // Size of the `Style` data structure in words
 let STYLE_SIZE_IN_WORDS: u32 = 2u;
+
 let STYLE_FLAGS_STYLE: u32 = 0x80000000u;
 let STYLE_FLAGS_FILL: u32 = 0x40000000u;
+let STYLE_MITER_LIMIT_MASK: u32 = 0xFFFFu;
+
+let STYLE_FLAGS_START_CAP_MASK: u32 = 0x0C000000u;
+let STYLE_FLAGS_END_CAP_MASK: u32 = 0x03000000u;
+
+let STYLE_FLAGS_CAP_BUTT: u32 = 0u;
+let STYLE_FLAGS_CAP_SQUARE: u32 = 0x01000000u;
+let STYLE_FLAGS_CAP_ROUND: u32 = 0x02000000u;
+
+let STYLE_FLAGS_JOIN_MASK: u32 = 0x30000000u;
+let STYLE_FLAGS_JOIN_BEVEL: u32 = 0u;
+let STYLE_FLAGS_JOIN_MITER: u32 = 0x10000000u;
+let STYLE_FLAGS_JOIN_ROUND: u32 = 0x20000000u;
 
 // TODO: Declare the remaining STYLE flags here.
 

From 42e76921fead79eb8c482ddc0e06e54688370a3e Mon Sep 17 00:00:00 2001
From: Arman Uguray <armansito@google.com>
Date: Fri, 10 Nov 2023 13:59:27 -0800
Subject: [PATCH 2/6] [flatten] Changes to transform handling

* Flattening (including for fills, offset curves, caps, and joins) is
  now performed in a curve's local coordinate space (pre-transform) and
  the transform is applied at the time a line is output. This may be
  unnecessary for fills, but it keeps the code mostly uniform.

* The line count estimate for subdivision factors in the scale factor
  which is decomposed from the transform matrix.

* The bounding box computation is now precise and purely based on the
  union of output line segments.
---
 shader/flatten.wgsl | 171 ++++++++++++++++++++++++--------------------
 1 file changed, 93 insertions(+), 78 deletions(-)

diff --git a/shader/flatten.wgsl b/shader/flatten.wgsl
index bdd1a55e0..160876f1a 100644
--- a/shader/flatten.wgsl
+++ b/shader/flatten.wgsl
@@ -52,7 +52,7 @@ fn approx_parabola_inv_integral(x: f32) -> f32 {
     return x * sqrt(1.0 - B + (B * B + 0.5 * x * x));
 }
 
-fn estimate_subdiv(p0: vec2f, p1: vec2f, p2: vec2f, sqrt_tol: f32) -> SubdivResult {
+fn estimate_subdiv(p0: vec2f, p1: vec2f, p2: vec2f, sqrt_tol: f32, transform_scale: f32) -> SubdivResult {
     let d01 = p1 - p0;
     let d12 = p2 - p1;
     let dd = d01 - d12;
@@ -60,7 +60,7 @@ fn estimate_subdiv(p0: vec2f, p1: vec2f, p2: vec2f, sqrt_tol: f32) -> SubdivResu
     let cross_inv = select(1.0 / cross, 1.0e9, abs(cross) < 1.0e-9);
     let x0 = dot(d01, dd) * cross_inv;
     let x2 = dot(d12, dd) * cross_inv;
-    let scale = abs(cross / (length(dd) * (x2 - x0)));
+    let scale = abs(transform_scale * cross / (length(dd) * (x2 - x0)));
 
     let a0 = approx_parabola_integral(x0);
     let a2 = approx_parabola_integral(x2);
@@ -128,7 +128,7 @@ fn cubic_end_normal(p0: vec2f, p1: vec2f, p2: vec2f, p3: vec2f) -> vec2f {
 
 let MAX_QUADS = 16u;
 
-fn flatten_cubic(cubic: Cubic) {
+fn flatten_cubic(cubic: Cubic, transform: Transform, offset: f32) {
     let p0 = cubic.p0;
     let p1 = cubic.p1;
     let p2 = cubic.p2;
@@ -139,7 +139,9 @@ fn flatten_cubic(cubic: Cubic) {
     let Q_ACCURACY = ACCURACY * 0.1;
     let REM_ACCURACY = ACCURACY - Q_ACCURACY;
     let MAX_HYPOT2 = 432.0 * Q_ACCURACY * Q_ACCURACY;
-    var n_quads = max(u32(ceil(pow(err * (1.0 / MAX_HYPOT2), 1.0 / 6.0))), 1u);
+    let scale = vec2(length(transform.mat.xz), length(transform.mat.yw));
+    let scale_factor = max(scale.x, scale.y);
+    var n_quads = max(u32(ceil(pow(err * (1.0 / MAX_HYPOT2), 1.0 / 6.0)) * scale_factor), 1u);
     n_quads = min(n_quads, MAX_QUADS);
     var keep_params: array<SubdivResult, MAX_QUADS>;
     var val = 0.0;
@@ -151,20 +153,16 @@ fn flatten_cubic(cubic: Cubic) {
         var qp1 = eval_cubic(p0, p1, p2, p3, t - 0.5 * step);
         qp1 = 2.0 * qp1 - 0.5 * (qp0 + qp2);
 
-        // HACK: this increases subdivision count as a function of the stroke width for shitty
-        // strokes. This isn't systematic or correct and shouldn't be relied on in the long term.
-        var tol = sqrt(REM_ACCURACY);
-        if cubic.flags == CUBIC_IS_STROKE {
-            tol *= min(1000., dot(cubic.stroke, cubic.stroke));
-        }
-        let params = estimate_subdiv(qp0, qp1, qp2, tol);
+        // TODO: Estimate an accurate subdivision count for strokes, handling cusps.
+        let tol = sqrt(REM_ACCURACY);
+        let params = estimate_subdiv(qp0, qp1, qp2, tol, scale_factor);
         keep_params[i] = params;
         val += params.val;
         qp0 = qp2;
     }
 
-    // HACK: normal vector used to offset line segments for shitty stroke handling.
-    var n0 = cubic_start_normal(p0, p1, p2, p3) * cubic.stroke;
+    // Normal vector to calculate the start point of the offset curve.
+    var n0 = offset * cubic_start_normal(p0, p1, p2, p3);
 
     let n = max(u32(ceil(val * (0.5 / sqrt(REM_ACCURACY)))), 1u);
     var lp0 = p0;
@@ -208,16 +206,15 @@ fn flatten_cubic(cubic: Cubic) {
                 } else {
                     n1 = eval_quad_normal(qp0, qp1, qp2, t1);
                 }
-                n1 *= cubic.stroke;
-                let line_ix = atomicAdd(&bump.lines, 2u);
-                lines[line_ix]      = LineSoup(cubic.path_ix, lp0 + n0, lp1 + n1);
-                lines[line_ix + 1u] = LineSoup(cubic.path_ix, lp1 - n1, lp0 - n0);
+                n1 *= offset;
+                output_two_lines_with_transform(cubic.path_ix,
+                                                lp0 + n0, lp1 + n1,
+                                                lp1 - n1, lp0 - n0,
+                                                transform);
                 n0 = n1;
             } else {
                 // Output line segment lp0..lp1
-                let line_ix = atomicAdd(&bump.lines, 1u);
-                // TODO: check failure
-                lines[line_ix] = LineSoup(cubic.path_ix, lp0, lp1);
+                output_line_with_transform(cubic.path_ix, lp0, lp1, transform);
             }
             n_out += 1u;
             val_target += v_step;
@@ -231,14 +228,15 @@ fn flatten_cubic(cubic: Cubic) {
 fn draw_join(
     stroke: vec2f, path_ix: u32, style_flags: u32, p0: vec2f,
     tan_prev: vec2f, tan_next: vec2f,
-    n_prev: vec2f, n_next: vec2f
-) -> vec4f {
-    var miter_pt_bbox = vec4(1e31, 1e31, -1e31, -1e31);
+    n_prev: vec2f, n_next: vec2f,
+    transform: Transform,
+) {
     switch style_flags & STYLE_FLAGS_JOIN_MASK {
         case /*STYLE_FLAGS_JOIN_BEVEL*/0u: {
-            let line_ix = atomicAdd(&bump.lines, 2u);
-            lines[line_ix]      = LineSoup(path_ix, p0 + n_prev, p0 + n_next);
-            lines[line_ix + 1u] = LineSoup(path_ix, p0 - n_next, p0 - n_prev);
+            output_two_lines_with_transform(path_ix,
+                                            p0 + n_prev, p0 + n_next,
+                                            p0 - n_next, p0 - n_prev,
+                                            transform);
         }
         case /*STYLE_FLAGS_JOIN_MITER*/0x10000000u: {
             let c = tan_prev.x * tan_next.y - tan_prev.y * tan_next.x;
@@ -260,33 +258,27 @@ fn draw_join(
                 let v = fp_this - fp_last;
                 let h = (tan_prev.x * v.y - tan_prev.y * v.x) / c;
                 let miter_pt = fp_this - tan_next * h;
+                output_line_with_transform(path_ix, p, miter_pt, transform);
 
-                let line_ix = atomicAdd(&bump.lines, 1u);
-                lines[line_ix] = LineSoup(path_ix, p, miter_pt);
                 if is_backside {
                     back0 = miter_pt;
                 } else {
                     front0 = miter_pt;
                 }
-                miter_pt_bbox = vec4(miter_pt, miter_pt);
             }
-            let line_ix = atomicAdd(&bump.lines, 2u);
-            lines[line_ix]      = LineSoup(path_ix, front0, front1);
-            lines[line_ix + 1u] = LineSoup(path_ix, back0, back1);
+            output_two_lines_with_transform(path_ix, front0, front1, back0, back1, transform);
         }
         case /*STYLE_FLAGS_JOIN_ROUND*/0x20000000u: {
             // TODO: round join
-            let line_ix = atomicAdd(&bump.lines, 2u);
-            lines[line_ix]      = LineSoup(path_ix, p0 + n_prev, p0 + n_next);
-            lines[line_ix + 1u] = LineSoup(path_ix, p0 - n_next, p0 - n_prev);
+            output_two_lines_with_transform(path_ix,
+                                            p0 + n_prev, p0 + n_next,
+                                            p0 - n_next, p0 - n_prev,
+                                            transform);
         }
         default: {}
     }
-    return miter_pt_bbox;
 }
 
-var<private> pathdata_base: u32;
-
 fn read_f32_point(ix: u32) -> vec2f {
     let x = bitcast<f32>(scene[pathdata_base + ix]);
     let y = bitcast<f32>(scene[pathdata_base + ix + 1u]);
@@ -352,7 +344,7 @@ struct CubicPoints {
     p3: vec2f,
 }
 
-fn read_path_segment(tag: PathTagData, transform: Transform, is_stroke: bool) -> CubicPoints {
+fn read_path_segment(tag: PathTagData, is_stroke: bool) -> CubicPoints {
     var p0: vec2f;
     var p1: vec2f;
     var p2: vec2f;
@@ -391,12 +383,9 @@ fn read_path_segment(tag: PathTagData, transform: Transform, is_stroke: bool) ->
         // This is encoded this way because encoding this as a lineto would require adding a moveto,
         // which would terminate the subpath too early (by setting the SUBPATH_END on the
         // segment preceding the cap marker). This scheme is only used for strokes.
-        p0 = transform_apply(transform, p1);
-        p1 = transform_apply(transform, p2);
+        p0 = p1;
+        p1 = p2;
         seg_type = PATH_TAG_LINETO;
-    } else {
-        p0 = transform_apply(transform, p0);
-        p1 = transform_apply(transform, p1);
     }
 
     // Degree-raise
@@ -404,20 +393,47 @@ fn read_path_segment(tag: PathTagData, transform: Transform, is_stroke: bool) ->
         p3 = p1;
         p2 = mix(p3, p0, 1.0 / 3.0);
         p1 = mix(p0, p3, 1.0 / 3.0);
-    } else if seg_type >= PATH_TAG_QUADTO {
-        p2 = transform_apply(transform, p2);
-        if seg_type == PATH_TAG_CUBICTO {
-            p3 = transform_apply(transform, p3);
-        } else {
-            p3 = p2;
-            p2 = mix(p1, p2, 1.0 / 3.0);
-            p1 = mix(p1, p0, 1.0 / 3.0);
-        }
+    } else if seg_type == PATH_TAG_QUADTO {
+        p3 = p2;
+        p2 = mix(p1, p2, 1.0 / 3.0);
+        p1 = mix(p1, p0, 1.0 / 3.0);
     }
 
     return CubicPoints(p0, p1, p2, p3);
 }
 
+fn output_line(path_ix: u32, p0: vec2f, p1: vec2f) {
+    let line_ix = atomicAdd(&bump.lines, 1u);
+    bbox = vec4(min(bbox.xy, min(p0, p1)), max(bbox.zw, max(p0, p1)));
+    lines[line_ix] = LineSoup(path_ix, p0, p1);
+}
+
+fn output_line_with_transform(path_ix: u32, p0: vec2f, p1: vec2f, transform: Transform) {
+    let line_ix = atomicAdd(&bump.lines, 1u);
+    let tp0 = transform_apply(transform, p0);
+    let tp1 = transform_apply(transform, p1);
+    bbox = vec4(min(bbox.xy, min(tp0, tp1)), max(bbox.zw, max(tp0, tp1)));
+    lines[line_ix] = LineSoup(path_ix, tp0, tp1);
+}
+
+fn output_two_lines_with_transform(
+    path_ix: u32,
+    p00: vec2f, p01: vec2f,
+    p10: vec2f, p11: vec2f,
+    transform: Transform
+) {
+    let line_ix = atomicAdd(&bump.lines, 2u);
+    let tp00 = transform_apply(transform, p00);
+    let tp01 = transform_apply(transform, p01);
+    let tp10 = transform_apply(transform, p10);
+    let tp11 = transform_apply(transform, p11);
+
+    bbox = vec4(min(bbox.xy, min(tp00, tp01)), max(bbox.zw, max(tp00, tp01)));
+    bbox = vec4(min(bbox.xy, min(tp10, tp11)), max(bbox.zw, max(tp10, tp11)));
+    lines[line_ix]      = LineSoup(path_ix, tp00, tp01);
+    lines[line_ix + 1u] = LineSoup(path_ix, tp10, tp11);
+}
+
 struct NeighboringSegment {
     do_join: bool,
     p0: vec2f,
@@ -428,8 +444,7 @@ struct NeighboringSegment {
 
 fn read_neighboring_segment(ix: u32) -> NeighboringSegment {
     let tag = compute_tag_monoid(ix);
-    let transform = read_transform(config.transform_base, tag.monoid.trans_ix);
-    let pts = read_path_segment(tag, transform, true);
+    let pts = read_path_segment(tag, true);
 
     let is_closed = (tag.tag_byte & PATH_TAG_SEG_TYPE) == PATH_TAG_LINETO;
     let is_stroke_cap_marker = (tag.tag_byte & PATH_TAG_SUBPATH_END) != 0u;
@@ -439,6 +454,13 @@ fn read_neighboring_segment(ix: u32) -> NeighboringSegment {
     return NeighboringSegment(do_join, p0, tangent);
 }
 
+// `pathdata_base` is decoded once and reused by helpers above.
+var<private> pathdata_base: u32;
+
+// This is the bounding box of the shape flattened by a single shader invocation. This is adjusted
+// as lines are generated.
+var<private> bbox: vec4f;
+
 @compute @workgroup_size(256)
 fn main(
     @builtin(global_invocation_id) global_id: vec3<u32>,
@@ -446,6 +468,7 @@ fn main(
 ) {
     let ix = global_id.x;
     pathdata_base = config.pathdata_base;
+    bbox = vec4(1e31, 1e31, -1e31, -1e31);
 
     let tag = compute_tag_monoid(ix);
     let path_ix = tag.monoid.path_ix;
@@ -465,56 +488,48 @@ fn main(
     if seg_type != 0u {
         let is_stroke = (style_flags & STYLE_FLAGS_STYLE) != 0u;
         let transform = read_transform(config.transform_base, trans_ix);
-        let pts = read_path_segment(tag, transform, is_stroke);
-        var bbox = vec4(min(pts.p0, pts.p1), max(pts.p0, pts.p1));
-        bbox = vec4(min(bbox.xy, pts.p2), max(bbox.zw, pts.p2));
-        bbox = vec4(min(bbox.xy, pts.p3), max(bbox.zw, pts.p3));
+        let pts = read_path_segment(tag, is_stroke);
 
         var stroke = vec2(0.0, 0.0);
         if is_stroke {
             let linewidth = bitcast<f32>(scene[config.style_base + style_ix + 1u]);
+            let offset = 0.5 * linewidth;
+
             // See https://www.iquilezles.org/www/articles/ellipses/ellipses.htm
             // This is the correct bounding box, but we're not handling rendering
             // in the isotropic case, so it may mismatch.
-            stroke = 0.5 * linewidth * vec2(length(transform.mat.xz), length(transform.mat.yw));
-            bbox += vec4(-stroke, stroke);
+            stroke = offset * vec2(length(transform.mat.xz), length(transform.mat.yw));
+
             let is_open = (tag.tag_byte & PATH_TAG_SEG_TYPE) != PATH_TAG_LINETO;
             let is_stroke_cap_marker = (tag.tag_byte & PATH_TAG_SUBPATH_END) != 0u;
             if is_stroke_cap_marker {
                 if is_open {
-                    let n = cubic_start_normal(pts.p0, pts.p1, pts.p2, pts.p3) * stroke;
-
-                    // Draw start cap
-                    let line_ix = atomicAdd(&bump.lines, 1u);
-                    lines[line_ix] = LineSoup(path_ix, pts.p0 - n, pts.p0 + n);
+                    // Draw start cap (butt)
+                    let n = offset * cubic_start_normal(pts.p0, pts.p1, pts.p2, pts.p3);
+                    output_line_with_transform(path_ix, pts.p0 - n, pts.p0 + n, transform);
                 } else {
                     // Don't draw anything if the path is closed.
                 }
-                // The stroke cap marker does not contribute to the path's bounding box. The stroke
-                // width is accounted for when computing the bbox for regular segments.
-                bbox = vec4(1., 1., -1., -1.);
             } else {
                 // Render offset curves
-                flatten_cubic(Cubic(pts.p0, pts.p1, pts.p2, pts.p3, stroke, path_ix, u32(is_stroke)));
+                flatten_cubic(Cubic(pts.p0, pts.p1, pts.p2, pts.p3, stroke, path_ix, u32(is_stroke)), transform, offset);
 
                 // Read the neighboring segment.
                 let neighbor = read_neighboring_segment(ix + 1u);
                 let tan_prev = cubic_end_tangent(pts.p0, pts.p1, pts.p2, pts.p3);
                 let tan_next = neighbor.tangent;
-                let n_prev = normalize(tan_prev).yx * vec2f(-1., 1.) * stroke;
-                let n_next = normalize(tan_next).yx * vec2f(-1., 1.) * stroke;
+                let n_prev = offset * (normalize(tan_prev).yx * vec2f(-1., 1.));
+                let n_next = offset * (normalize(tan_next).yx * vec2f(-1., 1.));
                 if neighbor.do_join {
-                    let miter_pt = draw_join(stroke, path_ix, style_flags, pts.p3,
-                                             tan_prev, tan_next, n_prev, n_next);
-                    bbox = vec4(min(miter_pt.xy, bbox.xy), max(miter_pt.zw, bbox.zw));
+                    draw_join(stroke, path_ix, style_flags, pts.p3,
+                              tan_prev, tan_next, n_prev, n_next, transform);
                 } else {
                     // Draw end cap.
-                    let line_ix = atomicAdd(&bump.lines, 1u);
-                    lines[line_ix] = LineSoup(path_ix, pts.p3 + n_prev, pts.p3 - n_prev);
+                    output_line_with_transform(path_ix, pts.p3 + n_prev, pts.p3 - n_prev, transform);
                 }
             }
         } else {
-            flatten_cubic(Cubic(pts.p0, pts.p1, pts.p2, pts.p3, stroke, path_ix, u32(is_stroke)));
+            flatten_cubic(Cubic(pts.p0, pts.p1, pts.p2, pts.p3, stroke, path_ix, u32(is_stroke)), transform, 0.);
         }
         // Update bounding box using atomics only. Computing a monoid is a
         // potential future optimization.

From a86e827ced4e67527343b72c5c650bbc3b4839c9 Mon Sep 17 00:00:00 2001
From: Arman Uguray <armansito@google.com>
Date: Fri, 10 Nov 2023 14:04:16 -0800
Subject: [PATCH 3/6] [test_scenes] Add skew and non-uniform transform variants
 of stroke_styles

---
 examples/scenes/src/test_scenes.rs | 226 +++++++++++++++--------------
 1 file changed, 119 insertions(+), 107 deletions(-)

diff --git a/examples/scenes/src/test_scenes.rs b/examples/scenes/src/test_scenes.rs
index 74e5a937b..7695d95c0 100644
--- a/examples/scenes/src/test_scenes.rs
+++ b/examples/scenes/src/test_scenes.rs
@@ -33,7 +33,17 @@ pub fn test_scenes() -> SceneSet {
     let scenes = vec![
         scene!(splash_with_tiger(), "splash_with_tiger", false),
         scene!(funky_paths),
-        scene!(stroke_styles),
+        scene!(stroke_styles(Affine::IDENTITY), "stroke_styles", false),
+        scene!(
+            stroke_styles(Affine::scale_non_uniform(1.2, 0.7)),
+            "stroke_styles (non-uniform scale)",
+            false
+        ),
+        scene!(
+            stroke_styles(Affine::skew(0.5, 0.)),
+            "stroke_styles (skew)",
+            false
+        ),
         scene!(tricky_strokes),
         scene!(fill_types),
         scene!(cardioid_and_friends),
@@ -98,143 +108,145 @@ fn funky_paths(sb: &mut SceneBuilder, _: &mut SceneParams) {
     );
 }
 
-fn stroke_styles(sb: &mut SceneBuilder, params: &mut SceneParams) {
+fn stroke_styles(transform: Affine) -> impl FnMut(&mut SceneBuilder, &mut SceneParams) {
     use PathEl::*;
-    let colors = [
-        Color::rgb8(140, 181, 236),
-        Color::rgb8(246, 236, 202),
-        Color::rgb8(201, 147, 206),
-        Color::rgb8(150, 195, 160),
-    ];
-    let simple_stroke = [MoveTo((0., 0.).into()), LineTo((100., 0.).into())];
-    let join_stroke = [
-        MoveTo((0., 0.).into()),
-        CurveTo((20., 0.).into(), (42.5, 5.).into(), (50., 25.).into()),
-        CurveTo((57.5, 5.).into(), (80., 0.).into(), (100., 0.).into()),
-    ];
-    let miter_stroke = [
-        MoveTo((0., 0.).into()),
-        LineTo((90., 16.).into()),
-        LineTo((0., 31.).into()),
-        LineTo((90., 46.).into()),
-    ];
-    let closed_strokes = [
-        MoveTo((0., 0.).into()),
-        LineTo((90., 21.).into()),
-        LineTo((0., 42.).into()),
-        ClosePath,
-        MoveTo((200., 0.).into()),
-        CurveTo((100., 42.).into(), (300., 42.).into(), (200., 0.).into()),
-        ClosePath,
-        MoveTo((290., 0.).into()),
-        CurveTo((200., 42.).into(), (400., 42.).into(), (310., 0.).into()),
-        ClosePath,
-    ];
-    let cap_styles = [Cap::Butt, Cap::Square, Cap::Round];
-    let join_styles = [Join::Bevel, Join::Miter, Join::Round];
-    let miter_limits = [4., 6., 0.1, 10.];
+    move |sb, params| {
+        let colors = [
+            Color::rgb8(140, 181, 236),
+            Color::rgb8(246, 236, 202),
+            Color::rgb8(201, 147, 206),
+            Color::rgb8(150, 195, 160),
+        ];
+        let simple_stroke = [MoveTo((0., 0.).into()), LineTo((100., 0.).into())];
+        let join_stroke = [
+            MoveTo((0., 0.).into()),
+            CurveTo((20., 0.).into(), (42.5, 5.).into(), (50., 25.).into()),
+            CurveTo((57.5, 5.).into(), (80., 0.).into(), (100., 0.).into()),
+        ];
+        let miter_stroke = [
+            MoveTo((0., 0.).into()),
+            LineTo((90., 16.).into()),
+            LineTo((0., 31.).into()),
+            LineTo((90., 46.).into()),
+        ];
+        let closed_strokes = [
+            MoveTo((0., 0.).into()),
+            LineTo((90., 21.).into()),
+            LineTo((0., 42.).into()),
+            ClosePath,
+            MoveTo((200., 0.).into()),
+            CurveTo((100., 72.).into(), (300., 72.).into(), (200., 0.).into()),
+            ClosePath,
+            MoveTo((290., 0.).into()),
+            CurveTo((200., 72.).into(), (400., 72.).into(), (310., 0.).into()),
+            ClosePath,
+        ];
+        let cap_styles = [Cap::Butt, Cap::Square, Cap::Round];
+        let join_styles = [Join::Bevel, Join::Miter, Join::Round];
+        let miter_limits = [4., 6., 0.1, 10.];
 
-    // Simple strokes with cap combinations
-    let t = Affine::translate((60., 40.)) * Affine::scale(2.);
-    let mut y = 0.;
-    let mut color_idx = 0;
-    for start in cap_styles {
-        for end in cap_styles {
+        // Simple strokes with cap combinations
+        let t = Affine::translate((60., 40.)) * Affine::scale(2.);
+        let mut y = 0.;
+        let mut color_idx = 0;
+        for start in cap_styles {
+            for end in cap_styles {
+                params.text.add(
+                    sb,
+                    None,
+                    12.,
+                    None,
+                    Affine::translate((0., y)) * t,
+                    &format!("Start cap: {:?}, End cap: {:?}", start, end),
+                );
+                sb.stroke(
+                    &Stroke::new(20.).with_start_cap(start).with_end_cap(end),
+                    Affine::translate((0., y + 30.)) * t * transform,
+                    colors[color_idx],
+                    None,
+                    &simple_stroke,
+                );
+                y += 180.;
+                color_idx = (color_idx + 1) % colors.len();
+            }
+        }
+
+        // Cap and join combinations
+        let t = Affine::translate((500., 0.)) * t;
+        y = 0.;
+        for cap in cap_styles {
+            for join in join_styles {
+                params.text.add(
+                    sb,
+                    None,
+                    12.,
+                    None,
+                    Affine::translate((0., y)) * t,
+                    &format!("Caps: {:?}, Joins: {:?}", cap, join),
+                );
+                sb.stroke(
+                    &Stroke::new(20.).with_caps(cap).with_join(join),
+                    Affine::translate((0., y + 30.)) * t * transform,
+                    colors[color_idx],
+                    None,
+                    &join_stroke,
+                );
+                y += 185.;
+                color_idx = (color_idx + 1) % colors.len();
+            }
+        }
+
+        // Miter limit
+        let t = Affine::translate((500., 0.)) * t;
+        y = 0.;
+        for ml in miter_limits {
             params.text.add(
                 sb,
                 None,
                 12.,
                 None,
                 Affine::translate((0., y)) * t,
-                &format!("Start cap: {:?}, End cap: {:?}", start, end),
+                &format!("Miter limit: {}", ml),
             );
             sb.stroke(
-                &Stroke::new(20.).with_start_cap(start).with_end_cap(end),
-                Affine::translate((0., y + 30.)) * t,
+                &Stroke::new(10.)
+                    .with_caps(Cap::Butt)
+                    .with_join(Join::Miter)
+                    .with_miter_limit(ml),
+                Affine::translate((0., y + 30.)) * t * transform,
                 colors[color_idx],
                 None,
-                &simple_stroke,
+                &miter_stroke,
             );
             y += 180.;
             color_idx = (color_idx + 1) % colors.len();
         }
-    }
 
-    // Cap and join combinations
-    let t = Affine::translate((500., 0.)) * t;
-    y = 0.;
-    for cap in cap_styles {
-        for join in join_styles {
+        // Closed paths
+        for (i, join) in join_styles.iter().enumerate() {
             params.text.add(
                 sb,
                 None,
                 12.,
                 None,
                 Affine::translate((0., y)) * t,
-                &format!("Caps: {:?}, Joins: {:?}", cap, join),
+                &format!("Closed path with join: {:?}", join),
             );
+            // The cap style is not important since a closed path shouldn't have any caps.
             sb.stroke(
-                &Stroke::new(20.).with_caps(cap).with_join(join),
-                Affine::translate((0., y + 30.)) * t,
+                &Stroke::new(10.)
+                    .with_caps(cap_styles[i])
+                    .with_join(*join)
+                    .with_miter_limit(5.),
+                Affine::translate((0., y + 30.)) * t * transform,
                 colors[color_idx],
                 None,
-                &join_stroke,
+                &closed_strokes,
             );
-            y += 185.;
+            y += 180.;
             color_idx = (color_idx + 1) % colors.len();
         }
     }
-
-    // Miter limit
-    let t = Affine::translate((500., 0.)) * t;
-    y = 0.;
-    for ml in miter_limits {
-        params.text.add(
-            sb,
-            None,
-            12.,
-            None,
-            Affine::translate((0., y)) * t,
-            &format!("Miter limit: {}", ml),
-        );
-        sb.stroke(
-            &Stroke::new(10.)
-                .with_caps(Cap::Butt)
-                .with_join(Join::Miter)
-                .with_miter_limit(ml),
-            Affine::translate((0., y + 30.)) * t,
-            colors[color_idx],
-            None,
-            &miter_stroke,
-        );
-        y += 180.;
-        color_idx = (color_idx + 1) % colors.len();
-    }
-
-    // Closed paths
-    for (i, join) in join_styles.iter().enumerate() {
-        params.text.add(
-            sb,
-            None,
-            12.,
-            None,
-            Affine::translate((0., y)) * t,
-            &format!("Closed path with join: {:?}", join),
-        );
-        // The cap style is not important since a closed path shouldn't have any caps.
-        sb.stroke(
-            &Stroke::new(10.)
-                .with_caps(cap_styles[i])
-                .with_join(*join)
-                .with_miter_limit(5.),
-            Affine::translate((0., y + 30.)) * t,
-            colors[color_idx],
-            None,
-            &closed_strokes,
-        );
-        y += 180.;
-        color_idx = (color_idx + 1) % colors.len();
-    }
 }
 
 // This test has been adapted from Skia's "trickycubicstrokes" GM slide which can be found at

From 4f84ac16f8995c86549618487c03868ed949b08f Mon Sep 17 00:00:00 2001
From: Arman Uguray <armansito@google.com>
Date: Tue, 14 Nov 2023 15:43:27 -0800
Subject: [PATCH 4/6] Increase skew factor in stroke styles test

---
 examples/scenes/src/test_scenes.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/scenes/src/test_scenes.rs b/examples/scenes/src/test_scenes.rs
index 7695d95c0..c617e5d78 100644
--- a/examples/scenes/src/test_scenes.rs
+++ b/examples/scenes/src/test_scenes.rs
@@ -40,7 +40,7 @@ pub fn test_scenes() -> SceneSet {
             false
         ),
         scene!(
-            stroke_styles(Affine::skew(0.5, 0.)),
+            stroke_styles(Affine::skew(1., 0.)),
             "stroke_styles (skew)",
             false
         ),

From 2de6c4986d94fbe1788e4b5cb6fe2643748afd9b Mon Sep 17 00:00:00 2001
From: Arman Uguray <armansito@google.com>
Date: Tue, 14 Nov 2023 15:47:30 -0800
Subject: [PATCH 5/6] Address review comments; document flattening

---
 shader/flatten.wgsl | 129 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 98 insertions(+), 31 deletions(-)

diff --git a/shader/flatten.wgsl b/shader/flatten.wgsl
index 160876f1a..6e3f8dc0e 100644
--- a/shader/flatten.wgsl
+++ b/shader/flatten.wgsl
@@ -52,7 +52,24 @@ fn approx_parabola_inv_integral(x: f32) -> f32 {
     return x * sqrt(1.0 - B + (B * B + 0.5 * x * x));
 }
 
-fn estimate_subdiv(p0: vec2f, p1: vec2f, p2: vec2f, sqrt_tol: f32, transform_scale: f32) -> SubdivResult {
+// Notes on fractional subdivision:
+// --------------------------------
+// The core of the existing flattening algorithm (see `flatten_cubic` below) is to approximate the
+// original cubic Bézier into a simpler curve (quadratic Bézier), subdivided to meet the error
+// bound, then apply flattening to that. Doing this the simplest way would put a subdivision point
+// in the output at each subdivision point here. That in general does not match where the
+// subdivision points would go in an optimal flattening. Fractional subdivision addresses that
+// problem.
+//
+// The return value of this function (`val`) represents this fractional subdivision count and has
+// the following meaning: an optimal subdivision of the quadratic into `val / 2` subdivisions
+// will have an error `sqrt_tol^2` (i.e. the desired tolerance).
+//
+// In the non-cusp case, the error scales as the inverse square of `val` (doubling `val` causes the
+// error to be one fourth), so the tolerance is actually not needed for the calculation (and gets
+// applied in the caller). In the cusp case, this scaling breaks down and the tolerance parameter
+// is needed to compute the correct result.
+fn estimate_subdiv(p0: vec2f, p1: vec2f, p2: vec2f, sqrt_tol: f32) -> SubdivResult {
     let d01 = p1 - p0;
     let d12 = p2 - p1;
     let dd = d01 - d12;
@@ -60,7 +77,7 @@ fn estimate_subdiv(p0: vec2f, p1: vec2f, p2: vec2f, sqrt_tol: f32, transform_sca
     let cross_inv = select(1.0 / cross, 1.0e9, abs(cross) < 1.0e-9);
     let x0 = dot(d01, dd) * cross_inv;
     let x2 = dot(d12, dd) * cross_inv;
-    let scale = abs(transform_scale * cross / (length(dd) * (x2 - x0)));
+    let scale = abs(cross / (length(dd) * (x2 - x0)));
 
     let a0 = approx_parabola_integral(x0);
     let a2 = approx_parabola_integral(x2);
@@ -128,21 +145,60 @@ fn cubic_end_normal(p0: vec2f, p1: vec2f, p2: vec2f, p3: vec2f) -> vec2f {
 
 let MAX_QUADS = 16u;
 
-fn flatten_cubic(cubic: Cubic, transform: Transform, offset: f32) {
-    let p0 = cubic.p0;
-    let p1 = cubic.p1;
-    let p2 = cubic.p2;
-    let p3 = cubic.p3;
+// This function flattens a cubic Bézier by first converting it into quadratics and then
+// approximating the optimal flattening of those using a variation of the method described in
+// https://raphlinus.github.io/graphics/curves/2019/12/23/flatten-quadbez.html.
+//
+// When the `offset` parameter is zero (i.e. the path is a "fill"), the flattening is performed
+// directly on the transformed (device-space) control points as this produces near-optimal
+// flattening even in the presence of a non-angle-preserving transform.
+//
+// When the `offset` is non-zero, the flattening is performed in the curve's local coordinate space
+// and the offset curve gets transformed to device-space post-flattening. This handles
+// non-angle-preserving transforms well while keeping the logic simple.
+//
+// When subdividing the cubic in its local coordinate space, the scale factor gets decomposed out of
+// the local-to-device transform and gets factored into the tolerance threshold when estimating
+// subdivisions.
+fn flatten_cubic(cubic: Cubic, local_to_device: Transform, offset: f32) {
+    var p0: vec2f;
+    var p1: vec2f;
+    var p2: vec2f;
+    var p3: vec2f;
+    var scale: f32;
+    var transform: Transform;
+    if offset == 0. {
+        let t = local_to_device;
+        p0 = transform_apply(t, cubic.p0);
+        p1 = transform_apply(t, cubic.p1);
+        p2 = transform_apply(t, cubic.p2);
+        p3 = transform_apply(t, cubic.p3);
+        scale = 1.;
+        transform = transform_identity();
+    } else {
+        p0 = cubic.p0;
+        p1 = cubic.p1;
+        p2 = cubic.p2;
+        p3 = cubic.p3;
+
+        transform = local_to_device;
+        let mat = transform.mat;
+        scale = 0.5 * length(vec2(mat.x + mat.w, mat.y - mat.z)) +
+                length(vec2(mat.x - mat.w, mat.y + mat.z));
+    }
+
     let err_v = 3.0 * (p2 - p1) + p0 - p3;
     let err = dot(err_v, err_v);
     let ACCURACY = 0.25;
     let Q_ACCURACY = ACCURACY * 0.1;
     let REM_ACCURACY = ACCURACY - Q_ACCURACY;
     let MAX_HYPOT2 = 432.0 * Q_ACCURACY * Q_ACCURACY;
-    let scale = vec2(length(transform.mat.xz), length(transform.mat.yw));
-    let scale_factor = max(scale.x, scale.y);
-    var n_quads = max(u32(ceil(pow(err * (1.0 / MAX_HYPOT2), 1.0 / 6.0)) * scale_factor), 1u);
+    let scaled_sqrt_tol = sqrt(REM_ACCURACY / scale);
+    // Fudge the subdivision count metric to account for `scale` when the subdivision is done in local
+    // coordinates.
+    var n_quads = max(u32(ceil(pow(err * (1.0 / MAX_HYPOT2), 1.0 / 6.0)) * scale), 1u);
     n_quads = min(n_quads, MAX_QUADS);
+
     var keep_params: array<SubdivResult, MAX_QUADS>;
     var val = 0.0;
     var qp0 = p0;
@@ -154,8 +210,7 @@ fn flatten_cubic(cubic: Cubic, transform: Transform, offset: f32) {
         qp1 = 2.0 * qp1 - 0.5 * (qp0 + qp2);
 
         // TODO: Estimate an accurate subdivision count for strokes, handling cusps.
-        let tol = sqrt(REM_ACCURACY);
-        let params = estimate_subdiv(qp0, qp1, qp2, tol, scale_factor);
+        let params = estimate_subdiv(qp0, qp1, qp2, scaled_sqrt_tol);
         keep_params[i] = params;
         val += params.val;
         qp0 = qp2;
@@ -164,7 +219,7 @@ fn flatten_cubic(cubic: Cubic, transform: Transform, offset: f32) {
     // Normal vector to calculate the start point of the offset curve.
     var n0 = offset * cubic_start_normal(p0, p1, p2, p3);
 
-    let n = max(u32(ceil(val * (0.5 / sqrt(REM_ACCURACY)))), 1u);
+    let n = max(u32(ceil(val * (0.5 / scaled_sqrt_tol))), 1u);
     var lp0 = p0;
     qp0 = p0;
     let v_step = val / f32(n);
@@ -199,7 +254,7 @@ fn flatten_cubic(cubic: Cubic, transform: Transform, offset: f32) {
             // "flatten_cubic_at_offset" such that it outputs one cubic at an offset. That should
             // more closely resemble the end state of this shader which will work like a state
             // machine.
-            if cubic.flags == 1u {
+            if offset > 0. {
                 var n1: vec2f;
                 if all(lp1 == p3) {
                     n1 = cubic_end_normal(p0, p1, p2, p3);
@@ -248,6 +303,7 @@ fn draw_join(
             let front1 = p0 + n_next;
             var back0 = p0 - n_next;
             let back1 = p0 - n_prev;
+            var line_ix: u32;
 
             if 2. * hypot < (hypot + d) * miter_limit * miter_limit && c != 0. {
                 let is_backside = c > 0.;
@@ -258,15 +314,21 @@ fn draw_join(
                 let v = fp_this - fp_last;
                 let h = (tan_prev.x * v.y - tan_prev.y * v.x) / c;
                 let miter_pt = fp_this - tan_next * h;
-                output_line_with_transform(path_ix, p, miter_pt, transform);
+
+                line_ix = atomicAdd(&bump.lines, 3u);
+                write_line_with_transform(line_ix, path_ix, p, miter_pt, transform);
+                line_ix += 1u;
 
                 if is_backside {
                     back0 = miter_pt;
                 } else {
                     front0 = miter_pt;
                 }
+            } else {
+                line_ix = atomicAdd(&bump.lines, 2u);
             }
-            output_two_lines_with_transform(path_ix, front0, front1, back0, back1, transform);
+            write_line_with_transform(line_ix, path_ix, front0, front1, transform);
+            write_line_with_transform(line_ix + 1u, path_ix, back0, back1, transform);
         }
         case /*STYLE_FLAGS_JOIN_ROUND*/0x20000000u: {
             // TODO: round join
@@ -297,6 +359,10 @@ struct Transform {
     translate: vec2f,
 }
 
+fn transform_identity() -> Transform {
+    return Transform(vec4(1., 0., 0., 1.), vec2(0.));
+}
+
 fn read_transform(transform_base: u32, ix: u32) -> Transform {
     let base = transform_base + ix * 6u;
     let c0 = bitcast<f32>(scene[base]);
@@ -402,18 +468,26 @@ fn read_path_segment(tag: PathTagData, is_stroke: bool) -> CubicPoints {
     return CubicPoints(p0, p1, p2, p3);
 }
 
-fn output_line(path_ix: u32, p0: vec2f, p1: vec2f) {
-    let line_ix = atomicAdd(&bump.lines, 1u);
+// Writes a line into the `lines` buffer at a pre-allocated location designated by `line_ix`.
+fn write_line(line_ix: u32, path_ix: u32, p0: vec2f, p1: vec2f) {
     bbox = vec4(min(bbox.xy, min(p0, p1)), max(bbox.zw, max(p0, p1)));
     lines[line_ix] = LineSoup(path_ix, p0, p1);
 }
 
+fn write_line_with_transform(line_ix: u32, path_ix: u32, p0: vec2f, p1: vec2f, t: Transform) {
+    let tp0 = transform_apply(t, p0);
+    let tp1 = transform_apply(t, p1);
+    write_line(line_ix, path_ix, tp0, tp1);
+}
+
+fn output_line(path_ix: u32, p0: vec2f, p1: vec2f) {
+    let line_ix = atomicAdd(&bump.lines, 1u);
+    write_line(line_ix, path_ix, p0, p1);
+}
+
 fn output_line_with_transform(path_ix: u32, p0: vec2f, p1: vec2f, transform: Transform) {
     let line_ix = atomicAdd(&bump.lines, 1u);
-    let tp0 = transform_apply(transform, p0);
-    let tp1 = transform_apply(transform, p1);
-    bbox = vec4(min(bbox.xy, min(tp0, tp1)), max(bbox.zw, max(tp0, tp1)));
-    lines[line_ix] = LineSoup(path_ix, tp0, tp1);
+    write_line_with_transform(line_ix, path_ix, p0, p1, transform);
 }
 
 fn output_two_lines_with_transform(
@@ -423,15 +497,8 @@ fn output_two_lines_with_transform(
     transform: Transform
 ) {
     let line_ix = atomicAdd(&bump.lines, 2u);
-    let tp00 = transform_apply(transform, p00);
-    let tp01 = transform_apply(transform, p01);
-    let tp10 = transform_apply(transform, p10);
-    let tp11 = transform_apply(transform, p11);
-
-    bbox = vec4(min(bbox.xy, min(tp00, tp01)), max(bbox.zw, max(tp00, tp01)));
-    bbox = vec4(min(bbox.xy, min(tp10, tp11)), max(bbox.zw, max(tp10, tp11)));
-    lines[line_ix]      = LineSoup(path_ix, tp00, tp01);
-    lines[line_ix + 1u] = LineSoup(path_ix, tp10, tp11);
+    write_line_with_transform(line_ix, path_ix, p00, p01, transform);
+    write_line_with_transform(line_ix + 1u, path_ix, p10, p11, transform);
 }
 
 struct NeighboringSegment {

From d4fd8398758c5680b0567ebcd863940fbc674a28 Mon Sep 17 00:00:00 2001
From: Arman Uguray <armansito@google.com>
Date: Wed, 15 Nov 2023 11:11:38 -0800
Subject: [PATCH 6/6] Address review comment about indexing into the result of
 unpack

---
 shader/flatten.wgsl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/shader/flatten.wgsl b/shader/flatten.wgsl
index 6e3f8dc0e..d56853b5a 100644
--- a/shader/flatten.wgsl
+++ b/shader/flatten.wgsl
@@ -297,7 +297,7 @@ fn draw_join(
             let c = tan_prev.x * tan_next.y - tan_prev.y * tan_next.x;
             let d = dot(tan_prev, tan_next);
             let hypot = length(vec2f(c, d));
-            let miter_limit = unpack2x16float(style_flags & STYLE_MITER_LIMIT_MASK).x;
+            let miter_limit = unpack2x16float(style_flags & STYLE_MITER_LIMIT_MASK)[0];
 
             var front0 = p0 + n_prev;
             let front1 = p0 + n_next;