Skip to content

Commit

Permalink
Handle transparent objects
Browse files Browse the repository at this point in the history
  • Loading branch information
ishitatsuyuki committed Mar 14, 2021
1 parent 70c24af commit b908a32
Show file tree
Hide file tree
Showing 9 changed files with 111 additions and 33 deletions.
2 changes: 2 additions & 0 deletions piet-gpu-types/src/ptcl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ piet_gpu! {
EndClip,
Stroke(CmdStroke),
Jump(CmdJump),
SaveStencil,
RestoreStencil,
}
}
}
7 changes: 0 additions & 7 deletions piet-gpu/bin/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -276,13 +276,6 @@ fn main() -> Result<(), Error> {
println!("Coarse raster kernel time: {:.3}ms", (ts[5] - ts[4]) * 1e3);
println!("Render kernel time: {:.3}ms", (ts[6] - ts[5]) * 1e3);

/*
let mut data: Vec<u32> = Default::default();
renderer.tile_buf.read(&mut data).unwrap();
piet_gpu::dump_k1_data(&data);
trace_ptcl(&data);
*/

let mut img_data: Vec<u8> = Default::default();
// Note: because png can use a `&[u8]` slice, we could avoid an extra copy
// (probably passing a slice into a closure). But for now: keep it simple.
Expand Down
14 changes: 9 additions & 5 deletions piet-gpu/bin/winit.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
use piet_gpu_hal::hub;
use piet_gpu_hal::vulkan::VkInstance;
use piet_gpu_hal::{CmdBuf, Error, ImageLayout};

use piet_gpu::{render_scene, PietGpuRenderContext, Renderer, HEIGHT, WIDTH};
use std::thread::sleep;
use std::time::Duration;

use winit::{
event::{Event, WindowEvent},
event_loop::{ControlFlow, EventLoop},
window::WindowBuilder,
};

use piet_gpu::{HEIGHT, PietGpuRenderContext, render_scene, Renderer, WIDTH};
use piet_gpu_hal::{CmdBuf, Error, ImageLayout};
use piet_gpu_hal::hub;
use piet_gpu_hal::vulkan::VkInstance;

const NUM_FRAMES: usize = 2;

fn main() -> Result<(), Error> {
Expand Down Expand Up @@ -118,6 +120,8 @@ fn main() -> Result<(), Error> {
.present(image_idx, &[present_semaphores[frame_idx]])
.unwrap();

sleep(Duration::from_millis(5));

current_frame += 1;
}
_ => (),
Expand Down
75 changes: 61 additions & 14 deletions piet-gpu/shader/coarse.comp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,22 @@ bool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit
return true;
}

// Reverse-direction variant of alloc_cmd: makes sure there is a slot available
// for a command list that is filled from high offsets towards low offsets.
//
// If cmd_ref.offset is still at or above cmd_limit, nothing is allocated and
// the function returns true. Otherwise a new chunk of PTCL_INITIAL_ALLOC bytes
// is requested via malloc():
//   - on failure the function returns false and leaves all arguments untouched;
//   - on success cmd_alloc is switched to the new chunk, cmd_ref is moved to
//     the chunk's last Cmd_size slot, a Jump pointing at the previous
//     cmd_ref.offset is written into that slot, and cmd_limit becomes the
//     chunk's base offset plus Cmd_size.
bool alloc_cmd_rev(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit) {
    bool needs_chunk = !(cmd_ref.offset >= cmd_limit);
    if (needs_chunk) {
        // Remember where the existing list currently starts; the new chunk's
        // trailing Jump must lead back to it.
        uint resume_offset = cmd_ref.offset;

        MallocResult fresh = malloc(PTCL_INITIAL_ALLOC);
        if (fresh.failed) {
            return false;
        }

        cmd_alloc = fresh.alloc;
        uint chunk_base = cmd_alloc.offset;

        // The Jump occupies the highest slot of the new chunk.
        cmd_ref = CmdRef(chunk_base + PTCL_INITIAL_ALLOC - Cmd_size);
        Cmd_Jump_write(cmd_alloc, cmd_ref, CmdJump(resume_offset));

        cmd_limit = chunk_base + Cmd_size;
    }
    return true;
}

void main() {
if (mem_error != NO_ERROR) {
return;
Expand All @@ -108,9 +124,15 @@ void main() {
uint tile_x = gl_LocalInvocationID.x % N_TILE_X;
uint tile_y = gl_LocalInvocationID.x / N_TILE_X;
uint this_tile_ix = (bin_tile_y + tile_y) * conf.width_in_tiles + bin_tile_x + tile_x;
Alloc cmd_alloc = slice_mem(conf.ptcl_alloc, this_tile_ix * PTCL_INITIAL_ALLOC, PTCL_INITIAL_ALLOC);
Alloc cmd_alloc = slice_mem(conf.ptcl_alloc, this_tile_ix * PTCL_INITIAL_ALLOC * 2, PTCL_INITIAL_ALLOC);
CmdRef cmd_ref = CmdRef(cmd_alloc.offset);
uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
Alloc alpha_cmd_alloc = slice_mem(conf.ptcl_alloc, PTCL_INITIAL_ALLOC * (this_tile_ix * 2), PTCL_INITIAL_ALLOC);
CmdRef alpha_cmd_ref = CmdRef(alpha_cmd_alloc.offset + PTCL_INITIAL_ALLOC - Cmd_size);
if (bin_tile_x + tile_x < conf.width_in_tiles && bin_tile_y + tile_y < conf.height_in_tiles) {
Cmd_End_write(alpha_cmd_alloc, alpha_cmd_ref);
}
uint alpha_cmd_limit = alpha_cmd_ref.offset + Cmd_size;
// The nesting depth of the clip stack
uint clip_depth = 1;
// State for the "clip zero" optimization. If it's nonzero, then we are
Expand Down Expand Up @@ -312,19 +334,43 @@ void main() {
Tile tile = Tile_read(read_tile_alloc(element_ref_ix), TileRef(sh_tile_base[element_ref_ix]
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
AnnoFill fill = Annotated_Fill_read(conf.anno_alloc, ref);
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
break;
}
CmdFill cmd_fill;
cmd_fill.tile_ref = tile.tile.offset;
cmd_fill.backdrop = tile.backdrop;
cmd_fill.rgba_color = fill.rgba_color;
Cmd_Fill_write(cmd_alloc, cmd_ref, cmd_fill);
if (tile.tile.offset == 0) {
// Anything below is occluded due to drawing front-to-back.
clip_zero_depth = clip_depth;
if (unpackUnorm4x8(fill.rgba_color).wzyx.a == 1.0) {
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
break;
}
CmdFill cmd_fill;
cmd_fill.tile_ref = tile.tile.offset;
cmd_fill.backdrop = tile.backdrop;
cmd_fill.rgba_color = fill.rgba_color;
Cmd_Fill_write(cmd_alloc, cmd_ref, cmd_fill);
if (tile.tile.offset == 0) {
// Anything below is occluded due to drawing front-to-back.
clip_zero_depth = clip_depth;
}
cmd_ref.offset += Cmd_size;
} else {
if (!alloc_cmd_rev(alpha_cmd_alloc, alpha_cmd_ref, alpha_cmd_limit)) {
break;
}
alpha_cmd_ref.offset -= Cmd_size;
CmdFill cmd_fill;
cmd_fill.tile_ref = tile.tile.offset;
cmd_fill.backdrop = tile.backdrop;
cmd_fill.rgba_color = fill.rgba_color;
Cmd_Fill_write(alpha_cmd_alloc, alpha_cmd_ref, cmd_fill);

if (!alloc_cmd_rev(alpha_cmd_alloc, alpha_cmd_ref, alpha_cmd_limit)) {
break;
}
alpha_cmd_ref.offset -= Cmd_size;
Cmd_RestoreStencil_write(alpha_cmd_alloc, alpha_cmd_ref);

if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
break;
}
Cmd_SaveStencil_write(cmd_alloc, cmd_ref);
cmd_ref.offset += Cmd_size;
}
cmd_ref.offset += Cmd_size;
break;
case Annotated_BeginClip:
tile = Tile_read(read_tile_alloc(element_ref_ix), TileRef(sh_tile_base[element_ref_ix]
Expand Down Expand Up @@ -393,6 +439,7 @@ void main() {
if (rd_ix >= ready_ix && partition_ix >= n_partitions) break;
}
if (bin_tile_x + tile_x < conf.width_in_tiles && bin_tile_y + tile_y < conf.height_in_tiles) {
Cmd_End_write(cmd_alloc, cmd_ref);
CmdJump jump = CmdJump(alpha_cmd_ref.offset);
Cmd_Jump_write(cmd_alloc, cmd_ref, jump);
}
}
Binary file modified piet-gpu/shader/coarse.spv
Binary file not shown.
34 changes: 28 additions & 6 deletions piet-gpu/shader/kernel4.comp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ uint getLut(vec2 n, float c) {
c = -c;
mask = ~0;
}
// It is also possible to implement this in a branchless manner by clamping c to be under 1. It allows more ILP on
// at least AMD GPUs (a wait is forcibly inserted when converging the branches so we need to avoid that), but when
// testing on RX 5700 XT, it turned out to be slower even though it should hide the latency more effectively. Maybe
// it's around the point where both peak bandwidth and latency could become the bottleneck.
if (c >= 1.) {
// mask ^= 0;
} else {
Expand Down Expand Up @@ -114,16 +118,19 @@ void main() {
}

uint tile_ix = gl_WorkGroupID.y * conf.width_in_tiles + gl_WorkGroupID.x;
Alloc cmd_alloc = slice_mem(conf.ptcl_alloc, tile_ix * PTCL_INITIAL_ALLOC, PTCL_INITIAL_ALLOC);
Alloc cmd_alloc = slice_mem(conf.ptcl_alloc, tile_ix * PTCL_INITIAL_ALLOC * 2, PTCL_INITIAL_ALLOC);
CmdRef cmd_ref = CmdRef(cmd_alloc.offset);

uvec2 xy_uint = uvec2(gl_GlobalInvocationID.x, gl_LocalInvocationID.y + TILE_HEIGHT_PX * gl_WorkGroupID.y);
vec2 xy = vec2(xy_uint);
vec3 rgb[CHUNK];
uint stencil[CHUNK];
uint coverage[CHUNK];
uint stencil_stack[32][CHUNK];
uint stencil_stack[256][CHUNK];
uint clip_depth = 0;
uint alpha_stencil_stack[256][CHUNK];
uint alpha_depth = 0;

Alloc clip_tos = new_alloc(0, 0);
for (uint i = 0; i < CHUNK; i++) {
stencil[i] = 0;
Expand Down Expand Up @@ -165,10 +172,13 @@ void main() {
fg_rgba = unpackUnorm4x8(fill.rgba_color).wzyx;
for (uint k = 0; k < CHUNK; k++) {
float area = float(bitCount(coverage[k] & ~stencil[k])) / 32.;
// TODO: alpha
rgb[k] = rgb[k] + fg_rgba.rgb * area;
// only cover if opaque...
stencil[k] |= coverage[k];
if (fg_rgba.a == 1.0) {
rgb[k] = rgb[k] + fg_rgba.rgb * area;
// only update stencil if opaque
stencil[k] |= coverage[k];
} else {
rgb[k] = mix(rgb[k], fg_rgba.rgb, fg_rgba.a * area);
}
}
break;
case Cmd_BeginClip:
Expand All @@ -188,6 +198,18 @@ void main() {
stencil[k] = stencil_stack[clip_depth][k];
}
break;
case Cmd_SaveStencil:
for (uint k = 0; k < CHUNK; k++) {
alpha_stencil_stack[alpha_depth][k] = stencil[k];
}
alpha_depth++;
break;
case Cmd_RestoreStencil:
alpha_depth--;
for (uint k = 0; k < CHUNK; k++) {
stencil[k] = alpha_stencil_stack[alpha_depth][k];
}
break;
case Cmd_Jump:
cmd_ref = CmdRef(Cmd_Jump_read(cmd_alloc, cmd_ref).new_ref);
cmd_alloc.offset = cmd_ref.offset;
Expand Down
Binary file modified piet-gpu/shader/kernel4.spv
Binary file not shown.
10 changes: 10 additions & 0 deletions piet-gpu/shader/ptcl.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ CmdJumpRef CmdJump_index(CmdJumpRef ref, uint index) {
#define Cmd_EndClip 3
#define Cmd_Stroke 4
#define Cmd_Jump 5
#define Cmd_SaveStencil 6
#define Cmd_RestoreStencil 7
#define Cmd_size 16

CmdRef Cmd_index(CmdRef ref, uint index) {
Expand Down Expand Up @@ -194,3 +196,11 @@ void Cmd_Jump_write(Alloc a, CmdRef ref, CmdJump s) {
CmdJump_write(a, CmdJumpRef(ref.offset + 4), s);
}

// Emits a SaveStencil command: stores the Cmd_SaveStencil tag into allocation
// `a` at index `ref.offset >> 2`. Only the tag is written; no further data
// follows it.
// NOTE(review): the shift presumably converts a byte offset into a 32-bit word
// index for write_mem — confirm against write_mem's definition.
void Cmd_SaveStencil_write(Alloc a, CmdRef ref) {
write_mem(a, ref.offset >> 2, Cmd_SaveStencil);
}

// Emits a RestoreStencil command: stores the Cmd_RestoreStencil tag into
// allocation `a` at index `ref.offset >> 2`. Only the tag is written; no
// further data follows it.
// NOTE(review): the shift presumably converts a byte offset into a 32-bit word
// index for write_mem — confirm against write_mem's definition.
void Cmd_RestoreStencil_write(Alloc a, CmdRef ref) {
write_mem(a, ref.offset >> 2, Cmd_RestoreStencil);
}

2 changes: 1 addition & 1 deletion piet-gpu/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ impl Renderer {
let bin_base = alloc;
alloc += ((n_paths + 255) & !255) * BIN_SIZE;
let ptcl_base = alloc;
alloc += WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC;
alloc += WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC * 2;
let pathseg_base = alloc;
alloc += (n_pathseg * PATHSEG_SIZE + 3) & !3;
let anno_base = alloc;
Expand Down

0 comments on commit b908a32

Please sign in to comment.