Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Robust memory allocation handling #606

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion examples/scenes/src/mmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ impl TestScene for MMark {
let n = if c < 10 {
(c + 1) * 1000
} else {
((c - 8) * 10000).min(120_000)
// The 190_000 scene can't be uploaded due to wgpu's limits
((c - 8) * 10000).min(190_000)
};
self.resize(n);
let mut rng = rand::thread_rng();
Expand Down
2 changes: 1 addition & 1 deletion examples/with_winit/src/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ impl Snapshot {
labels.push(format!("ptcl: {}", bump.ptcl));
labels.push(format!("tile: {}", bump.tile));
labels.push(format!("segments: {}", bump.segments));
labels.push(format!("blend: {}", bump.blend));
labels.push(format!("blend: {}", bump.blend_spill));
}

// height / 2 is dedicated to the text labels and the rest is filled by the bar graph.
Expand Down
265 changes: 241 additions & 24 deletions vello/src/lib.rs

Large diffs are not rendered by default.

78 changes: 54 additions & 24 deletions vello/src/render.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ use crate::{AaConfig, RenderParams};
#[cfg(feature = "wgpu")]
use crate::Scene;

use vello_encoding::{make_mask_lut, make_mask_lut_16, Encoding, Resolver, WorkgroupSize};
use vello_encoding::{
make_mask_lut, make_mask_lut_16, BumpAllocators, Encoding, Resolver, WorkgroupSize,
};

/// State for a render in progress.
pub struct Render {
Expand Down Expand Up @@ -82,8 +84,9 @@ pub(crate) fn render_full(
resolver: &mut Resolver,
shaders: &FullShaders,
params: &RenderParams,
) -> (Recording, ResourceProxy) {
render_encoding_full(scene.encoding(), resolver, shaders, params)
bump_sizes: BumpAllocators,
) -> (Recording, ImageProxy, BufferProxy) {
render_encoding_full(scene.encoding(), resolver, shaders, params, bump_sizes)
}

#[cfg(feature = "wgpu")]
Expand All @@ -96,12 +99,15 @@ pub(crate) fn render_encoding_full(
resolver: &mut Resolver,
shaders: &FullShaders,
params: &RenderParams,
) -> (Recording, ResourceProxy) {
bump_sizes: BumpAllocators,
) -> (Recording, ImageProxy, BufferProxy) {
let mut render = Render::new();
let mut recording = render.render_encoding_coarse(encoding, resolver, shaders, params, false);
let mut recording =
render.render_encoding_coarse(encoding, resolver, shaders, params, bump_sizes, false);
let out_image = render.out_image();
let bump_buf = render.bump_buf();
render.record_fine(shaders, &mut recording);
(recording, out_image.into())
(recording, out_image, bump_buf)
}

impl Default for Render {
Expand Down Expand Up @@ -131,7 +137,8 @@ impl Render {
resolver: &mut Resolver,
shaders: &FullShaders,
params: &RenderParams,
robust: bool,
bump_sizes: BumpAllocators,
#[cfg_attr(not(feature = "debug_layers"), allow(unused))] robust: bool,
) -> Recording {
use vello_encoding::RenderConfig;
let mut recording = Recording::default();
Expand Down Expand Up @@ -166,8 +173,20 @@ impl Render {
for image in images.images {
recording.write_image(image_atlas, image.1, image.2, image.0.clone());
}
let cpu_config =
RenderConfig::new(&layout, params.width, params.height, &params.base_color);
let cpu_config = RenderConfig::new(
layout,
params.width,
params.height,
&params.base_color,
bump_sizes,
);
// log::debug!("Config: {{ lines_size: {:?}, binning_size: {:?}, tiles_size: {:?}, seg_counts_size: {:?}, segments_size: {:?}, ptcl_size: {:?} }}",
// cpu_config.gpu.lines_size,
// cpu_config.gpu.binning_size,
// cpu_config.gpu.tiles_size,
// cpu_config.gpu.seg_counts_size,
// cpu_config.gpu.segments_size,
// cpu_config.gpu.ptcl_size);
let buffer_sizes = &cpu_config.buffer_sizes;
let wg_counts = &cpu_config.workgroup_counts;

Expand All @@ -183,18 +202,28 @@ impl Render {
recording.upload_uniform("config", bytemuck::bytes_of(&cpu_config.gpu)),
);
let info_bin_data_buf = ResourceProxy::new_buf(
buffer_sizes.bin_data.size_in_bytes() as u64,
buffer_sizes.bump_buffers.binning.size_in_bytes() as u64,
"info_bin_data_buf",
);
let tile_buf =
ResourceProxy::new_buf(buffer_sizes.tiles.size_in_bytes().into(), "tile_buf");
let segments_buf =
ResourceProxy::new_buf(buffer_sizes.segments.size_in_bytes().into(), "segments_buf");
let ptcl_buf = ResourceProxy::new_buf(buffer_sizes.ptcl.size_in_bytes().into(), "ptcl_buf");
let tile_buf = ResourceProxy::new_buf(
buffer_sizes.bump_buffers.tile.size_in_bytes().into(),
"tile_buf",
);
let segments_buf = ResourceProxy::new_buf(
buffer_sizes.bump_buffers.segments.size_in_bytes().into(),
"segments_buf",
);
let ptcl_buf = ResourceProxy::new_buf(
buffer_sizes.bump_buffers.ptcl.size_in_bytes().into(),
"ptcl_buf",
);
let reduced_buf = ResourceProxy::new_buf(
buffer_sizes.path_reduced.size_in_bytes().into(),
"reduced_buf",
);
let bump_buf = BufferProxy::new(buffer_sizes.bump_alloc.size_in_bytes().into(), "bump_buf");
let bump_buf = ResourceProxy::Buffer(bump_buf);
recording.dispatch(shaders.prepare, (1, 1, 1), [config_buf, bump_buf]);
// TODO: really only need pathtag_wgs - 1
recording.dispatch(
shaders.pathtag_reduce,
Expand Down Expand Up @@ -255,11 +284,10 @@ impl Render {
wg_counts.bbox_clear,
[config_buf, path_bbox_buf],
);
let bump_buf = BufferProxy::new(buffer_sizes.bump_alloc.size_in_bytes().into(), "bump_buf");
recording.clear_all(bump_buf);
let bump_buf = ResourceProxy::Buffer(bump_buf);
let lines_buf =
ResourceProxy::new_buf(buffer_sizes.lines.size_in_bytes().into(), "lines_buf");
let lines_buf = ResourceProxy::new_buf(
buffer_sizes.bump_buffers.lines.size_in_bytes().into(),
"lines_buf",
);
recording.dispatch(
shaders.flatten,
wg_counts.flatten,
Expand Down Expand Up @@ -390,7 +418,7 @@ impl Render {
[bump_buf, indirect_count_buf.into()],
);
let seg_counts_buf = ResourceProxy::new_buf(
buffer_sizes.seg_counts.size_in_bytes().into(),
buffer_sizes.bump_buffers.seg_counts.size_in_bytes().into(),
"seg_counts_buf",
);
recording.dispatch_indirect(
Expand Down Expand Up @@ -429,7 +457,7 @@ impl Render {
recording.dispatch(
shaders.path_tiling_setup,
wg_counts.path_tiling_setup,
[bump_buf, indirect_count_buf.into(), ptcl_buf],
[config_buf, bump_buf, indirect_count_buf.into(), ptcl_buf],
);
recording.dispatch_indirect(
shaders.path_tiling,
Expand All @@ -452,7 +480,7 @@ impl Render {
recording.free_resource(path_buf);
let out_image = ImageProxy::new(params.width, params.height, ImageFormat::Rgba8);
let blend_spill_buf = BufferProxy::new(
buffer_sizes.blend_spill.size_in_bytes().into(),
buffer_sizes.bump_buffers.blend_spill.size_in_bytes().into(),
"blend_spill",
);
self.fine_wg_count = Some(wg_counts.fine);
Expand All @@ -469,7 +497,8 @@ impl Render {
image_atlas: ResourceProxy::Image(image_atlas),
out_image,
});
if robust {
// TODO: This check is a massive hack to disable robustness (the `bump_buf` download) if `shaders.pathtag_is_cpu` is set.
if !shaders.pathtag_is_cpu {
recording.download(*bump_buf.as_buf().unwrap());
}
recording.free_resource(bump_buf);
Expand Down Expand Up @@ -566,6 +595,7 @@ impl Render {
recording.free_resource(fine.gradient_image);
recording.free_resource(fine.image_atlas);
recording.free_resource(fine.info_bin_data_buf);
recording.free_resource(fine.blend_spill_buf);
// TODO: make mask buf persistent
if let Some(mask_buf) = self.mask_buf.take() {
recording.free_resource(mask_buf);
Expand Down
4 changes: 2 additions & 2 deletions vello/src/scene.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use skrifa::{
GlyphId, MetadataProvider, OutlineGlyphCollection,
};
#[cfg(feature = "bump_estimate")]
use vello_encoding::BumpAllocatorMemory;
use vello_encoding::BumpAllocators;
use vello_encoding::{Encoding, Glyph, GlyphRun, Patch, Transform};

// TODO - Document invariants and edge cases (#470)
Expand Down Expand Up @@ -49,7 +49,7 @@ impl Scene {
/// Tally up the bump allocator estimate for the current state of the encoding,
/// taking into account an optional `transform` applied to the entire scene.
#[cfg(feature = "bump_estimate")]
pub fn bump_estimate(&self, transform: Option<Affine>) -> BumpAllocatorMemory {
pub fn bump_estimate(&self, transform: Option<Affine>) -> BumpAllocators {
self.estimator
.tally(transform.as_ref().map(Transform::from_kurbo).as_ref())
}
Expand Down
5 changes: 4 additions & 1 deletion vello/src/shaders.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use crate::{

// Shaders for the full pipeline
pub struct FullShaders {
pub prepare: ShaderId,
pub pathtag_reduce: ShaderId,
pub pathtag_reduce2: ShaderId,
pub pathtag_scan1: ShaderId,
Expand Down Expand Up @@ -101,6 +102,7 @@ pub(crate) fn full_shaders(
};
}

let prepare = add_shader!(prepare, [Buffer, Buffer]);
let pathtag_reduce = add_shader!(pathtag_reduce, [Uniform, BufReadOnly, Buffer]);
let pathtag_reduce2 = add_shader!(
pathtag_reduce2,
Expand Down Expand Up @@ -194,7 +196,7 @@ pub(crate) fn full_shaders(
Buffer,
]
);
let path_tiling_setup = add_shader!(path_tiling_setup, [Buffer, Buffer, Buffer]);
let path_tiling_setup = add_shader!(path_tiling_setup, [Uniform, Buffer, Buffer, Buffer]);
let path_tiling = add_shader!(
path_tiling,
[
Expand Down Expand Up @@ -249,6 +251,7 @@ pub(crate) fn full_shaders(
};

Ok(FullShaders {
prepare,
pathtag_reduce,
pathtag_reduce2,
pathtag_scan,
Expand Down
24 changes: 23 additions & 1 deletion vello/src/wgpu_engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,9 @@ impl WgpuEngine {
transient_map
.bufs
.insert(buf_proxy.id, TransientBuf::Cpu(bytes));
let usage = BufferUsages::UNIFORM | BufferUsages::COPY_DST;
// TODO: More principled way of working out usages
let usage =
BufferUsages::UNIFORM | BufferUsages::COPY_DST | BufferUsages::STORAGE;
// Same consideration as above
let buf = self
.pool
Expand Down Expand Up @@ -683,6 +685,12 @@ impl WgpuEngine {
let src_buf = self
.bind_map
.get_gpu_buf(proxy.id)
.or_else(|| {
transient_map.bufs.get(&proxy.id).and_then(|it| match it {
TransientBuf::Cpu(_) => None,
TransientBuf::Gpu(buf) => Some(*buf),
})
})
.ok_or(Error::UnavailableBufferUsed(proxy.name, "download"))?;
let usage = BufferUsages::MAP_READ | BufferUsages::COPY_DST;
let buf = self.pool.get_buf(proxy.size, "download", usage, device);
Expand Down Expand Up @@ -746,6 +754,10 @@ impl WgpuEngine {
self.downloads.get(&buf.id)
}

/// Removes the download stored for `buf` and hands ownership of it to the caller.
///
/// The entry is looked up in `self.downloads` by `buf.id`. Returns `None` when
/// no download is stored under that id. Unlike `get_download`, which only
/// borrows the entry, the entry is no longer present after this call.
pub fn take_download(&mut self, buf: BufferProxy) -> Option<Buffer> {
    let key = buf.id;
    self.downloads.remove(&key)
}

/// Removes the download stored for `buf`, discarding whatever was held.
///
/// The entry is looked up in `self.downloads` by `buf.id`; the removed value,
/// if any, is dropped here rather than returned to the caller.
pub fn free_download(&mut self, buf: BufferProxy) {
    let key = buf.id;
    // The removed entry is intentionally discarded.
    let _ = self.downloads.remove(&key);
}
Expand Down Expand Up @@ -940,6 +952,16 @@ impl ResourcePool {
device: &Device,
) -> Buffer {
let rounded_size = Self::size_class(size, SIZE_CLASS_BITS);
// let max_storage_buffer_binding_size =
// device.limits().max_storage_buffer_binding_size.into();
// if rounded_size > max_storage_buffer_binding_size {
// if size < max_storage_buffer_binding_size {
// log::warn!("Would allocate buffer {name} to be larger than {max_storage_buffer_binding_size}. Clamped");
// rounded_size = max_storage_buffer_binding_size;
// } else {
// log::warn!("Would allocate buffer {name} to be larger than {max_storage_buffer_binding_size}, which is not allowed");
// }
// }
let props = BufferProperties {
size: rounded_size,
usages: usage,
Expand Down
Loading