Skip to content

Commit

Permalink
Merge #3436
Browse files Browse the repository at this point in the history
3436: [dx12] fix and improve descriptor handling r=kvark a=kvark

An improved port of #3434 for master

Co-authored-by: Dzmitry Malyshau <[email protected]>
  • Loading branch information
bors[bot] and kvark authored Oct 25, 2020
2 parents f09b062 + 2186b1c commit b365bec
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 66 deletions.
57 changes: 57 additions & 0 deletions src/backend/dx12/src/descriptors_cpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -246,3 +246,60 @@ impl MultiCopyAccumulator {
}
}
}

pub struct DescriptorUpdater {
heaps: Vec<HeapLinear>,
heap_index: usize,
reset_heap_index: usize,
avoid_overwrite: bool,
}

impl DescriptorUpdater {
pub fn new(device: native::Device, avoid_overwrite: bool) -> Self {
DescriptorUpdater {
heaps: vec![Self::create_heap(device)],
heap_index: 0,
reset_heap_index: 0,
avoid_overwrite,
}
}

pub unsafe fn destroy(&mut self) {
for heap in self.heaps.drain(..) {
heap.destroy();
}
}

pub fn reset(&mut self) {
if self.avoid_overwrite {
self.reset_heap_index = self.heap_index;
} else {
self.heap_index = 0;
for heap in self.heaps.iter_mut() {
heap.clear();
}
}
}

fn create_heap(device: native::Device) -> HeapLinear {
let size = 1 << 12; //arbitrary
HeapLinear::new(device, native::DescriptorHeapType::CbvSrvUav, size)
}

pub fn alloc_handle(&mut self, device: native::Device) -> CpuDescriptor {
if self.heaps[self.heap_index].is_full() {
self.heap_index += 1;
if self.heap_index == self.heaps.len() {
self.heap_index = 0;
}
if self.heap_index == self.reset_heap_index {
let heap = Self::create_heap(device);
self.heaps.insert(self.heap_index, heap);
self.reset_heap_index += 1;
} else {
self.heaps[self.heap_index].clear();
}
}
self.heaps[self.heap_index].alloc_handle()
}
}
71 changes: 21 additions & 50 deletions src/backend/dx12/src/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2159,7 +2159,8 @@ impl d::Device<B> for Device {
// Constant buffer view sizes need to be aligned.
// Coupled with the offset alignment we can enforce an aligned CBV size
// on descriptor updates.
size = (size + 255) & !255;
let mask = d3d12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT as u64 - 1;
size = (size + mask) & !mask;
}
if usage.contains(buffer::Usage::TRANSFER_DST) {
// minimum of 1 word for the clear UAV
Expand Down Expand Up @@ -2289,7 +2290,10 @@ impl d::Device<B> for Device {
let buffer = buffer.expect_bound();
let buffer_features = {
let idx = format.map(|fmt| fmt as usize).unwrap_or(0);
self.format_properties.get(idx).properties.buffer_features
self.format_properties
.resolve(idx)
.properties
.buffer_features
};
let (format, format_desc) = match format.and_then(conv::map_format) {
Some(fmt) => (fmt, format.unwrap().surface_desc()),
Expand Down Expand Up @@ -2384,7 +2388,7 @@ impl d::Device<B> for Device {
let view_format = conv::map_format(format);
let extent = kind.extent();

let format_info = self.format_properties.get(format as usize);
let format_info = self.format_properties.resolve(format as usize);
let (layout, features) = match tiling {
image::Tiling::Optimal => (
d3d12::D3D12_TEXTURE_LAYOUT_UNKNOWN,
Expand Down Expand Up @@ -2567,7 +2571,7 @@ impl d::Device<B> for Device {
// if the format supports being rendered into, allowing us to create clear_Xv
let format_properties = self
.format_properties
.get(image_unbound.format as usize)
.resolve(image_unbound.format as usize)
.properties;
let props = match image_unbound.tiling {
image::Tiling::Optimal => format_properties.optimal_tiling,
Expand Down Expand Up @@ -2897,8 +2901,8 @@ impl d::Device<B> for Device {
J: IntoIterator,
J::Item: Borrow<pso::Descriptor<'a, B>>,
{
let mut descriptor_update_pools = self.descriptor_update_pools.lock();
let mut update_pool_index = 0;
let mut descriptor_updater = self.descriptor_updater.lock();
descriptor_updater.reset();

let mut accum = descriptors_cpu::MultiCopyAccumulator::default();
debug!("write_descriptor_sets");
Expand Down Expand Up @@ -2941,15 +2945,6 @@ impl d::Device<B> for Device {
buffer_address + sub.offset;
} else {
// Descriptor table
if update_pool_index == descriptor_update_pools.len() {
let max_size = 1u64 << 12; //arbitrary
descriptor_update_pools.push(descriptors_cpu::HeapLinear::new(
self.raw,
native::DescriptorHeapType::CbvSrvUav,
max_size as _,
));
}
let mut heap = descriptor_update_pools.pop().unwrap();
let size = sub.size_to(buffer.requirements.size);

if bind_info.content.contains(r::DescriptorContent::CBV) {
Expand All @@ -2958,12 +2953,14 @@ impl d::Device<B> for Device {
// alignment to 256 allows us to patch the size here.
// We can always enforce the size to be aligned to 256 for
// CBVs without going out-of-bounds.
let mask =
d3d12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1;
let desc = d3d12::D3D12_CONSTANT_BUFFER_VIEW_DESC {
BufferLocation: (*buffer.resource).GetGPUVirtualAddress()
+ sub.offset,
SizeInBytes: ((size + 0xFF) & !0xFF) as _,
SizeInBytes: (size as u32 + mask) as u32 & !mask,
};
let handle = heap.alloc_handle();
let handle = descriptor_updater.alloc_handle(self.raw);
self.raw.CreateConstantBufferView(&desc, handle);
src_cbv = Some(handle);
}
Expand All @@ -2981,7 +2978,7 @@ impl d::Device<B> for Device {
StructureByteStride: 0,
Flags: d3d12::D3D12_BUFFER_SRV_FLAG_RAW,
};
let handle = heap.alloc_handle();
let handle = descriptor_updater.alloc_handle(self.raw);
self.raw.CreateShaderResourceView(
buffer.resource.as_mut_ptr(),
&desc,
Expand All @@ -3003,21 +3000,7 @@ impl d::Device<B> for Device {
CounterOffsetInBytes: 0,
Flags: d3d12::D3D12_BUFFER_UAV_FLAG_RAW,
};
if heap.is_full() {
// pool is full, move to the next one
update_pool_index += 1;
let max_size = 1u64 << 12; //arbitrary
let full_heap = mem::replace(
&mut heap,
descriptors_cpu::HeapLinear::new(
self.raw,
native::DescriptorHeapType::CbvSrvUav,
max_size as _,
),
);
descriptor_update_pools.push(full_heap);
}
let handle = heap.alloc_handle();
let handle = descriptor_updater.alloc_handle(self.raw);
self.raw.CreateUnorderedAccessView(
buffer.resource.as_mut_ptr(),
ptr::null_mut(),
Expand All @@ -3026,13 +3009,6 @@ impl d::Device<B> for Device {
);
src_uav = Some(handle);
}

// always leave this block of code prepared
if heap.is_full() {
// pool is full, move to the next one
update_pool_index += 1;
}
descriptor_update_pools.push(heap);
}
}
pso::Descriptor::Image(image, _layout) => {
Expand Down Expand Up @@ -3104,12 +3080,7 @@ impl d::Device<B> for Device {
}
}

accum.flush(self.raw.clone());

// reset the temporary CPU-size descriptor pools
for buffer_desc_pool in descriptor_update_pools.iter_mut() {
buffer_desc_pool.clear();
}
accum.flush(self.raw);
}

unsafe fn copy_descriptor_sets<'a, I>(&self, copy_iter: I)
Expand All @@ -3127,8 +3098,8 @@ impl d::Device<B> for Device {
if let (Some(src_range), Some(dst_range)) =
(src_info.view_range.as_ref(), dst_info.view_range.as_ref())
{
assert!(copy.src_array_offset + copy.count <= src_range.count as usize);
assert!(copy.dst_array_offset + copy.count <= dst_range.count as usize);
assert!(copy.src_array_offset + copy.count <= src_range.handle.size as usize);
assert!(copy.dst_array_offset + copy.count <= dst_range.handle.size as usize);
let count = copy.count as u32;
accum
.src_views
Expand All @@ -3142,11 +3113,11 @@ impl d::Device<B> for Device {
{
assert!(
src_info.count as usize + copy.src_array_offset + copy.count
<= src_range.count as usize
<= src_range.handle.size as usize
);
assert!(
dst_info.count as usize + copy.dst_array_offset + copy.count
<= dst_range.count as usize
<= dst_range.handle.size as usize
);
accum.src_views.add(
src_range.at(src_info.count + copy.src_array_offset as u64),
Expand Down
35 changes: 25 additions & 10 deletions src/backend/dx12/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,13 @@ static QUEUE_FAMILIES: [QueueFamily; 4] = [
QueueFamily::Normal(q::QueueType::Transfer),
];

#[derive(Default)]
struct Workarounds {
// On WARP, temporary CPU descriptors are still used by the runtime
// after we call `CopyDescriptors`.
avoid_cpu_descriptor_overwrites: bool,
}

//Note: fields are dropped in the order of declaration, so we put the
// most owning fields last.
pub struct PhysicalDevice {
Expand All @@ -187,6 +194,7 @@ pub struct PhysicalDevice {
limits: Limits,
format_properties: Arc<FormatProperties>,
private_caps: Capabilities,
workarounds: Workarounds,
heap_properties: &'static [HeapProperties; NUM_HEAP_PROPERTIES],
memory_properties: adapter::MemoryProperties,
// Indicates that there is currently an active logical device.
Expand Down Expand Up @@ -306,7 +314,7 @@ impl adapter::PhysicalDevice<Backend> for PhysicalDevice {

fn format_properties(&self, fmt: Option<f::Format>) -> f::Properties {
let idx = fmt.map(|fmt| fmt as usize).unwrap_or(0);
self.format_properties.get(idx).properties
self.format_properties.resolve(idx).properties
}

fn image_format_properties(
Expand All @@ -318,7 +326,7 @@ impl adapter::PhysicalDevice<Backend> for PhysicalDevice {
view_caps: image::ViewCapabilities,
) -> Option<image::FormatProperties> {
conv::map_format(format)?; //filter out unknown formats
let format_info = self.format_properties.get(format as usize);
let format_info = self.format_properties.resolve(format as usize);

let supported_usage = {
use hal::image::Usage as U;
Expand Down Expand Up @@ -576,7 +584,7 @@ pub struct Device {
rtv_pool: Mutex<DescriptorCpuPool>,
dsv_pool: Mutex<DescriptorCpuPool>,
srv_uav_pool: Mutex<DescriptorCpuPool>,
descriptor_update_pools: Mutex<Vec<descriptors_cpu::HeapLinear>>,
descriptor_updater: Mutex<descriptors_cpu::DescriptorUpdater>,
// CPU/GPU descriptor heaps
heap_srv_cbv_uav: (
resource::DescriptorHeap,
Expand Down Expand Up @@ -634,6 +642,11 @@ impl Device {
2_048,
);

let descriptor_updater = descriptors_cpu::DescriptorUpdater::new(
device,
physical_device.workarounds.avoid_cpu_descriptor_overwrites,
);

let draw_signature = Self::create_command_signature(device, device::CommandSignature::Draw);
let draw_indexed_signature =
Self::create_command_signature(device, device::CommandSignature::DrawIndexed);
Expand Down Expand Up @@ -662,7 +675,7 @@ impl Device {
rtv_pool: Mutex::new(rtv_pool),
dsv_pool: Mutex::new(dsv_pool),
srv_uav_pool: Mutex::new(srv_uav_pool),
descriptor_update_pools: Mutex::new(Vec::new()),
descriptor_updater: Mutex::new(descriptor_updater),
heap_srv_cbv_uav: (heap_srv_cbv_uav, Mutex::new(view_range_allocator)),
samplers: SamplerStorage {
map: Mutex::default(),
Expand Down Expand Up @@ -707,9 +720,7 @@ impl Drop for Device {
self.dsv_pool.lock().destroy();
self.srv_uav_pool.lock().destroy();

for pool in &*self.descriptor_update_pools.lock() {
pool.destroy();
}
self.descriptor_updater.lock().destroy();

// Debug tracking alive objects
let (debug_device, hr_debug) = self.raw.cast::<d3d12sdklayers::ID3D12DebugDevice>();
Expand Down Expand Up @@ -888,11 +899,14 @@ impl hal::Instance<Backend> for Instance {
)
});

let mut workarounds = Workarounds::default();

let info = adapter::AdapterInfo {
name: device_name,
vendor: desc.VendorId as usize,
device: desc.DeviceId as usize,
device_type: if (desc.Flags & dxgi::DXGI_ADAPTER_FLAG_SOFTWARE) != 0 {
workarounds.avoid_cpu_descriptor_overwrites = true;
adapter::DeviceType::VirtualGpu
} else if features_architecture.CacheCoherentUMA == TRUE {
adapter::DeviceType::IntegratedGpu
Expand Down Expand Up @@ -1202,8 +1216,8 @@ impl hal::Instance<Backend> for Instance {
max_vertex_input_binding_stride: d3d12::D3D12_REQ_MULTI_ELEMENT_STRUCTURE_SIZE_IN_BYTES as _,
max_vertex_output_components: 16, // TODO
min_texel_buffer_offset_alignment: 1, // TODO
min_uniform_buffer_offset_alignment: 256, // Required alignment for CBVs
min_storage_buffer_offset_alignment: 1, // TODO
min_uniform_buffer_offset_alignment: d3d12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT as _,
min_storage_buffer_offset_alignment: 4, // TODO
framebuffer_color_sample_counts: sample_count_mask,
framebuffer_depth_sample_counts: sample_count_mask,
framebuffer_stencil_sample_counts: sample_count_mask,
Expand All @@ -1221,6 +1235,7 @@ impl hal::Instance<Backend> for Instance {
heterogeneous_resource_heaps,
memory_architecture,
},
workarounds,
heap_properties,
memory_properties: adapter::MemoryProperties {
memory_types,
Expand Down Expand Up @@ -1336,7 +1351,7 @@ impl FormatProperties {
}
}

fn get(&self, idx: usize) -> FormatInfo {
fn resolve(&self, idx: usize) -> FormatInfo {
let mut guard = self.info[idx].lock();
if let Some(info) = *guard {
return info;
Expand Down
10 changes: 4 additions & 6 deletions src/backend/dx12/src/resource.rs
Original file line number Diff line number Diff line change
Expand Up @@ -532,12 +532,11 @@ pub struct DescriptorRange {
pub(crate) handle: DualHandle,
pub(crate) ty: pso::DescriptorType,
pub(crate) handle_size: u64,
pub(crate) count: u64,
}

impl DescriptorRange {
pub(crate) fn at(&self, index: DescriptorIndex) -> native::CpuDescriptor {
assert!(index < self.count);
assert!(index < self.handle.size);
let ptr = self.handle.cpu.ptr + (self.handle_size * index) as usize;
native::CpuDescriptor { ptr }
}
Expand Down Expand Up @@ -749,7 +748,7 @@ impl DescriptorHeapSlice {
/// Do not use this with handles not given out by this `DescriptorHeapSlice`.
pub(crate) fn free_handles(&mut self, handle: DualHandle) {
let start = (handle.gpu.ptr - self.start.gpu.ptr) / self.handle_size;
let handle_range = start..start + handle.size as u64;
let handle_range = start..start + handle.size;
self.range_allocator.free_range(handle_range);
}

Expand Down Expand Up @@ -816,7 +815,6 @@ impl pso::DescriptorPool<Backend> for DescriptorPool {
Some(DescriptorRange {
handle,
ty: binding.ty,
count,
handle_size: self.heap_srv_cbv_uav.handle_size,
})
} else {
Expand Down Expand Up @@ -852,8 +850,8 @@ impl pso::DescriptorPool<Backend> for DescriptorPool {
I: IntoIterator<Item = DescriptorSet>,
{
for descriptor_set in descriptor_sets {
for binding_info in &descriptor_set.binding_infos {
if let Some(ref view_range) = binding_info.view_range {
for binding_info in descriptor_set.binding_infos {
if let Some(view_range) = binding_info.view_range {
if binding_info.content.intersects(DescriptorContent::VIEW) {
self.heap_srv_cbv_uav.free_handles(view_range.handle);
}
Expand Down

0 comments on commit b365bec

Please sign in to comment.