From 3a2bbfd563d3ca698ac84979b5758f3a8f600c35 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Mon, 5 Jul 2021 02:17:09 -0400 Subject: [PATCH 01/33] hal/dx12: adapter initialization --- Cargo.lock | 12 + wgpu-hal/Cargo.toml | 4 + wgpu-hal/src/dx12/adapter.rs | 319 +++++++++++++++++ wgpu-hal/src/dx12/conv.rs | 97 ++++++ wgpu-hal/src/dx12/mod.rs | 627 ++++++++++++++++++++++++++++++++++ wgpu-hal/src/gles/adapter.rs | 4 +- wgpu-hal/src/gles/conv.rs | 2 +- wgpu-hal/src/lib.rs | 4 + wgpu-hal/src/metal/adapter.rs | 21 +- wgpu-hal/src/vulkan/conv.rs | 2 +- 10 files changed, 1080 insertions(+), 12 deletions(-) create mode 100644 wgpu-hal/src/dx12/adapter.rs create mode 100644 wgpu-hal/src/dx12/conv.rs create mode 100644 wgpu-hal/src/dx12/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 9f32861144..f1b6a07a92 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -444,6 +444,17 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "d3d12" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "091ed1b25fe47c7ff129fc440c23650b6114f36aa00bc7212cc8041879294428" +dependencies = [ + "bitflags", + "libloading 0.7.0", + "winapi 0.3.9", +] + [[package]] name = "darling" version = "0.10.2" @@ -1984,6 +1995,7 @@ dependencies = [ "bitflags", "block", "core-graphics-types", + "d3d12", "env_logger", "foreign-types", "fxhash", diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml index 17108e824d..9b4afceee5 100644 --- a/wgpu-hal/Cargo.toml +++ b/wgpu-hal/Cargo.toml @@ -16,6 +16,7 @@ default = [] metal = ["naga/msl-out", "block", "foreign-types"] vulkan = ["naga/spv-out", "ash", "gpu-alloc", "gpu-descriptor", "libloading", "inplace_it", "renderdoc-sys"] gles = ["naga/glsl-out", "glow", "egl", "libloading"] +dx12 = ["native", "winapi/d3d12", "winapi/d3d12shader", "winapi/d3d12sdklayers", "winapi/dxgi1_6"] [dependencies] bitflags = "1.0" @@ -39,6 +40,9 @@ inplace_it = { version ="0.3.3", optional = true } renderdoc-sys = { version = "0.7.1", optional = true } # backend: Gles glow = { git = "https://github.com/grovesNL/glow", rev = "0864897a28bbdd43f89f4fd8fdd4ed781b719f8a", optional = true } +# backend: Dx12 +native = { package = "d3d12", version = "0.4", features = ["libloading"], optional = true } +#winapi = { version = "0.3", features = ["basetsd","d3dcommon","d3dcompiler","dxgi1_2","dxgi1_3","dxgi1_4","dxgi1_5","dxgi1_6","dxgidebug","dxgiformat","dxgitype","handleapi","minwindef","synchapi","unknwnbase","winbase","winerror","winnt"] } [target.'cfg(not(target_arch = "wasm32"))'.dependencies] egl = { package = "khronos-egl", version = "4.1", features = ["dynamic"], optional = true } diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs new file mode 100644 index 0000000000..ff01aee89c --- /dev/null +++ b/wgpu-hal/src/dx12/adapter.rs @@ -0,0 +1,319 @@ +use super::{conv, HResultPair as _}; +use std::{mem, sync::Arc}; +use winapi::{ + shared::{dxgi, dxgi1_2, winerror}, + um::d3d12, +}; + +impl Drop for super::Adapter { + fn drop(&mut self) { + unsafe { + self.raw.destroy(); + } + } +} + +impl super::Adapter { + #[allow(trivial_casts)] + pub(super) fn expose( + adapter: native::WeakPtr, + library: &Arc, + ) -> Option> { + // Create the device so that we can get the capabilities. 
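+        // Note: device creation has two failure layers because the D3D12
+        // library is loaded at run time (the `libloading` feature of the
+        // `d3d12` crate). The outer `Err` means the entry point was not found
+        // at all, while `check()` (defined in `mod.rs`) turns a failing
+        // HRESULT from the call itself into a readable error. Either way we
+        // return `None`, which just skips exposing this adapter. Feature
+        // level 11_0 is the minimum requested here.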
+ let device = match library.create_device(adapter, native::FeatureLevel::L11_0) { + Ok(pair) => match pair.check() { + Ok(device) => device, + Err(err) => { + log::warn!("Device creation failed: {}", err); + return None; + } + }, + Err(err) => { + log::warn!("Device creation function is not found: {:?}", err); + return None; + } + }; + + // We have found a possible adapter. + // Acquire the device information. + let mut desc: dxgi1_2::DXGI_ADAPTER_DESC2 = unsafe { mem::zeroed() }; + unsafe { + adapter.GetDesc2(&mut desc); + } + + let device_name = { + use std::{ffi::OsString, os::windows::ffi::OsStringExt}; + let len = desc.Description.iter().take_while(|&&c| c != 0).count(); + let name = OsString::from_wide(&desc.Description[..len]); + name.to_string_lossy().into_owned() + }; + + let mut features_architecture: d3d12::D3D12_FEATURE_DATA_ARCHITECTURE = + unsafe { mem::zeroed() }; + assert_eq!(0, unsafe { + device.CheckFeatureSupport( + d3d12::D3D12_FEATURE_ARCHITECTURE, + &mut features_architecture as *mut _ as *mut _, + mem::size_of::() as _, + ) + }); + + let mut workarounds = super::Workarounds::default(); + + let info = wgt::AdapterInfo { + backend: wgt::Backend::Dx12, + name: device_name, + vendor: desc.VendorId as usize, + device: desc.DeviceId as usize, + device_type: if (desc.Flags & dxgi::DXGI_ADAPTER_FLAG_SOFTWARE) != 0 { + workarounds.avoid_cpu_descriptor_overwrites = true; + wgt::DeviceType::VirtualGpu + } else if features_architecture.CacheCoherentUMA != 0 { + wgt::DeviceType::IntegratedGpu + } else { + wgt::DeviceType::DiscreteGpu + }, + }; + + let mut options: d3d12::D3D12_FEATURE_DATA_D3D12_OPTIONS = unsafe { mem::zeroed() }; + assert_eq!(0, unsafe { + device.CheckFeatureSupport( + d3d12::D3D12_FEATURE_D3D12_OPTIONS, + &mut options as *mut _ as *mut _, + mem::size_of::() as _, + ) + }); + + let _depth_bounds_test_supported = { + let mut features2: d3d12::D3D12_FEATURE_DATA_D3D12_OPTIONS2 = unsafe { mem::zeroed() }; + let hr = unsafe { + device.CheckFeatureSupport( + d3d12::D3D12_FEATURE_D3D12_OPTIONS2, + &mut features2 as *mut _ as *mut _, + mem::size_of::() as _, + ) + }; + hr == 0 && features2.DepthBoundsTestSupported != 0 + }; + + let private_caps = super::PrivateCapabilities { + heterogeneous_resource_heaps: options.ResourceHeapTier + != d3d12::D3D12_RESOURCE_HEAP_TIER_1, + memory_architecture: if features_architecture.UMA != 0 { + super::MemoryArchitecture::Unified { + cache_coherent: features_architecture.CacheCoherentUMA != 0, + } + } else { + super::MemoryArchitecture::NonUnified + }, + }; + + // Theoretically vram limited, but in practice 2^20 is the limit + let tier3_practical_descriptor_limit = 1 << 20; + + let (full_heap_count, _uav_count) = match options.ResourceBindingTier { + d3d12::D3D12_RESOURCE_BINDING_TIER_1 => ( + d3d12::D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1, + 8, // conservative, is 64 on feature level 11.1 + ), + d3d12::D3D12_RESOURCE_BINDING_TIER_2 => ( + d3d12::D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_2, + 64, + ), + d3d12::D3D12_RESOURCE_BINDING_TIER_3 => ( + tier3_practical_descriptor_limit, + tier3_practical_descriptor_limit, + ), + other => { + log::warn!("Unknown resource binding tier {}", other); + ( + d3d12::D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1, + 8, + ) + } + }; + + let mut features = wgt::Features::empty() + | wgt::Features::DEPTH_CLAMPING + //TODO: Naga part + //| wgt::Features::TEXTURE_BINDING_ARRAY + //| wgt::Features::BUFFER_BINDING_ARRAY + //| wgt::Features::STORAGE_RESOURCE_BINDING_ARRAY + //| 
wgt::Features::UNSIZED_BINDING_ARRAY + | wgt::Features::MULTI_DRAW_INDIRECT + | wgt::Features::MULTI_DRAW_INDIRECT_COUNT + | wgt::Features::ADDRESS_MODE_CLAMP_TO_BORDER + | wgt::Features::NON_FILL_POLYGON_MODE + |wgt::Features::VERTEX_WRITABLE_STORAGE; + + features.set( + wgt::Features::CONSERVATIVE_RASTERIZATION, + options.ConservativeRasterizationTier + != d3d12::D3D12_CONSERVATIVE_RASTERIZATION_TIER_NOT_SUPPORTED, + ); + + let base = wgt::Limits::default(); + + Some(crate::ExposedAdapter { + adapter: super::Adapter { + raw: adapter, + device, + library: Arc::clone(library), + private_caps, + workarounds, + }, + info, + features, + capabilities: crate::Capabilities { + limits: wgt::Limits { + max_texture_dimension_1d: d3d12::D3D12_REQ_TEXTURE1D_U_DIMENSION, + max_texture_dimension_2d: d3d12::D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION + .min(d3d12::D3D12_REQ_TEXTURECUBE_DIMENSION), + max_texture_dimension_3d: d3d12::D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION, + max_texture_array_layers: d3d12::D3D12_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION, + max_bind_groups: crate::MAX_BIND_GROUPS as u32, + // dynamic offsets take a root constant, so we expose the minimum here + max_dynamic_uniform_buffers_per_pipeline_layout: base + .max_dynamic_uniform_buffers_per_pipeline_layout, + max_dynamic_storage_buffers_per_pipeline_layout: base + .max_dynamic_storage_buffers_per_pipeline_layout, + max_sampled_textures_per_shader_stage: match options.ResourceBindingTier { + d3d12::D3D12_RESOURCE_BINDING_TIER_1 => 128, + d3d12::D3D12_RESOURCE_BINDING_TIER_2 + | d3d12::D3D12_RESOURCE_BINDING_TIER_3 + | _ => full_heap_count, + }, + max_samplers_per_shader_stage: match options.ResourceBindingTier { + d3d12::D3D12_RESOURCE_BINDING_TIER_1 => 16, + d3d12::D3D12_RESOURCE_BINDING_TIER_2 + | d3d12::D3D12_RESOURCE_BINDING_TIER_3 + | _ => d3d12::D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE, + }, + // these both account towards `uav_count`, but we can't express the limit as as sum + max_storage_buffers_per_shader_stage: base.max_storage_buffers_per_shader_stage, + max_storage_textures_per_shader_stage: base + .max_storage_textures_per_shader_stage, + max_uniform_buffers_per_shader_stage: full_heap_count, + max_uniform_buffer_binding_size: d3d12::D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT + * 16, + max_storage_buffer_binding_size: !0, + max_vertex_buffers: d3d12::D3D12_VS_INPUT_REGISTER_COUNT + .min(crate::MAX_VERTEX_BUFFERS as u32), + max_vertex_attributes: d3d12::D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT, + max_vertex_buffer_array_stride: d3d12::D3D12_SO_BUFFER_MAX_STRIDE_IN_BYTES, + max_push_constant_size: 0, + }, + alignments: crate::Alignments { + buffer_copy_offset: wgt::BufferSize::new( + d3d12::D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT as u64, + ) + .unwrap(), + buffer_copy_pitch: wgt::BufferSize::new( + d3d12::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT as u64, + ) + .unwrap(), + uniform_buffer_offset: wgt::BufferSize::new( + d3d12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT as u64, + ) + .unwrap(), + storage_buffer_offset: wgt::BufferSize::new(4).unwrap(), //TODO? 
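+                    // The three D3D12 constants above resolve to 512 bytes
+                    // (texture data placement), 256 bytes (row pitch) and
+                    // 256 bytes (constant buffer placement); the 4-byte
+                    // storage offset is provisional, as the `TODO` notes.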
+ }, + downlevel: wgt::DownlevelCapabilities::default(), + }, + }) + } +} + +impl crate::Adapter for super::Adapter { + unsafe fn open( + &self, + _features: wgt::Features, + ) -> Result, crate::DeviceError> { + let queue = self + .device + .create_command_queue( + native::CmdListType::Direct, + native::Priority::Normal, + native::CommandQueueFlags::empty(), + 0, + ) + .check() + .map_err(|err| { + log::warn!("Queue creation failed: {}", err); + crate::DeviceError::OutOfMemory + })?; + + Ok(crate::OpenDevice { + device: super::Device { raw: self.device }, + queue: super::Queue { raw: queue }, + }) + } + + #[allow(trivial_casts)] + unsafe fn texture_format_capabilities( + &self, + format: wgt::TextureFormat, + ) -> crate::TextureFormatCapabilities { + use crate::TextureFormatCapabilities as Tfc; + + let info = format.describe(); + let is_compressed = info.block_dimensions != (1, 1); + let raw_format = conv::map_texture_format(format); + + let mut data = d3d12::D3D12_FEATURE_DATA_FORMAT_SUPPORT { + Format: raw_format, + Support1: mem::zeroed(), + Support2: mem::zeroed(), + }; + assert_eq!( + winerror::S_OK, + self.device.CheckFeatureSupport( + d3d12::D3D12_FEATURE_FORMAT_SUPPORT, + &mut data as *mut _ as *mut _, + mem::size_of::() as _, + ) + ); + + let mut caps = Tfc::COPY_SRC | Tfc::COPY_DST; + let can_image = 0 + != data.Support1 + & (d3d12::D3D12_FORMAT_SUPPORT1_TEXTURE1D + | d3d12::D3D12_FORMAT_SUPPORT1_TEXTURE2D + | d3d12::D3D12_FORMAT_SUPPORT1_TEXTURE3D + | d3d12::D3D12_FORMAT_SUPPORT1_TEXTURECUBE); + caps.set(Tfc::SAMPLED, can_image); + caps.set( + Tfc::SAMPLED_LINEAR, + data.Support1 & d3d12::D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE != 0, + ); + caps.set( + Tfc::COLOR_ATTACHMENT, + data.Support1 & d3d12::D3D12_FORMAT_SUPPORT1_RENDER_TARGET != 0, + ); + caps.set( + Tfc::COLOR_ATTACHMENT_BLEND, + data.Support1 & d3d12::D3D12_FORMAT_SUPPORT1_BLENDABLE != 0, + ); + caps.set( + Tfc::DEPTH_STENCIL_ATTACHMENT, + data.Support1 & d3d12::D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL != 0, + ); + caps.set( + Tfc::STORAGE, + data.Support1 & d3d12::D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW != 0, + ); + caps.set( + Tfc::STORAGE_READ_WRITE, + data.Support2 & d3d12::D3D12_FORMAT_SUPPORT2_UAV_TYPED_LOAD != 0, + ); + + caps + } + + unsafe fn surface_capabilities( + &self, + surface: &super::Surface, + ) -> Option { + None + } +} diff --git a/wgpu-hal/src/dx12/conv.rs b/wgpu-hal/src/dx12/conv.rs new file mode 100644 index 0000000000..efe4b08797 --- /dev/null +++ b/wgpu-hal/src/dx12/conv.rs @@ -0,0 +1,97 @@ +use winapi::shared::dxgiformat; + +pub(super) fn map_texture_format(format: wgt::TextureFormat) -> dxgiformat::DXGI_FORMAT { + use wgt::TextureFormat as Tf; + use winapi::shared::dxgiformat::*; + + match format { + Tf::R8Unorm => DXGI_FORMAT_R8_UNORM, + Tf::R8Snorm => DXGI_FORMAT_R8_SNORM, + Tf::R8Uint => DXGI_FORMAT_R8_UINT, + Tf::R8Sint => DXGI_FORMAT_R8_SINT, + Tf::R16Uint => DXGI_FORMAT_R16_UINT, + Tf::R16Sint => DXGI_FORMAT_R16_SINT, + Tf::R16Float => DXGI_FORMAT_R16_FLOAT, + Tf::Rg8Unorm => DXGI_FORMAT_R8G8_UNORM, + Tf::Rg8Snorm => DXGI_FORMAT_R8G8_SNORM, + Tf::Rg8Uint => DXGI_FORMAT_R8G8_UINT, + Tf::Rg8Sint => DXGI_FORMAT_R8G8_SINT, + Tf::R32Uint => DXGI_FORMAT_R32_UINT, + Tf::R32Sint => DXGI_FORMAT_R32_SINT, + Tf::R32Float => DXGI_FORMAT_R32_FLOAT, + Tf::Rg16Uint => DXGI_FORMAT_R16G16_UINT, + Tf::Rg16Sint => DXGI_FORMAT_R16G16_SINT, + Tf::Rg16Float => DXGI_FORMAT_R16G16_FLOAT, + Tf::Rgba8Unorm => DXGI_FORMAT_R8G8B8A8_UNORM, + Tf::Rgba8UnormSrgb => DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, + 
Tf::Bgra8UnormSrgb => DXGI_FORMAT_B8G8R8A8_UNORM_SRGB, + Tf::Rgba8Snorm => DXGI_FORMAT_R8G8B8A8_SNORM, + Tf::Bgra8Unorm => DXGI_FORMAT_B8G8R8A8_UNORM, + Tf::Rgba8Uint => DXGI_FORMAT_R8G8B8A8_UINT, + Tf::Rgba8Sint => DXGI_FORMAT_R8G8B8A8_SINT, + Tf::Rgb10a2Unorm => DXGI_FORMAT_R10G10B10A2_UNORM, + Tf::Rg11b10Float => DXGI_FORMAT_R11G11B10_FLOAT, + Tf::Rg32Uint => DXGI_FORMAT_R32G32_UINT, + Tf::Rg32Sint => DXGI_FORMAT_R32G32_SINT, + Tf::Rg32Float => DXGI_FORMAT_R32G32_FLOAT, + Tf::Rgba16Uint => DXGI_FORMAT_R16G16B16A16_UINT, + Tf::Rgba16Sint => DXGI_FORMAT_R16G16B16A16_SINT, + Tf::Rgba16Float => DXGI_FORMAT_R16G16B16A16_FLOAT, + Tf::Rgba32Uint => DXGI_FORMAT_R32G32B32A32_UINT, + Tf::Rgba32Sint => DXGI_FORMAT_R32G32B32A32_SINT, + Tf::Rgba32Float => DXGI_FORMAT_R32G32B32A32_FLOAT, + Tf::Depth32Float => DXGI_FORMAT_D32_FLOAT, + Tf::Depth24Plus => DXGI_FORMAT_D24_UNORM_S8_UINT, + Tf::Depth24PlusStencil8 => DXGI_FORMAT_D24_UNORM_S8_UINT, + Tf::Bc1RgbaUnorm => DXGI_FORMAT_BC1_UNORM, + Tf::Bc1RgbaUnormSrgb => DXGI_FORMAT_BC1_UNORM_SRGB, + Tf::Bc2RgbaUnorm => DXGI_FORMAT_BC2_UNORM, + Tf::Bc2RgbaUnormSrgb => DXGI_FORMAT_BC2_UNORM_SRGB, + Tf::Bc3RgbaUnorm => DXGI_FORMAT_BC3_UNORM, + Tf::Bc3RgbaUnormSrgb => DXGI_FORMAT_BC3_UNORM_SRGB, + Tf::Bc4RUnorm => DXGI_FORMAT_BC4_UNORM, + Tf::Bc4RSnorm => DXGI_FORMAT_BC4_SNORM, + Tf::Bc5RgUnorm => DXGI_FORMAT_BC5_UNORM, + Tf::Bc5RgSnorm => DXGI_FORMAT_BC5_SNORM, + Tf::Bc6hRgbUfloat => DXGI_FORMAT_BC6H_UF16, + Tf::Bc6hRgbSfloat => DXGI_FORMAT_BC6H_SF16, + Tf::Bc7RgbaUnorm => DXGI_FORMAT_BC7_UNORM, + Tf::Bc7RgbaUnormSrgb => DXGI_FORMAT_BC7_UNORM_SRGB, + Tf::Etc2RgbUnorm + | Tf::Etc2RgbUnormSrgb + | Tf::Etc2RgbA1Unorm + | Tf::Etc2RgbA1UnormSrgb + | Tf::EacRUnorm + | Tf::EacRSnorm + | Tf::EacRgUnorm + | Tf::EacRgSnorm + | Tf::Astc4x4RgbaUnorm + | Tf::Astc4x4RgbaUnormSrgb + | Tf::Astc5x4RgbaUnorm + | Tf::Astc5x4RgbaUnormSrgb + | Tf::Astc5x5RgbaUnorm + | Tf::Astc5x5RgbaUnormSrgb + | Tf::Astc6x5RgbaUnorm + | Tf::Astc6x5RgbaUnormSrgb + | Tf::Astc6x6RgbaUnorm + | Tf::Astc6x6RgbaUnormSrgb + | Tf::Astc8x5RgbaUnorm + | Tf::Astc8x5RgbaUnormSrgb + | Tf::Astc8x6RgbaUnorm + | Tf::Astc8x6RgbaUnormSrgb + | Tf::Astc10x5RgbaUnorm + | Tf::Astc10x5RgbaUnormSrgb + | Tf::Astc10x6RgbaUnorm + | Tf::Astc10x6RgbaUnormSrgb + | Tf::Astc8x8RgbaUnorm + | Tf::Astc8x8RgbaUnormSrgb + | Tf::Astc10x8RgbaUnorm + | Tf::Astc10x8RgbaUnormSrgb + | Tf::Astc10x10RgbaUnorm + | Tf::Astc10x10RgbaUnormSrgb + | Tf::Astc12x10RgbaUnorm + | Tf::Astc12x10RgbaUnormSrgb + | Tf::Astc12x12RgbaUnorm + | Tf::Astc12x12RgbaUnormSrgb => unreachable!(), + } +} diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs new file mode 100644 index 0000000000..1ca55e8d68 --- /dev/null +++ b/wgpu-hal/src/dx12/mod.rs @@ -0,0 +1,627 @@ +/*! +# DirectX12 API internals. 
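+
+This backend builds on the `d3d12` crate (imported as `native`) and raw
+`winapi` bindings, and is gated behind the `dx12` cargo feature (exposed as
+`crate::api::Dx12`). At this stage only instance and adapter initialization
+are functional; the device, queue and command encoder implementations still
+operate on the placeholder `Resource`/`Encoder` types below.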
+ +## Pipeline Layout + +!*/ + +#![allow(unused_variables)] + +mod adapter; +mod conv; + +use std::{borrow::Cow, ops::Range, sync::Arc}; +use winapi::{ + shared::{dxgi, dxgi1_2, dxgi1_4, dxgi1_6, windef, winerror}, + Interface as _, +}; + +#[derive(Clone)] +pub struct Api; +//TODO: remove these temporaries +pub struct Encoder; +#[derive(Debug)] +pub struct Resource; + +type DeviceResult = Result; + +impl crate::Api for Api { + type Instance = Instance; + type Surface = Surface; + type Adapter = Adapter; + type Device = Device; + + type Queue = Queue; + type CommandEncoder = Encoder; + type CommandBuffer = Resource; + + type Buffer = Resource; + type Texture = Resource; + type SurfaceTexture = Resource; + type TextureView = Resource; + type Sampler = Resource; + type QuerySet = Resource; + type Fence = Resource; + + type BindGroupLayout = Resource; + type BindGroup = Resource; + type PipelineLayout = Resource; + type ShaderModule = Resource; + type RenderPipeline = Resource; + type ComputePipeline = Resource; +} + +trait HResult { + fn to_error(self) -> Option>; +} +impl HResult for i32 { + fn to_error(self) -> Option> { + if self >= 0 { + return None; + } + let description = match self { + winerror::E_UNEXPECTED => "unexpected", + winerror::E_NOTIMPL => "not implemented", + winerror::E_OUTOFMEMORY => "out of memory", + winerror::E_INVALIDARG => "invalid argument", + _ => return Some(Cow::Owned(format!("0x{:X}", self as u32))), + }; + Some(Cow::Borrowed(description)) + } +} + +trait HResultPair { + type Object; + fn check(self) -> Result>; +} +impl HResultPair for (T, i32) { + type Object = T; + fn check(self) -> Result> { + match self.1.to_error() { + None => Ok(self.0), + Some(err) => Err(err), + } + } +} + +pub struct Instance { + factory: native::Factory4, + library: Arc, + lib_dxgi: native::DxgiLib, +} + +impl Drop for Instance { + fn drop(&mut self) { + unsafe { + self.factory.destroy(); + } + } +} + +unsafe impl Send for Instance {} +unsafe impl Sync for Instance {} + +pub struct Surface { + factory: native::WeakPtr, + wnd_handle: windef::HWND, + //presentation: Option, +} + +unsafe impl Send for Surface {} +unsafe impl Sync for Surface {} + +#[derive(Debug, Clone, Copy)] +enum MemoryArchitecture { + Unified { cache_coherent: bool }, + NonUnified, +} + +#[derive(Debug, Clone, Copy)] +struct PrivateCapabilities { + heterogeneous_resource_heaps: bool, + memory_architecture: MemoryArchitecture, +} + +#[derive(Default)] +struct Workarounds { + // On WARP, temporary CPU descriptors are still used by the runtime + // after we call `CopyDescriptors`. 
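+    // Set in `adapter.rs` when the adapter reports `DXGI_ADAPTER_FLAG_SOFTWARE`
+    // (i.e. WARP); it tells the backend not to overwrite CPU descriptors that
+    // may still be referenced by the runtime.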
+ avoid_cpu_descriptor_overwrites: bool, +} + +pub struct Adapter { + raw: native::WeakPtr, + device: native::Device, + library: Arc, + private_caps: PrivateCapabilities, + workarounds: Workarounds, +} + +unsafe impl Send for Adapter {} +unsafe impl Sync for Adapter {} + +pub struct Device { + raw: native::Device, +} + +unsafe impl Send for Device {} +unsafe impl Sync for Device {} + +pub struct Queue { + raw: native::CommandQueue, +} + +unsafe impl Send for Queue {} +unsafe impl Sync for Queue {} + +impl crate::Instance for Instance { + unsafe fn init(desc: &crate::InstanceDescriptor) -> Result { + let lib_main = native::D3D12Lib::new().map_err(|_| crate::InstanceError)?; + + let lib_dxgi = native::DxgiLib::new().map_err(|_| crate::InstanceError)?; + let mut factory_flags = native::FactoryCreationFlags::empty(); + + if desc.flags.contains(crate::InstanceFlags::VALIDATION) { + // Enable debug layer + match lib_main.get_debug_interface() { + Ok(pair) => match pair.check() { + Ok(debug_controller) => { + debug_controller.enable_layer(); + debug_controller.Release(); + } + Err(err) => { + log::warn!("Unable to enable D3D12 debug interface: {}", err); + } + }, + Err(err) => { + log::warn!("Debug interface function for D3D12 not found: {:?}", err); + } + } + + // The `DXGI_CREATE_FACTORY_DEBUG` flag is only allowed to be passed to + // `CreateDXGIFactory2` if the debug interface is actually available. So + // we check for whether it exists first. + match lib_dxgi.get_debug_interface1() { + Ok(pair) => match pair.check() { + Ok(debug_controller) => { + debug_controller.destroy(); + factory_flags |= native::FactoryCreationFlags::DEBUG; + } + Err(err) => { + log::warn!("Unable to enable DXGI debug interface: {}", err); + } + }, + Err(err) => { + log::warn!("Debug interface function for DXGI not found: {:?}", err); + } + } + } + + // Create DXGI factory + let factory = match lib_dxgi.create_factory2(factory_flags) { + Ok(pair) => match pair.check() { + Ok(factory) => factory, + Err(err) => { + log::warn!("Failed to create DXGI factory: {}", err); + return Err(crate::InstanceError); + } + }, + Err(err) => { + log::warn!("Factory creation function for DXGI not found: {:?}", err); + return Err(crate::InstanceError); + } + }; + + Ok(Self { + factory, + library: Arc::new(lib_main), + lib_dxgi, + }) + } + + unsafe fn create_surface( + &self, + has_handle: &impl raw_window_handle::HasRawWindowHandle, + ) -> Result { + match has_handle.raw_window_handle() { + raw_window_handle::RawWindowHandle::Windows(handle) => { + Ok(Surface { + factory: self.factory, + wnd_handle: handle.hwnd as *mut _, + //presentation: None, + }) + } + _ => Err(crate::InstanceError), + } + } + unsafe fn destroy_surface(&self, _surface: Surface) { + // just drop + } + + unsafe fn enumerate_adapters(&self) -> Vec> { + // Try to use high performance order by default (returns None on Windows < 1803) + let factory6 = match self.factory.cast::().check() { + Ok(f6) => { + // It's okay to decrement the refcount here because we + // have another reference to the factory already owned by `self`. + f6.destroy(); + Some(f6) + } + Err(err) => { + log::info!("Failed to cast DXGI to 1.6: {}", err); + None + } + }; + + // Enumerate adapters + let mut adapters = Vec::new(); + for cur_index in 0.. 
{ + let raw = match factory6 { + Some(factory) => { + let mut adapter2 = native::WeakPtr::::null(); + let hr = factory.EnumAdapterByGpuPreference( + cur_index, + dxgi1_6::DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, + &dxgi1_2::IDXGIAdapter2::uuidof(), + adapter2.mut_void(), + ); + + if hr == winerror::DXGI_ERROR_NOT_FOUND { + break; + } + if let Some(err) = hr.to_error() { + log::error!("Failed enumerating adapters: {}", err); + break; + } + + adapter2 + } + None => { + let mut adapter1 = native::WeakPtr::::null(); + let hr = self + .factory + .EnumAdapters1(cur_index, adapter1.mut_void() as *mut *mut _); + + if hr == winerror::DXGI_ERROR_NOT_FOUND { + break; + } + if let Some(err) = hr.to_error() { + log::error!("Failed enumerating adapters: {}", err); + break; + } + + match adapter1.cast::().check() { + Ok(adapter2) => { + adapter1.destroy(); + adapter2 + } + Err(err) => { + log::error!("Failed casting to Adapter2: {}", err); + break; + } + } + } + }; + + adapters.extend(Adapter::expose(raw, &self.library)); + } + adapters + } +} + +impl crate::Surface for Surface { + unsafe fn configure( + &mut self, + device: &Device, + config: &crate::SurfaceConfiguration, + ) -> Result<(), crate::SurfaceError> { + Ok(()) + } + + unsafe fn unconfigure(&mut self, device: &Device) {} + + unsafe fn acquire_texture( + &mut self, + timeout_ms: u32, + ) -> Result>, crate::SurfaceError> { + Ok(None) + } + unsafe fn discard_texture(&mut self, texture: Resource) {} +} + +impl crate::Queue for Queue { + unsafe fn submit( + &mut self, + command_buffers: &[&Resource], + signal_fence: Option<(&mut Resource, crate::FenceValue)>, + ) -> DeviceResult<()> { + Ok(()) + } + unsafe fn present( + &mut self, + surface: &mut Surface, + texture: Resource, + ) -> Result<(), crate::SurfaceError> { + Ok(()) + } +} + +impl crate::Device for Device { + unsafe fn exit(self) {} + unsafe fn create_buffer(&self, desc: &crate::BufferDescriptor) -> DeviceResult { + Ok(Resource) + } + unsafe fn destroy_buffer(&self, buffer: Resource) {} + unsafe fn map_buffer( + &self, + buffer: &Resource, + range: crate::MemoryRange, + ) -> DeviceResult { + Err(crate::DeviceError::Lost) + } + unsafe fn unmap_buffer(&self, buffer: &Resource) -> DeviceResult<()> { + Ok(()) + } + unsafe fn flush_mapped_ranges(&self, buffer: &Resource, ranges: I) {} + unsafe fn invalidate_mapped_ranges(&self, buffer: &Resource, ranges: I) {} + + unsafe fn create_texture(&self, desc: &crate::TextureDescriptor) -> DeviceResult { + Ok(Resource) + } + unsafe fn destroy_texture(&self, texture: Resource) {} + unsafe fn create_texture_view( + &self, + texture: &Resource, + desc: &crate::TextureViewDescriptor, + ) -> DeviceResult { + Ok(Resource) + } + unsafe fn destroy_texture_view(&self, view: Resource) {} + unsafe fn create_sampler(&self, desc: &crate::SamplerDescriptor) -> DeviceResult { + Ok(Resource) + } + unsafe fn destroy_sampler(&self, sampler: Resource) {} + + unsafe fn create_command_encoder( + &self, + desc: &crate::CommandEncoderDescriptor, + ) -> DeviceResult { + Ok(Encoder) + } + unsafe fn destroy_command_encoder(&self, encoder: Encoder) {} + + unsafe fn create_bind_group_layout( + &self, + desc: &crate::BindGroupLayoutDescriptor, + ) -> DeviceResult { + Ok(Resource) + } + unsafe fn destroy_bind_group_layout(&self, bg_layout: Resource) {} + unsafe fn create_pipeline_layout( + &self, + desc: &crate::PipelineLayoutDescriptor, + ) -> DeviceResult { + Ok(Resource) + } + unsafe fn destroy_pipeline_layout(&self, pipeline_layout: Resource) {} + unsafe fn create_bind_group( + 
&self, + desc: &crate::BindGroupDescriptor, + ) -> DeviceResult { + Ok(Resource) + } + unsafe fn destroy_bind_group(&self, group: Resource) {} + + unsafe fn create_shader_module( + &self, + desc: &crate::ShaderModuleDescriptor, + shader: crate::ShaderInput, + ) -> Result { + Ok(Resource) + } + unsafe fn destroy_shader_module(&self, module: Resource) {} + unsafe fn create_render_pipeline( + &self, + desc: &crate::RenderPipelineDescriptor, + ) -> Result { + Ok(Resource) + } + unsafe fn destroy_render_pipeline(&self, pipeline: Resource) {} + unsafe fn create_compute_pipeline( + &self, + desc: &crate::ComputePipelineDescriptor, + ) -> Result { + Ok(Resource) + } + unsafe fn destroy_compute_pipeline(&self, pipeline: Resource) {} + + unsafe fn create_query_set( + &self, + desc: &wgt::QuerySetDescriptor, + ) -> DeviceResult { + Ok(Resource) + } + unsafe fn destroy_query_set(&self, set: Resource) {} + unsafe fn create_fence(&self) -> DeviceResult { + Ok(Resource) + } + unsafe fn destroy_fence(&self, fence: Resource) {} + unsafe fn get_fence_value(&self, fence: &Resource) -> DeviceResult { + Ok(0) + } + unsafe fn wait( + &self, + fence: &Resource, + value: crate::FenceValue, + timeout_ms: u32, + ) -> DeviceResult { + Ok(true) + } + + unsafe fn start_capture(&self) -> bool { + false + } + unsafe fn stop_capture(&self) {} +} + +impl crate::CommandEncoder for Encoder { + unsafe fn begin_encoding(&mut self, label: crate::Label) -> DeviceResult<()> { + Ok(()) + } + unsafe fn discard_encoding(&mut self) {} + unsafe fn end_encoding(&mut self) -> DeviceResult { + Ok(Resource) + } + unsafe fn reset_all(&mut self, command_buffers: I) {} + + unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) + where + T: Iterator>, + { + } + + unsafe fn transition_textures<'a, T>(&mut self, barriers: T) + where + T: Iterator>, + { + } + + unsafe fn fill_buffer(&mut self, buffer: &Resource, range: crate::MemoryRange, value: u8) {} + + unsafe fn copy_buffer_to_buffer(&mut self, src: &Resource, dst: &Resource, regions: T) {} + + unsafe fn copy_texture_to_texture( + &mut self, + src: &Resource, + src_usage: crate::TextureUses, + dst: &Resource, + regions: T, + ) { + } + + unsafe fn copy_buffer_to_texture(&mut self, src: &Resource, dst: &Resource, regions: T) {} + + unsafe fn copy_texture_to_buffer( + &mut self, + src: &Resource, + src_usage: crate::TextureUses, + dst: &Resource, + regions: T, + ) { + } + + unsafe fn begin_query(&mut self, set: &Resource, index: u32) {} + unsafe fn end_query(&mut self, set: &Resource, index: u32) {} + unsafe fn write_timestamp(&mut self, set: &Resource, index: u32) {} + unsafe fn reset_queries(&mut self, set: &Resource, range: Range) {} + unsafe fn copy_query_results( + &mut self, + set: &Resource, + range: Range, + buffer: &Resource, + offset: wgt::BufferAddress, + stride: wgt::BufferSize, + ) { + } + + // render + + unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor) {} + unsafe fn end_render_pass(&mut self) {} + + unsafe fn set_bind_group( + &mut self, + layout: &Resource, + index: u32, + group: &Resource, + dynamic_offsets: &[wgt::DynamicOffset], + ) { + } + unsafe fn set_push_constants( + &mut self, + layout: &Resource, + stages: wgt::ShaderStages, + offset: u32, + data: &[u32], + ) { + } + + unsafe fn insert_debug_marker(&mut self, label: &str) {} + unsafe fn begin_debug_marker(&mut self, group_label: &str) {} + unsafe fn end_debug_marker(&mut self) {} + + unsafe fn set_render_pipeline(&mut self, pipeline: &Resource) {} + + unsafe fn set_index_buffer<'a>( + 
&mut self, + binding: crate::BufferBinding<'a, Api>, + format: wgt::IndexFormat, + ) { + } + unsafe fn set_vertex_buffer<'a>(&mut self, index: u32, binding: crate::BufferBinding<'a, Api>) { + } + unsafe fn set_viewport(&mut self, rect: &crate::Rect, depth_range: Range) {} + unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect) {} + unsafe fn set_stencil_reference(&mut self, value: u32) {} + unsafe fn set_blend_constants(&mut self, color: &wgt::Color) {} + + unsafe fn draw( + &mut self, + start_vertex: u32, + vertex_count: u32, + start_instance: u32, + instance_count: u32, + ) { + } + unsafe fn draw_indexed( + &mut self, + start_index: u32, + index_count: u32, + base_vertex: i32, + start_instance: u32, + instance_count: u32, + ) { + } + unsafe fn draw_indirect( + &mut self, + buffer: &Resource, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + } + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &Resource, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + } + unsafe fn draw_indirect_count( + &mut self, + buffer: &Resource, + offset: wgt::BufferAddress, + count_buffer: &Resource, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + } + unsafe fn draw_indexed_indirect_count( + &mut self, + buffer: &Resource, + offset: wgt::BufferAddress, + count_buffer: &Resource, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + } + + // compute + + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) {} + unsafe fn end_compute_pass(&mut self) {} + + unsafe fn set_compute_pipeline(&mut self, pipeline: &Resource) {} + + unsafe fn dispatch(&mut self, count: [u32; 3]) {} + unsafe fn dispatch_indirect(&mut self, buffer: &Resource, offset: wgt::BufferAddress) {} +} diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs index 00f5d7ea0d..153755d8f9 100644 --- a/wgpu-hal/src/gles/adapter.rs +++ b/wgpu-hal/src/gles/adapter.rs @@ -204,7 +204,9 @@ impl super::Adapter { vertex_shader_storage_blocks.min(fragment_shader_storage_blocks) }; - let mut features = wgt::Features::empty() | wgt::Features::TEXTURE_COMPRESSION_ETC2; + let mut features = wgt::Features::empty() + | wgt::Features::TEXTURE_COMPRESSION_ETC2 + | wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES; features.set( wgt::Features::DEPTH_CLAMPING, extensions.contains("GL_EXT_depth_clamp"), diff --git a/wgpu-hal/src/gles/conv.rs b/wgpu-hal/src/gles/conv.rs index fbba13b96e..a5771be032 100644 --- a/wgpu-hal/src/gles/conv.rs +++ b/wgpu-hal/src/gles/conv.rs @@ -70,8 +70,8 @@ impl super::AdapterShared { | Tf::Bc4RSnorm | Tf::Bc5RgUnorm | Tf::Bc5RgSnorm - | Tf::Bc6hRgbSfloat | Tf::Bc6hRgbUfloat + | Tf::Bc6hRgbSfloat | Tf::Bc7RgbaUnorm | Tf::Bc7RgbaUnormSrgb => unimplemented!(), Tf::Etc2RgbUnorm => (glow::COMPRESSED_RGB8_ETC2, glow::RGB, 0), diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 00380cc664..58892a4f01 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -44,6 +44,8 @@ #[cfg(all(feature = "metal", not(any(target_os = "macos", target_os = "ios"))))] compile_error!("Metal backend enabled on non-Apple OS. 
If your project is not using resolver=\"2\" in Cargo.toml, it should."); +#[cfg(all(feature = "dx12", windows))] +mod dx12; mod empty; #[cfg(feature = "gles")] mod gles; @@ -54,6 +56,8 @@ mod vulkan; pub mod util; pub mod api { + #[cfg(feature = "dx12")] + pub use super::dx12::Api as Dx12; pub use super::empty::Api as Empty; #[cfg(feature = "gles")] pub use super::gles::Api as Gles; diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index 927fff2471..0814df3b70 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -199,8 +199,8 @@ impl crate::Adapter for super::Adapter { | Tf::Bc4RSnorm | Tf::Bc5RgUnorm | Tf::Bc5RgSnorm - | Tf::Bc6hRgbSfloat | Tf::Bc6hRgbUfloat + | Tf::Bc6hRgbSfloat | Tf::Bc7RgbaUnorm | Tf::Bc7RgbaUnormSrgb => { if pc.format_bc { @@ -889,6 +889,7 @@ impl super::PrivateCapabilities { .flags .set(wgt::DownlevelFlags::ANISOTROPIC_FILTERING, true); + let base = wgt::Limits::default(); crate::Capabilities { limits: wgt::Limits { max_texture_dimension_1d: self.max_texture_size as u32, @@ -896,18 +897,20 @@ impl super::PrivateCapabilities { max_texture_dimension_3d: self.max_texture_3d_size as u32, max_texture_array_layers: self.max_texture_layers as u32, max_bind_groups: 8, - max_dynamic_uniform_buffers_per_pipeline_layout: 8, - max_dynamic_storage_buffers_per_pipeline_layout: 4, - max_sampled_textures_per_shader_stage: 16, + max_dynamic_uniform_buffers_per_pipeline_layout: base + .max_dynamic_uniform_buffers_per_pipeline_layout, + max_dynamic_storage_buffers_per_pipeline_layout: base + .max_dynamic_storage_buffers_per_pipeline_layout, + max_sampled_textures_per_shader_stage: base.max_sampled_textures_per_shader_stage, max_samplers_per_shader_stage: self.max_samplers_per_stage, - max_storage_buffers_per_shader_stage: 8, - max_storage_textures_per_shader_stage: 8, + max_storage_buffers_per_shader_stage: base.max_storage_buffers_per_shader_stage, + max_storage_textures_per_shader_stage: base.max_storage_textures_per_shader_stage, max_uniform_buffers_per_shader_stage: 12, max_uniform_buffer_binding_size: self.max_buffer_size.min(!0u32 as u64) as u32, max_storage_buffer_binding_size: self.max_buffer_size.min(!0u32 as u64) as u32, - max_vertex_buffers: 8, - max_vertex_attributes: 16, - max_vertex_buffer_array_stride: 2048, + max_vertex_buffers: base.max_vertex_buffers, + max_vertex_attributes: base.max_vertex_attributes, + max_vertex_buffer_array_stride: base.max_vertex_buffer_array_stride, max_push_constant_size: 0x1000, }, alignments: crate::Alignments { diff --git a/wgpu-hal/src/vulkan/conv.rs b/wgpu-hal/src/vulkan/conv.rs index 41a33082d0..b6bb4ca10b 100644 --- a/wgpu-hal/src/vulkan/conv.rs +++ b/wgpu-hal/src/vulkan/conv.rs @@ -66,8 +66,8 @@ impl super::PrivateCapabilities { Tf::Bc4RSnorm => F::BC4_SNORM_BLOCK, Tf::Bc5RgUnorm => F::BC5_UNORM_BLOCK, Tf::Bc5RgSnorm => F::BC5_SNORM_BLOCK, - Tf::Bc6hRgbSfloat => F::BC6H_SFLOAT_BLOCK, Tf::Bc6hRgbUfloat => F::BC6H_UFLOAT_BLOCK, + Tf::Bc6hRgbSfloat => F::BC6H_SFLOAT_BLOCK, Tf::Bc7RgbaUnorm => F::BC7_UNORM_BLOCK, Tf::Bc7RgbaUnormSrgb => F::BC7_SRGB_BLOCK, Tf::Etc2RgbUnorm => F::ETC2_R8G8B8_UNORM_BLOCK, From eb565a59f70bc117063a9a72f5762637dc87dc48 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Mon, 5 Jul 2021 12:42:11 -0400 Subject: [PATCH 02/33] hal/dx12: move device and encoder into modules --- wgpu-hal/src/dx12/adapter.rs | 66 +++++++- wgpu-hal/src/dx12/command.rs | 168 +++++++++++++++++++ wgpu-hal/src/dx12/device.rs | 121 ++++++++++++++ wgpu-hal/src/dx12/mod.rs | 304 
++--------------------------------- 4 files changed, 363 insertions(+), 296 deletions(-) create mode 100644 wgpu-hal/src/dx12/command.rs create mode 100644 wgpu-hal/src/dx12/device.rs diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index ff01aee89c..6527f58830 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -1,8 +1,8 @@ -use super::{conv, HResultPair as _}; +use super::{conv, HResult as _, HResultPair as _}; use std::{mem, sync::Arc}; use winapi::{ - shared::{dxgi, dxgi1_2, winerror}, - um::d3d12, + shared::{dxgi, dxgi1_2, dxgi1_5, minwindef, windef, winerror}, + um::{d3d12, winuser}, }; impl Drop for super::Adapter { @@ -314,6 +314,64 @@ impl crate::Adapter for super::Adapter { &self, surface: &super::Surface, ) -> Option { - None + let current_extent = { + let mut rect: windef::RECT = mem::zeroed(); + if winuser::GetClientRect(surface.wnd_handle, &mut rect) != 0 { + Some(wgt::Extent3d { + width: (rect.right - rect.left) as u32, + height: (rect.bottom - rect.top) as u32, + depth_or_array_layers: 1, + }) + } else { + log::warn!("Unable to get the window client rect"); + None + } + }; + + let mut present_modes = vec![wgt::PresentMode::Fifo]; + #[allow(trivial_casts)] + if let Ok(factory5) = surface.factory.cast::().check() { + let mut allow_tearing: minwindef::BOOL = minwindef::FALSE; + let hr = factory5.CheckFeatureSupport( + dxgi1_5::DXGI_FEATURE_PRESENT_ALLOW_TEARING, + &mut allow_tearing as *mut _ as *mut _, + mem::size_of::() as _, + ); + + factory5.destroy(); + match hr.to_error() { + Some(err) => log::warn!("Unable to check for tearing support: {}", err), + None => present_modes.push(wgt::PresentMode::Immediate), + } + } + + Some(crate::SurfaceCapabilities { + formats: vec![ + wgt::TextureFormat::Bgra8UnormSrgb, + wgt::TextureFormat::Bgra8Unorm, + wgt::TextureFormat::Rgba8UnormSrgb, + wgt::TextureFormat::Rgba8Unorm, + wgt::TextureFormat::Rgb10a2Unorm, + wgt::TextureFormat::Rgba16Float, + ], + // we currently use a flip effect which supports 2..=16 buffers + swap_chain_sizes: 2..=16, + current_extent, + // TODO: figure out the exact bounds + extents: wgt::Extent3d { + width: 16, + height: 16, + depth_or_array_layers: 1, + }..=wgt::Extent3d { + width: 4096, + height: 4096, + depth_or_array_layers: 1, + }, + usage: crate::TextureUses::COLOR_TARGET + | crate::TextureUses::COPY_SRC + | crate::TextureUses::COPY_DST, + present_modes, + composite_alpha_modes: vec![crate::CompositeAlphaMode::Opaque], + }) } } diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs new file mode 100644 index 0000000000..3ed82d4385 --- /dev/null +++ b/wgpu-hal/src/dx12/command.rs @@ -0,0 +1,168 @@ +use super::Resource; +use std::ops::Range; + +impl crate::CommandEncoder for super::Encoder { + unsafe fn begin_encoding(&mut self, label: crate::Label) -> Result<(), crate::DeviceError> { + Ok(()) + } + unsafe fn discard_encoding(&mut self) {} + unsafe fn end_encoding(&mut self) -> Result { + Ok(Resource) + } + unsafe fn reset_all(&mut self, command_buffers: I) {} + + unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) + where + T: Iterator>, + { + } + + unsafe fn transition_textures<'a, T>(&mut self, barriers: T) + where + T: Iterator>, + { + } + + unsafe fn fill_buffer(&mut self, buffer: &Resource, range: crate::MemoryRange, value: u8) {} + + unsafe fn copy_buffer_to_buffer(&mut self, src: &Resource, dst: &Resource, regions: T) {} + + unsafe fn copy_texture_to_texture( + &mut self, + src: &Resource, + src_usage: crate::TextureUses, + 
dst: &Resource, + regions: T, + ) { + } + + unsafe fn copy_buffer_to_texture(&mut self, src: &Resource, dst: &Resource, regions: T) {} + + unsafe fn copy_texture_to_buffer( + &mut self, + src: &Resource, + src_usage: crate::TextureUses, + dst: &Resource, + regions: T, + ) { + } + + unsafe fn begin_query(&mut self, set: &Resource, index: u32) {} + unsafe fn end_query(&mut self, set: &Resource, index: u32) {} + unsafe fn write_timestamp(&mut self, set: &Resource, index: u32) {} + unsafe fn reset_queries(&mut self, set: &Resource, range: Range) {} + unsafe fn copy_query_results( + &mut self, + set: &Resource, + range: Range, + buffer: &Resource, + offset: wgt::BufferAddress, + stride: wgt::BufferSize, + ) { + } + + // render + + unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor) {} + unsafe fn end_render_pass(&mut self) {} + + unsafe fn set_bind_group( + &mut self, + layout: &Resource, + index: u32, + group: &Resource, + dynamic_offsets: &[wgt::DynamicOffset], + ) { + } + unsafe fn set_push_constants( + &mut self, + layout: &Resource, + stages: wgt::ShaderStages, + offset: u32, + data: &[u32], + ) { + } + + unsafe fn insert_debug_marker(&mut self, label: &str) {} + unsafe fn begin_debug_marker(&mut self, group_label: &str) {} + unsafe fn end_debug_marker(&mut self) {} + + unsafe fn set_render_pipeline(&mut self, pipeline: &Resource) {} + + unsafe fn set_index_buffer<'a>( + &mut self, + binding: crate::BufferBinding<'a, super::Api>, + format: wgt::IndexFormat, + ) { + } + unsafe fn set_vertex_buffer<'a>( + &mut self, + index: u32, + binding: crate::BufferBinding<'a, super::Api>, + ) { + } + unsafe fn set_viewport(&mut self, rect: &crate::Rect, depth_range: Range) {} + unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect) {} + unsafe fn set_stencil_reference(&mut self, value: u32) {} + unsafe fn set_blend_constants(&mut self, color: &wgt::Color) {} + + unsafe fn draw( + &mut self, + start_vertex: u32, + vertex_count: u32, + start_instance: u32, + instance_count: u32, + ) { + } + unsafe fn draw_indexed( + &mut self, + start_index: u32, + index_count: u32, + base_vertex: i32, + start_instance: u32, + instance_count: u32, + ) { + } + unsafe fn draw_indirect( + &mut self, + buffer: &Resource, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + } + unsafe fn draw_indexed_indirect( + &mut self, + buffer: &Resource, + offset: wgt::BufferAddress, + draw_count: u32, + ) { + } + unsafe fn draw_indirect_count( + &mut self, + buffer: &Resource, + offset: wgt::BufferAddress, + count_buffer: &Resource, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + } + unsafe fn draw_indexed_indirect_count( + &mut self, + buffer: &Resource, + offset: wgt::BufferAddress, + count_buffer: &Resource, + count_offset: wgt::BufferAddress, + max_count: u32, + ) { + } + + // compute + + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) {} + unsafe fn end_compute_pass(&mut self) {} + + unsafe fn set_compute_pipeline(&mut self, pipeline: &Resource) {} + + unsafe fn dispatch(&mut self, count: [u32; 3]) {} + unsafe fn dispatch_indirect(&mut self, buffer: &Resource, offset: wgt::BufferAddress) {} +} diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs new file mode 100644 index 0000000000..73fbb885c5 --- /dev/null +++ b/wgpu-hal/src/dx12/device.rs @@ -0,0 +1,121 @@ +//TODO: remove this +use super::{Encoder, Resource}; +type DeviceResult = Result; + +impl crate::Device for super::Device { + unsafe fn exit(self) {} + unsafe fn create_buffer(&self, 
desc: &crate::BufferDescriptor) -> DeviceResult { + Ok(Resource) + } + unsafe fn destroy_buffer(&self, buffer: Resource) {} + unsafe fn map_buffer( + &self, + buffer: &Resource, + range: crate::MemoryRange, + ) -> DeviceResult { + Err(crate::DeviceError::Lost) + } + unsafe fn unmap_buffer(&self, buffer: &Resource) -> DeviceResult<()> { + Ok(()) + } + unsafe fn flush_mapped_ranges(&self, buffer: &Resource, ranges: I) {} + unsafe fn invalidate_mapped_ranges(&self, buffer: &Resource, ranges: I) {} + + unsafe fn create_texture(&self, desc: &crate::TextureDescriptor) -> DeviceResult { + Ok(Resource) + } + unsafe fn destroy_texture(&self, texture: Resource) {} + unsafe fn create_texture_view( + &self, + texture: &Resource, + desc: &crate::TextureViewDescriptor, + ) -> DeviceResult { + Ok(Resource) + } + unsafe fn destroy_texture_view(&self, view: Resource) {} + unsafe fn create_sampler(&self, desc: &crate::SamplerDescriptor) -> DeviceResult { + Ok(Resource) + } + unsafe fn destroy_sampler(&self, sampler: Resource) {} + + unsafe fn create_command_encoder( + &self, + desc: &crate::CommandEncoderDescriptor, + ) -> DeviceResult { + Ok(Encoder) + } + unsafe fn destroy_command_encoder(&self, encoder: Encoder) {} + + unsafe fn create_bind_group_layout( + &self, + desc: &crate::BindGroupLayoutDescriptor, + ) -> DeviceResult { + Ok(Resource) + } + unsafe fn destroy_bind_group_layout(&self, bg_layout: Resource) {} + unsafe fn create_pipeline_layout( + &self, + desc: &crate::PipelineLayoutDescriptor, + ) -> DeviceResult { + Ok(Resource) + } + unsafe fn destroy_pipeline_layout(&self, pipeline_layout: Resource) {} + unsafe fn create_bind_group( + &self, + desc: &crate::BindGroupDescriptor, + ) -> DeviceResult { + Ok(Resource) + } + unsafe fn destroy_bind_group(&self, group: Resource) {} + + unsafe fn create_shader_module( + &self, + desc: &crate::ShaderModuleDescriptor, + shader: crate::ShaderInput, + ) -> Result { + Ok(Resource) + } + unsafe fn destroy_shader_module(&self, module: Resource) {} + unsafe fn create_render_pipeline( + &self, + desc: &crate::RenderPipelineDescriptor, + ) -> Result { + Ok(Resource) + } + unsafe fn destroy_render_pipeline(&self, pipeline: Resource) {} + unsafe fn create_compute_pipeline( + &self, + desc: &crate::ComputePipelineDescriptor, + ) -> Result { + Ok(Resource) + } + unsafe fn destroy_compute_pipeline(&self, pipeline: Resource) {} + + unsafe fn create_query_set( + &self, + desc: &wgt::QuerySetDescriptor, + ) -> DeviceResult { + Ok(Resource) + } + unsafe fn destroy_query_set(&self, set: Resource) {} + unsafe fn create_fence(&self) -> DeviceResult { + Ok(Resource) + } + unsafe fn destroy_fence(&self, fence: Resource) {} + unsafe fn get_fence_value(&self, fence: &Resource) -> DeviceResult { + Ok(0) + } + unsafe fn wait( + &self, + fence: &Resource, + value: crate::FenceValue, + timeout_ms: u32, + ) -> DeviceResult { + Ok(true) + } + + unsafe fn start_capture(&self) -> bool { + false + } + unsafe fn stop_capture(&self) {} +} diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 1ca55e8d68..669d5e2392 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -8,9 +8,11 @@ #![allow(unused_variables)] mod adapter; +mod command; mod conv; +mod device; -use std::{borrow::Cow, ops::Range, sync::Arc}; +use std::{borrow::Cow, sync::Arc}; use winapi::{ shared::{dxgi, dxgi1_2, dxgi1_4, dxgi1_6, windef, winerror}, Interface as _, @@ -23,8 +25,6 @@ pub struct Encoder; #[derive(Debug)] pub struct Resource; -type DeviceResult = Result; - impl crate::Api for 
Api { type Instance = Instance; type Surface = Surface; @@ -101,10 +101,12 @@ impl Drop for Instance { unsafe impl Send for Instance {} unsafe impl Sync for Instance {} +struct SwapChain {} + pub struct Surface { factory: native::WeakPtr, wnd_handle: windef::HWND, - //presentation: Option, + swap_chain: Option, } unsafe impl Send for Surface {} @@ -224,13 +226,11 @@ impl crate::Instance for Instance { has_handle: &impl raw_window_handle::HasRawWindowHandle, ) -> Result { match has_handle.raw_window_handle() { - raw_window_handle::RawWindowHandle::Windows(handle) => { - Ok(Surface { - factory: self.factory, - wnd_handle: handle.hwnd as *mut _, - //presentation: None, - }) - } + raw_window_handle::RawWindowHandle::Windows(handle) => Ok(Surface { + factory: self.factory, + wnd_handle: handle.hwnd as *mut _, + swap_chain: None, + }), _ => Err(crate::InstanceError), } } @@ -334,7 +334,7 @@ impl crate::Queue for Queue { &mut self, command_buffers: &[&Resource], signal_fence: Option<(&mut Resource, crate::FenceValue)>, - ) -> DeviceResult<()> { + ) -> Result<(), crate::DeviceError> { Ok(()) } unsafe fn present( @@ -345,283 +345,3 @@ impl crate::Queue for Queue { Ok(()) } } - -impl crate::Device for Device { - unsafe fn exit(self) {} - unsafe fn create_buffer(&self, desc: &crate::BufferDescriptor) -> DeviceResult { - Ok(Resource) - } - unsafe fn destroy_buffer(&self, buffer: Resource) {} - unsafe fn map_buffer( - &self, - buffer: &Resource, - range: crate::MemoryRange, - ) -> DeviceResult { - Err(crate::DeviceError::Lost) - } - unsafe fn unmap_buffer(&self, buffer: &Resource) -> DeviceResult<()> { - Ok(()) - } - unsafe fn flush_mapped_ranges(&self, buffer: &Resource, ranges: I) {} - unsafe fn invalidate_mapped_ranges(&self, buffer: &Resource, ranges: I) {} - - unsafe fn create_texture(&self, desc: &crate::TextureDescriptor) -> DeviceResult { - Ok(Resource) - } - unsafe fn destroy_texture(&self, texture: Resource) {} - unsafe fn create_texture_view( - &self, - texture: &Resource, - desc: &crate::TextureViewDescriptor, - ) -> DeviceResult { - Ok(Resource) - } - unsafe fn destroy_texture_view(&self, view: Resource) {} - unsafe fn create_sampler(&self, desc: &crate::SamplerDescriptor) -> DeviceResult { - Ok(Resource) - } - unsafe fn destroy_sampler(&self, sampler: Resource) {} - - unsafe fn create_command_encoder( - &self, - desc: &crate::CommandEncoderDescriptor, - ) -> DeviceResult { - Ok(Encoder) - } - unsafe fn destroy_command_encoder(&self, encoder: Encoder) {} - - unsafe fn create_bind_group_layout( - &self, - desc: &crate::BindGroupLayoutDescriptor, - ) -> DeviceResult { - Ok(Resource) - } - unsafe fn destroy_bind_group_layout(&self, bg_layout: Resource) {} - unsafe fn create_pipeline_layout( - &self, - desc: &crate::PipelineLayoutDescriptor, - ) -> DeviceResult { - Ok(Resource) - } - unsafe fn destroy_pipeline_layout(&self, pipeline_layout: Resource) {} - unsafe fn create_bind_group( - &self, - desc: &crate::BindGroupDescriptor, - ) -> DeviceResult { - Ok(Resource) - } - unsafe fn destroy_bind_group(&self, group: Resource) {} - - unsafe fn create_shader_module( - &self, - desc: &crate::ShaderModuleDescriptor, - shader: crate::ShaderInput, - ) -> Result { - Ok(Resource) - } - unsafe fn destroy_shader_module(&self, module: Resource) {} - unsafe fn create_render_pipeline( - &self, - desc: &crate::RenderPipelineDescriptor, - ) -> Result { - Ok(Resource) - } - unsafe fn destroy_render_pipeline(&self, pipeline: Resource) {} - unsafe fn create_compute_pipeline( - &self, - desc: 
&crate::ComputePipelineDescriptor, - ) -> Result { - Ok(Resource) - } - unsafe fn destroy_compute_pipeline(&self, pipeline: Resource) {} - - unsafe fn create_query_set( - &self, - desc: &wgt::QuerySetDescriptor, - ) -> DeviceResult { - Ok(Resource) - } - unsafe fn destroy_query_set(&self, set: Resource) {} - unsafe fn create_fence(&self) -> DeviceResult { - Ok(Resource) - } - unsafe fn destroy_fence(&self, fence: Resource) {} - unsafe fn get_fence_value(&self, fence: &Resource) -> DeviceResult { - Ok(0) - } - unsafe fn wait( - &self, - fence: &Resource, - value: crate::FenceValue, - timeout_ms: u32, - ) -> DeviceResult { - Ok(true) - } - - unsafe fn start_capture(&self) -> bool { - false - } - unsafe fn stop_capture(&self) {} -} - -impl crate::CommandEncoder for Encoder { - unsafe fn begin_encoding(&mut self, label: crate::Label) -> DeviceResult<()> { - Ok(()) - } - unsafe fn discard_encoding(&mut self) {} - unsafe fn end_encoding(&mut self) -> DeviceResult { - Ok(Resource) - } - unsafe fn reset_all(&mut self, command_buffers: I) {} - - unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) - where - T: Iterator>, - { - } - - unsafe fn transition_textures<'a, T>(&mut self, barriers: T) - where - T: Iterator>, - { - } - - unsafe fn fill_buffer(&mut self, buffer: &Resource, range: crate::MemoryRange, value: u8) {} - - unsafe fn copy_buffer_to_buffer(&mut self, src: &Resource, dst: &Resource, regions: T) {} - - unsafe fn copy_texture_to_texture( - &mut self, - src: &Resource, - src_usage: crate::TextureUses, - dst: &Resource, - regions: T, - ) { - } - - unsafe fn copy_buffer_to_texture(&mut self, src: &Resource, dst: &Resource, regions: T) {} - - unsafe fn copy_texture_to_buffer( - &mut self, - src: &Resource, - src_usage: crate::TextureUses, - dst: &Resource, - regions: T, - ) { - } - - unsafe fn begin_query(&mut self, set: &Resource, index: u32) {} - unsafe fn end_query(&mut self, set: &Resource, index: u32) {} - unsafe fn write_timestamp(&mut self, set: &Resource, index: u32) {} - unsafe fn reset_queries(&mut self, set: &Resource, range: Range) {} - unsafe fn copy_query_results( - &mut self, - set: &Resource, - range: Range, - buffer: &Resource, - offset: wgt::BufferAddress, - stride: wgt::BufferSize, - ) { - } - - // render - - unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor) {} - unsafe fn end_render_pass(&mut self) {} - - unsafe fn set_bind_group( - &mut self, - layout: &Resource, - index: u32, - group: &Resource, - dynamic_offsets: &[wgt::DynamicOffset], - ) { - } - unsafe fn set_push_constants( - &mut self, - layout: &Resource, - stages: wgt::ShaderStages, - offset: u32, - data: &[u32], - ) { - } - - unsafe fn insert_debug_marker(&mut self, label: &str) {} - unsafe fn begin_debug_marker(&mut self, group_label: &str) {} - unsafe fn end_debug_marker(&mut self) {} - - unsafe fn set_render_pipeline(&mut self, pipeline: &Resource) {} - - unsafe fn set_index_buffer<'a>( - &mut self, - binding: crate::BufferBinding<'a, Api>, - format: wgt::IndexFormat, - ) { - } - unsafe fn set_vertex_buffer<'a>(&mut self, index: u32, binding: crate::BufferBinding<'a, Api>) { - } - unsafe fn set_viewport(&mut self, rect: &crate::Rect, depth_range: Range) {} - unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect) {} - unsafe fn set_stencil_reference(&mut self, value: u32) {} - unsafe fn set_blend_constants(&mut self, color: &wgt::Color) {} - - unsafe fn draw( - &mut self, - start_vertex: u32, - vertex_count: u32, - start_instance: u32, - instance_count: u32, - ) { - } - 
unsafe fn draw_indexed( - &mut self, - start_index: u32, - index_count: u32, - base_vertex: i32, - start_instance: u32, - instance_count: u32, - ) { - } - unsafe fn draw_indirect( - &mut self, - buffer: &Resource, - offset: wgt::BufferAddress, - draw_count: u32, - ) { - } - unsafe fn draw_indexed_indirect( - &mut self, - buffer: &Resource, - offset: wgt::BufferAddress, - draw_count: u32, - ) { - } - unsafe fn draw_indirect_count( - &mut self, - buffer: &Resource, - offset: wgt::BufferAddress, - count_buffer: &Resource, - count_offset: wgt::BufferAddress, - max_count: u32, - ) { - } - unsafe fn draw_indexed_indirect_count( - &mut self, - buffer: &Resource, - offset: wgt::BufferAddress, - count_buffer: &Resource, - count_offset: wgt::BufferAddress, - max_count: u32, - ) { - } - - // compute - - unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) {} - unsafe fn end_compute_pass(&mut self) {} - - unsafe fn set_compute_pipeline(&mut self, pipeline: &Resource) {} - - unsafe fn dispatch(&mut self, count: [u32; 3]) {} - unsafe fn dispatch_indirect(&mut self, buffer: &Resource, offset: wgt::BufferAddress) {} -} From 0942fb13461ed0d74ac756b52c89cbdf78f60840 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Mon, 5 Jul 2021 15:37:02 -0400 Subject: [PATCH 03/33] hal/dx12: swap chain configuration --- wgpu-hal/src/dx12/adapter.rs | 11 +- wgpu-hal/src/dx12/conv.rs | 21 +++- wgpu-hal/src/dx12/mod.rs | 189 ++++++++++++++++++++++++++++++++++- 3 files changed, 214 insertions(+), 7 deletions(-) diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 6527f58830..52b29d4c21 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -244,7 +244,10 @@ impl crate::Adapter for super::Adapter { })?; Ok(crate::OpenDevice { - device: super::Device { raw: self.device }, + device: super::Device { + raw: self.device, + present_queue: queue, + }, queue: super::Queue { raw: queue }, }) } @@ -371,7 +374,11 @@ impl crate::Adapter for super::Adapter { | crate::TextureUses::COPY_SRC | crate::TextureUses::COPY_DST, present_modes, - composite_alpha_modes: vec![crate::CompositeAlphaMode::Opaque], + composite_alpha_modes: vec![ + crate::CompositeAlphaMode::Opaque, + crate::CompositeAlphaMode::PreMultiplied, + crate::CompositeAlphaMode::PostMultiplied, + ], }) } } diff --git a/wgpu-hal/src/dx12/conv.rs b/wgpu-hal/src/dx12/conv.rs index efe4b08797..f049f8c319 100644 --- a/wgpu-hal/src/dx12/conv.rs +++ b/wgpu-hal/src/dx12/conv.rs @@ -1,4 +1,4 @@ -use winapi::shared::dxgiformat; +use winapi::shared::{dxgi1_2, dxgiformat}; pub(super) fn map_texture_format(format: wgt::TextureFormat) -> dxgiformat::DXGI_FORMAT { use wgt::TextureFormat as Tf; @@ -95,3 +95,22 @@ pub(super) fn map_texture_format(format: wgt::TextureFormat) -> dxgiformat::DXGI | Tf::Astc12x12RgbaUnormSrgb => unreachable!(), } } + +pub fn map_texture_format_nosrgb(format: wgt::TextureFormat) -> dxgiformat::DXGI_FORMAT { + // NOTE: DXGI doesn't allow sRGB format on the swapchain, but + // creating RTV of swapchain buffers with sRGB works + match format { + wgt::TextureFormat::Bgra8UnormSrgb => dxgiformat::DXGI_FORMAT_B8G8R8A8_UNORM, + wgt::TextureFormat::Rgba8UnormSrgb => dxgiformat::DXGI_FORMAT_R8G8B8A8_UNORM, + _ => map_texture_format(format), + } +} + +pub fn map_acomposite_alpha_mode(mode: crate::CompositeAlphaMode) -> dxgi1_2::DXGI_ALPHA_MODE { + use crate::CompositeAlphaMode as Cam; + match mode { + Cam::Opaque => dxgi1_2::DXGI_ALPHA_MODE_IGNORE, + Cam::PreMultiplied => 
dxgi1_2::DXGI_ALPHA_MODE_PREMULTIPLIED, + Cam::PostMultiplied => dxgi1_2::DXGI_ALPHA_MODE_STRAIGHT, + } +} diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 669d5e2392..8aef919369 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -12,9 +12,10 @@ mod command; mod conv; mod device; -use std::{borrow::Cow, sync::Arc}; +use std::{borrow::Cow, ptr, sync::Arc}; use winapi::{ - shared::{dxgi, dxgi1_2, dxgi1_4, dxgi1_6, windef, winerror}, + shared::{dxgi, dxgi1_2, dxgi1_4, dxgi1_6, dxgitype, windef, winerror}, + um::{d3d12, synchapi, winbase, winnt}, Interface as _, }; @@ -101,7 +102,54 @@ impl Drop for Instance { unsafe impl Send for Instance {} unsafe impl Sync for Instance {} -struct SwapChain {} +#[derive(Copy, Clone)] +struct DualHandle { + cpu: native::CpuDescriptor, + gpu: native::GpuDescriptor, + /// How large the block allocated to this handle is. + size: u64, +} + +type DescriptorIndex = u64; + +struct DescriptorHeap { + raw: native::DescriptorHeap, + handle_size: u64, + total_handles: u64, + start: DualHandle, +} + +impl DescriptorHeap { + fn at(&self, index: DescriptorIndex, size: u64) -> DualHandle { + assert!(index < self.total_handles); + DualHandle { + cpu: self.cpu_descriptor_at(index), + gpu: self.gpu_descriptor_at(index), + size, + } + } + + fn cpu_descriptor_at(&self, index: u64) -> native::CpuDescriptor { + native::CpuDescriptor { + ptr: self.start.cpu.ptr + (self.handle_size * index) as usize, + } + } + + fn gpu_descriptor_at(&self, index: u64) -> native::GpuDescriptor { + native::GpuDescriptor { + ptr: self.start.gpu.ptr + self.handle_size * index, + } + } +} + +struct SwapChain { + raw: native::WeakPtr, + // need to associate raw image pointers with the swapchain so they can be properly released + // when the swapchain is destroyed + resources: Vec, + waitable: winnt::HANDLE, + acquired_count: usize, +} pub struct Surface { factory: native::WeakPtr, @@ -144,6 +192,7 @@ unsafe impl Sync for Adapter {} pub struct Device { raw: native::Device, + present_queue: native::CommandQueue, } unsafe impl Send for Device {} @@ -309,16 +358,148 @@ impl crate::Instance for Instance { } } +impl SwapChain { + unsafe fn release_resources(self) -> native::WeakPtr { + for resource in self.resources { + resource.destroy(); + } + self.raw + } + + unsafe fn wait(&mut self, timeout_ms: u32) -> Result { + match synchapi::WaitForSingleObject(self.waitable, timeout_ms) { + winbase::WAIT_ABANDONED | winbase::WAIT_FAILED => Err(crate::SurfaceError::Lost), + winbase::WAIT_OBJECT_0 => Ok(true), + winerror::WAIT_TIMEOUT => Ok(false), + other => { + log::error!("Unexpected wait status: 0x{:x}", other); + Err(crate::SurfaceError::Lost) + } + } + } +} + impl crate::Surface for Surface { unsafe fn configure( &mut self, device: &Device, config: &crate::SurfaceConfiguration, ) -> Result<(), crate::SurfaceError> { + let mut flags = dxgi::DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; + match config.present_mode { + wgt::PresentMode::Immediate => { + flags |= dxgi::DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING; + } + _ => {} + } + + let non_srgb_format = conv::map_texture_format_nosrgb(config.format); + + let swap_chain = match self.swap_chain.take() { + Some(sc) => { + // can't have image resources in flight used by GPU + //device.wait_idle().unwrap(); + + let raw = sc.release_resources(); + let result = raw.ResizeBuffers( + config.swap_chain_size, + config.extent.width, + config.extent.height, + non_srgb_format, + flags, + ); + if let Some(err) = result.to_error() { + 
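// A concrete illustration of the sRGB note in map_texture_format_nosrgb above: the
// swapchain buffers themselves are created with the non-sRGB DXGI format, and sRGB
// encoding comes back only when a render-target view is created over an acquired buffer.
// Minimal sketch of that view-creation half; `device`, `buffer` and `rtv_handle` are
// hypothetical locals standing in for a raw ID3D12Device, a swapchain resource and a
// CPU descriptor handle, and are not part of this patch.
unsafe {
    let mut rtv_desc: d3d12::D3D12_RENDER_TARGET_VIEW_DESC = std::mem::zeroed();
    rtv_desc.Format = dxgiformat::DXGI_FORMAT_B8G8R8A8_UNORM_SRGB; // sRGB view of a UNORM buffer
    rtv_desc.ViewDimension = d3d12::D3D12_RTV_DIMENSION_TEXTURE2D;
    device.CreateRenderTargetView(buffer.as_mut_ptr(), &rtv_desc, rtv_handle);
}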
log::error!("ResizeBuffers failed: {}", err); + return Err(crate::SurfaceError::Other("window is in use")); + } + raw + } + None => { + let mut swap_chain1 = native::WeakPtr::::null(); + + let raw_desc = dxgi1_2::DXGI_SWAP_CHAIN_DESC1 { + AlphaMode: conv::map_acomposite_alpha_mode(config.composite_alpha_mode), + BufferCount: config.swap_chain_size, + Width: config.extent.width, + Height: config.extent.height, + Format: non_srgb_format, + Flags: flags, + BufferUsage: dxgitype::DXGI_USAGE_RENDER_TARGET_OUTPUT, + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + Scaling: dxgi1_2::DXGI_SCALING_STRETCH, + Stereo: 0, + SwapEffect: dxgi::DXGI_SWAP_EFFECT_FLIP_DISCARD, + }; + + let hr = self.factory.CreateSwapChainForHwnd( + device.present_queue.as_mut_ptr() as *mut _, + self.wnd_handle, + &raw_desc, + ptr::null(), + ptr::null_mut(), + swap_chain1.mut_void() as *mut *mut _, + ); + + if let Some(err) = hr.to_error() { + log::error!("SwapChain creation error: {}", err); + return Err(crate::SurfaceError::Other("swap chain creation")); + } + + match swap_chain1.cast::().check() { + Ok(swap_chain3) => { + swap_chain1.destroy(); + swap_chain3 + } + Err(err) => { + log::error!("Unable to cast swap chain: {}", err); + return Err(crate::SurfaceError::Other("swap chain cast to 3")); + } + } + } + }; + + // Disable automatic Alt+Enter handling by DXGI. + const DXGI_MWA_NO_WINDOW_CHANGES: u32 = 1; + const DXGI_MWA_NO_ALT_ENTER: u32 = 2; + self.factory.MakeWindowAssociation( + self.wnd_handle, + DXGI_MWA_NO_WINDOW_CHANGES | DXGI_MWA_NO_ALT_ENTER, + ); + + swap_chain.SetMaximumFrameLatency(config.swap_chain_size); + let waitable = swap_chain.GetFrameLatencyWaitableObject(); + + let mut resources = vec![native::Resource::null(); config.swap_chain_size as usize]; + for (i, res) in resources.iter_mut().enumerate() { + swap_chain.GetBuffer(i as _, &d3d12::ID3D12Resource::uuidof(), res.mut_void()); + } + + self.swap_chain = Some(SwapChain { + raw: swap_chain, + resources, + waitable, + acquired_count: 0, + //format: config.format, + //size: config.extent, + //mode: config.present_mode, + }); + Ok(()) } - unsafe fn unconfigure(&mut self, device: &Device) {} + unsafe fn unconfigure(&mut self, device: &Device) { + if let Some(mut sc) = self.swap_chain.take() { + let _ = sc.wait(winbase::INFINITE); + //TODO: this shouldn't be needed, + // but it complains that the queue is still used otherwise + //let _ = device.wait_idle(); + let raw = sc.release_resources(); + raw.destroy(); + } + } unsafe fn acquire_texture( &mut self, From 971aee0e1c3b32642d65c4858f2163f09628c5ee Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Mon, 5 Jul 2021 18:07:47 -0400 Subject: [PATCH 04/33] hal/dx12: buffer creation, device idling --- wgpu-hal/Cargo.toml | 2 +- wgpu-hal/src/dx12/adapter.rs | 33 ++++++---- wgpu-hal/src/dx12/command.rs | 37 +++++++---- wgpu-hal/src/dx12/conv.rs | 16 ++++- wgpu-hal/src/dx12/device.rs | 117 ++++++++++++++++++++++++++++++++--- wgpu-hal/src/dx12/mod.rs | 84 +++++++++++++++---------- 6 files changed, 225 insertions(+), 64 deletions(-) diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml index 9b4afceee5..497c962db4 100644 --- a/wgpu-hal/Cargo.toml +++ b/wgpu-hal/Cargo.toml @@ -16,7 +16,7 @@ default = [] metal = ["naga/msl-out", "block", "foreign-types"] vulkan = ["naga/spv-out", "ash", "gpu-alloc", "gpu-descriptor", "libloading", "inplace_it", "renderdoc-sys"] gles = ["naga/glsl-out", "glow", "egl", "libloading"] -dx12 = ["native", "winapi/d3d12", "winapi/d3d12shader", 
"winapi/d3d12sdklayers", "winapi/dxgi1_6"] +dx12 = ["naga/hlsl-out", "native", "winapi/d3d12", "winapi/d3d12shader", "winapi/d3d12sdklayers", "winapi/dxgi1_6"] [dependencies] bitflags = "1.0" diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 52b29d4c21..53874e7908 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -1,8 +1,9 @@ -use super::{conv, HResult as _, HResultPair as _}; +use super::{conv, HResult as _}; use std::{mem, sync::Arc}; use winapi::{ shared::{dxgi, dxgi1_2, dxgi1_5, minwindef, windef, winerror}, um::{d3d12, winuser}, + Interface, }; impl Drop for super::Adapter { @@ -21,7 +22,7 @@ impl super::Adapter { ) -> Option> { // Create the device so that we can get the capabilities. let device = match library.create_device(adapter, native::FeatureLevel::L11_0) { - Ok(pair) => match pair.check() { + Ok(pair) => match pair.to_result() { Ok(device) => device, Err(err) => { log::warn!("Device creation failed: {}", err); @@ -237,16 +238,26 @@ impl crate::Adapter for super::Adapter { native::CommandQueueFlags::empty(), 0, ) - .check() - .map_err(|err| { - log::warn!("Queue creation failed: {}", err); - crate::DeviceError::OutOfMemory - })?; + .to_device_result("Queue creation")?; + + let mut idle_fence = native::Fence::null(); + let hr = self.device.CreateFence( + 0, + d3d12::D3D12_FENCE_FLAG_NONE, + &d3d12::ID3D12Fence::uuidof(), + idle_fence.mut_void(), + ); + hr.to_device_result("Idle fence creation")?; Ok(crate::OpenDevice { device: super::Device { raw: self.device, present_queue: queue, + idler: super::Idler { + fence: idle_fence, + event: native::Event::create(false, false), + }, + private_caps: self.private_caps, }, queue: super::Queue { raw: queue }, }) @@ -333,7 +344,7 @@ impl crate::Adapter for super::Adapter { let mut present_modes = vec![wgt::PresentMode::Fifo]; #[allow(trivial_casts)] - if let Ok(factory5) = surface.factory.cast::().check() { + if let Ok(factory5) = surface.factory.cast::().to_result() { let mut allow_tearing: minwindef::BOOL = minwindef::FALSE; let hr = factory5.CheckFeatureSupport( dxgi1_5::DXGI_FEATURE_PRESENT_ALLOW_TEARING, @@ -342,9 +353,9 @@ impl crate::Adapter for super::Adapter { ); factory5.destroy(); - match hr.to_error() { - Some(err) => log::warn!("Unable to check for tearing support: {}", err), - None => present_modes.push(wgt::PresentMode::Immediate), + match hr.to_result() { + Err(err) => log::warn!("Unable to check for tearing support: {}", err), + Ok(()) => present_modes.push(wgt::PresentMode::Immediate), } } diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 3ed82d4385..984ed0f179 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -23,9 +23,16 @@ impl crate::CommandEncoder for super::Encoder { { } - unsafe fn fill_buffer(&mut self, buffer: &Resource, range: crate::MemoryRange, value: u8) {} + unsafe fn fill_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange, value: u8) { + } - unsafe fn copy_buffer_to_buffer(&mut self, src: &Resource, dst: &Resource, regions: T) {} + unsafe fn copy_buffer_to_buffer( + &mut self, + src: &super::Buffer, + dst: &super::Buffer, + regions: T, + ) { + } unsafe fn copy_texture_to_texture( &mut self, @@ -36,13 +43,19 @@ impl crate::CommandEncoder for super::Encoder { ) { } - unsafe fn copy_buffer_to_texture(&mut self, src: &Resource, dst: &Resource, regions: T) {} + unsafe fn copy_buffer_to_texture( + &mut self, + src: &super::Buffer, + dst: &Resource, + regions: T, + ) { + } unsafe 
fn copy_texture_to_buffer( &mut self, src: &Resource, src_usage: crate::TextureUses, - dst: &Resource, + dst: &super::Buffer, regions: T, ) { } @@ -55,7 +68,7 @@ impl crate::CommandEncoder for super::Encoder { &mut self, set: &Resource, range: Range, - buffer: &Resource, + buffer: &super::Buffer, offset: wgt::BufferAddress, stride: wgt::BufferSize, ) { @@ -125,32 +138,32 @@ impl crate::CommandEncoder for super::Encoder { } unsafe fn draw_indirect( &mut self, - buffer: &Resource, + buffer: &super::Buffer, offset: wgt::BufferAddress, draw_count: u32, ) { } unsafe fn draw_indexed_indirect( &mut self, - buffer: &Resource, + buffer: &super::Buffer, offset: wgt::BufferAddress, draw_count: u32, ) { } unsafe fn draw_indirect_count( &mut self, - buffer: &Resource, + buffer: &super::Buffer, offset: wgt::BufferAddress, - count_buffer: &Resource, + count_buffer: &super::Buffer, count_offset: wgt::BufferAddress, max_count: u32, ) { } unsafe fn draw_indexed_indirect_count( &mut self, - buffer: &Resource, + buffer: &super::Buffer, offset: wgt::BufferAddress, - count_buffer: &Resource, + count_buffer: &super::Buffer, count_offset: wgt::BufferAddress, max_count: u32, ) { @@ -164,5 +177,5 @@ impl crate::CommandEncoder for super::Encoder { unsafe fn set_compute_pipeline(&mut self, pipeline: &Resource) {} unsafe fn dispatch(&mut self, count: [u32; 3]) {} - unsafe fn dispatch_indirect(&mut self, buffer: &Resource, offset: wgt::BufferAddress) {} + unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) {} } diff --git a/wgpu-hal/src/dx12/conv.rs b/wgpu-hal/src/dx12/conv.rs index f049f8c319..0363c024a8 100644 --- a/wgpu-hal/src/dx12/conv.rs +++ b/wgpu-hal/src/dx12/conv.rs @@ -1,4 +1,7 @@ -use winapi::shared::{dxgi1_2, dxgiformat}; +use winapi::{ + shared::{dxgi1_2, dxgiformat}, + um::d3d12, +}; pub(super) fn map_texture_format(format: wgt::TextureFormat) -> dxgiformat::DXGI_FORMAT { use wgt::TextureFormat as Tf; @@ -114,3 +117,14 @@ pub fn map_acomposite_alpha_mode(mode: crate::CompositeAlphaMode) -> dxgi1_2::DX Cam::PostMultiplied => dxgi1_2::DXGI_ALPHA_MODE_STRAIGHT, } } + +pub fn map_buffer_usage_to_resource_flags(usage: crate::BufferUses) -> d3d12::D3D12_RESOURCE_FLAGS { + let mut flags = 0; + if usage.contains(crate::BufferUses::STORAGE_STORE) { + flags |= d3d12::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + } + if !usage.intersects(crate::BufferUses::UNIFORM | crate::BufferUses::STORAGE_LOAD) { + flags |= d3d12::D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; + } + flags +} diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 73fbb885c5..b232907311 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -1,25 +1,126 @@ +use super::{conv, HResult as _}; +use std::ptr; +use winapi::{ + shared::{dxgiformat, dxgitype}, + um::{d3d12, d3d12sdklayers, synchapi, winbase}, + Interface, +}; + //TODO: remove this use super::{Encoder, Resource}; + type DeviceResult = Result; +impl super::Device { + pub(super) unsafe fn wait_idle(&self) -> Result<(), crate::DeviceError> { + let value = self.idler.fence.get_value() + 1; + log::info!("Waiting for idle with value {}", value); + self.present_queue.signal(self.idler.fence, value); + let hr = self + .idler + .fence + .set_event_on_completion(self.idler.event, value); + hr.to_device_result("Set event")?; + synchapi::WaitForSingleObject(self.idler.event.0, winbase::INFINITE); + Ok(()) + } +} + impl crate::Device for super::Device { - unsafe fn exit(self) {} - unsafe fn create_buffer(&self, desc: 
&crate::BufferDescriptor) -> DeviceResult { - Ok(Resource) + unsafe fn exit(self) { + //self.heap_srv_cbv_uav.0.destroy(); + //self.samplers.destroy(); + //self.rtv_pool.lock().destroy(); + //self.dsv_pool.lock().destroy(); + //self.srv_uav_pool.lock().destroy(); + + //self.descriptor_updater.lock().destroy(); + + // Debug tracking alive objects + if let Ok(debug_device) = self + .raw + .cast::() + .to_result() + { + debug_device.ReportLiveDeviceObjects(d3d12sdklayers::D3D12_RLDO_DETAIL); + debug_device.destroy(); + } + + self.raw.destroy(); + } + + unsafe fn create_buffer( + &self, + desc: &crate::BufferDescriptor, + ) -> Result { + let mut resource = native::Resource::null(); + + let raw_desc = d3d12::D3D12_RESOURCE_DESC { + Dimension: d3d12::D3D12_RESOURCE_DIMENSION_BUFFER, + Alignment: 0, + Width: desc.size, + Height: 1, + DepthOrArraySize: 1, + MipLevels: 1, + Format: dxgiformat::DXGI_FORMAT_UNKNOWN, + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + Layout: d3d12::D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + Flags: conv::map_buffer_usage_to_resource_flags(desc.usage), + }; + + let is_cpu_read = desc.usage.contains(crate::BufferUses::MAP_READ); + let is_cpu_write = desc.usage.contains(crate::BufferUses::MAP_WRITE); + + let heap_properties = d3d12::D3D12_HEAP_PROPERTIES { + Type: d3d12::D3D12_HEAP_TYPE_CUSTOM, + CPUPageProperty: if is_cpu_write { + d3d12::D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE + } else if is_cpu_read { + d3d12::D3D12_CPU_PAGE_PROPERTY_WRITE_BACK + } else { + d3d12::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE + }, + MemoryPoolPreference: match self.private_caps.memory_architecture { + super::MemoryArchitecture::NonUnified if !is_cpu_read && !is_cpu_write => { + d3d12::D3D12_MEMORY_POOL_L1 + } + _ => d3d12::D3D12_MEMORY_POOL_L0, + }, + CreationNodeMask: 0, + VisibleNodeMask: 0, + }; + + let hr = self.raw.CreateCommittedResource( + &heap_properties, + d3d12::D3D12_HEAP_FLAG_NONE, + &raw_desc, + d3d12::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), + &d3d12::ID3D12Resource::uuidof(), + resource.mut_void(), + ); + + hr.to_device_result("Buffer creation")?; + Ok(super::Buffer { resource }) + } + unsafe fn destroy_buffer(&self, buffer: super::Buffer) { + buffer.resource.destroy(); } - unsafe fn destroy_buffer(&self, buffer: Resource) {} unsafe fn map_buffer( &self, - buffer: &Resource, + buffer: &super::Buffer, range: crate::MemoryRange, ) -> DeviceResult { Err(crate::DeviceError::Lost) } - unsafe fn unmap_buffer(&self, buffer: &Resource) -> DeviceResult<()> { + unsafe fn unmap_buffer(&self, buffer: &super::Buffer) -> DeviceResult<()> { Ok(()) } - unsafe fn flush_mapped_ranges(&self, buffer: &Resource, ranges: I) {} - unsafe fn invalidate_mapped_ranges(&self, buffer: &Resource, ranges: I) {} + unsafe fn flush_mapped_ranges(&self, buffer: &super::Buffer, ranges: I) {} + unsafe fn invalidate_mapped_ranges(&self, buffer: &super::Buffer, ranges: I) {} unsafe fn create_texture(&self, desc: &crate::TextureDescriptor) -> DeviceResult { Ok(Resource) diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 8aef919369..c97a351f03 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -36,7 +36,7 @@ impl crate::Api for Api { type CommandEncoder = Encoder; type CommandBuffer = Resource; - type Buffer = Resource; + type Buffer = Buffer; type Texture = Resource; type SurfaceTexture = Resource; type TextureView = Resource; @@ -52,36 +52,42 @@ impl crate::Api for Api { type ComputePipeline = Resource; } -trait HResult { - fn to_error(self) -> Option>; 
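// Usage sketch for the HResult rework introduced just below: raw HRESULTs (and
// (value, HRESULT) pairs) convert straight into Result, so call sites can use `?` and
// `map_err`. The values here are illustrative only, not part of the patch.
fn _hresult_example() {
    let hr: i32 = winerror::E_OUTOFMEMORY;
    // Failure text as a Cow<'static, str>:
    assert!(hr.to_result().is_err());
    // Logged and mapped onto the HAL error type:
    assert!(matches!(
        hr.to_device_result("Buffer creation"),
        Err(crate::DeviceError::OutOfMemory)
    ));
}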
+trait HResult { + fn to_result(self) -> Result>; + fn to_device_result(self, description: &str) -> Result; } -impl HResult for i32 { - fn to_error(self) -> Option> { +impl HResult<()> for i32 { + fn to_result(self) -> Result<(), Cow<'static, str>> { if self >= 0 { - return None; + return Ok(()); } let description = match self { winerror::E_UNEXPECTED => "unexpected", winerror::E_NOTIMPL => "not implemented", winerror::E_OUTOFMEMORY => "out of memory", winerror::E_INVALIDARG => "invalid argument", - _ => return Some(Cow::Owned(format!("0x{:X}", self as u32))), + _ => return Err(Cow::Owned(format!("0x{:X}", self as u32))), }; - Some(Cow::Borrowed(description)) + Err(Cow::Borrowed(description)) + } + fn to_device_result(self, description: &str) -> Result<(), crate::DeviceError> { + self.to_result().map_err(|err| { + log::error!("{} failed: {}", description, err); + if self == winerror::E_OUTOFMEMORY { + crate::DeviceError::OutOfMemory + } else { + crate::DeviceError::Lost + } + }) } } -trait HResultPair { - type Object; - fn check(self) -> Result>; -} -impl HResultPair for (T, i32) { - type Object = T; - fn check(self) -> Result> { - match self.1.to_error() { - None => Ok(self.0), - Some(err) => Err(err), - } +impl HResult for (T, i32) { + fn to_result(self) -> Result> { + self.1.to_result().map(|()| self.0) + } + fn to_device_result(self, description: &str) -> Result { + self.1.to_device_result(description).map(|()| self.0) } } @@ -190,9 +196,17 @@ pub struct Adapter { unsafe impl Send for Adapter {} unsafe impl Sync for Adapter {} +/// Helper structure for waiting for GPU. +struct Idler { + fence: native::Fence, + event: native::Event, +} + pub struct Device { raw: native::Device, present_queue: native::CommandQueue, + idler: Idler, + private_caps: PrivateCapabilities, } unsafe impl Send for Device {} @@ -205,6 +219,14 @@ pub struct Queue { unsafe impl Send for Queue {} unsafe impl Sync for Queue {} +#[derive(Debug)] +pub struct Buffer { + resource: native::Resource, +} + +unsafe impl Send for Buffer {} +unsafe impl Sync for Buffer {} + impl crate::Instance for Instance { unsafe fn init(desc: &crate::InstanceDescriptor) -> Result { let lib_main = native::D3D12Lib::new().map_err(|_| crate::InstanceError)?; @@ -215,7 +237,7 @@ impl crate::Instance for Instance { if desc.flags.contains(crate::InstanceFlags::VALIDATION) { // Enable debug layer match lib_main.get_debug_interface() { - Ok(pair) => match pair.check() { + Ok(pair) => match pair.to_result() { Ok(debug_controller) => { debug_controller.enable_layer(); debug_controller.Release(); @@ -233,7 +255,7 @@ impl crate::Instance for Instance { // `CreateDXGIFactory2` if the debug interface is actually available. So // we check for whether it exists first. 
match lib_dxgi.get_debug_interface1() { - Ok(pair) => match pair.check() { + Ok(pair) => match pair.to_result() { Ok(debug_controller) => { debug_controller.destroy(); factory_flags |= native::FactoryCreationFlags::DEBUG; @@ -250,7 +272,7 @@ impl crate::Instance for Instance { // Create DXGI factory let factory = match lib_dxgi.create_factory2(factory_flags) { - Ok(pair) => match pair.check() { + Ok(pair) => match pair.to_result() { Ok(factory) => factory, Err(err) => { log::warn!("Failed to create DXGI factory: {}", err); @@ -289,7 +311,7 @@ impl crate::Instance for Instance { unsafe fn enumerate_adapters(&self) -> Vec> { // Try to use high performance order by default (returns None on Windows < 1803) - let factory6 = match self.factory.cast::().check() { + let factory6 = match self.factory.cast::().to_result() { Ok(f6) => { // It's okay to decrement the refcount here because we // have another reference to the factory already owned by `self`. @@ -318,7 +340,7 @@ impl crate::Instance for Instance { if hr == winerror::DXGI_ERROR_NOT_FOUND { break; } - if let Some(err) = hr.to_error() { + if let Err(err) = hr.to_result() { log::error!("Failed enumerating adapters: {}", err); break; } @@ -334,12 +356,12 @@ impl crate::Instance for Instance { if hr == winerror::DXGI_ERROR_NOT_FOUND { break; } - if let Some(err) = hr.to_error() { + if let Err(err) = hr.to_result() { log::error!("Failed enumerating adapters: {}", err); break; } - match adapter1.cast::().check() { + match adapter1.cast::().to_result() { Ok(adapter2) => { adapter1.destroy(); adapter2 @@ -398,7 +420,7 @@ impl crate::Surface for Surface { let swap_chain = match self.swap_chain.take() { Some(sc) => { // can't have image resources in flight used by GPU - //device.wait_idle().unwrap(); + let _ = device.wait_idle(); let raw = sc.release_resources(); let result = raw.ResizeBuffers( @@ -408,7 +430,7 @@ impl crate::Surface for Surface { non_srgb_format, flags, ); - if let Some(err) = result.to_error() { + if let Err(err) = result.to_result() { log::error!("ResizeBuffers failed: {}", err); return Err(crate::SurfaceError::Other("window is in use")); } @@ -443,12 +465,12 @@ impl crate::Surface for Surface { swap_chain1.mut_void() as *mut *mut _, ); - if let Some(err) = hr.to_error() { + if let Err(err) = hr.to_result() { log::error!("SwapChain creation error: {}", err); return Err(crate::SurfaceError::Other("swap chain creation")); } - match swap_chain1.cast::().check() { + match swap_chain1.cast::().to_result() { Ok(swap_chain3) => { swap_chain1.destroy(); swap_chain3 @@ -495,7 +517,7 @@ impl crate::Surface for Surface { let _ = sc.wait(winbase::INFINITE); //TODO: this shouldn't be needed, // but it complains that the queue is still used otherwise - //let _ = device.wait_idle(); + let _ = device.wait_idle(); let raw = sc.release_resources(); raw.destroy(); } From c01f6a557e9e0d816068df4fa2712292d8839148 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Mon, 5 Jul 2021 18:19:57 -0400 Subject: [PATCH 05/33] hal/dx12: enable the backend in wgpu-core --- wgpu-core/Cargo.toml | 2 +- wgpu-core/build.rs | 2 +- wgpu-core/src/hub.rs | 2 +- wgpu-core/src/instance.rs | 4 ++-- wgpu-core/src/lib.rs | 4 ++-- wgpu-hal/src/dx12/command.rs | 2 +- wgpu-hal/src/dx12/device.rs | 8 ++++---- wgpu-hal/src/dx12/mod.rs | 5 +++-- wgpu-hal/src/lib.rs | 4 +++- 9 files changed, 18 insertions(+), 15 deletions(-) diff --git a/wgpu-core/Cargo.toml b/wgpu-core/Cargo.toml index 59519a2194..af10fbb528 100644 --- a/wgpu-core/Cargo.toml +++ b/wgpu-core/Cargo.toml @@ -57,7 
+57,7 @@ hal = { path = "../wgpu-hal", package = "wgpu-hal", features = ["metal"] } hal = { path = "../wgpu-hal", package = "wgpu-hal", features = ["vulkan", "gles"] } [target.'cfg(all(not(target_arch = "wasm32"), windows))'.dependencies] -hal = { path = "../wgpu-hal", package = "wgpu-hal", features = ["vulkan"] } +hal = { path = "../wgpu-hal", package = "wgpu-hal", features = ["vulkan", "dx12"] } [build-dependencies] cfg_aliases = "0.1" diff --git a/wgpu-core/build.rs b/wgpu-core/build.rs index f9cba2a418..38dad2eac7 100644 --- a/wgpu-core/build.rs +++ b/wgpu-core/build.rs @@ -9,7 +9,7 @@ fn main() { // Backends vulkan: { all(not(wasm), any(windows, unix_wo_apple)) }, metal: { all(not(wasm), apple) }, - dx12: { all(false, not(wasm), windows) }, + dx12: { all(not(wasm), windows) }, dx11: { all(false, not(wasm), windows) }, gl: { all(not(wasm), unix_wo_apple) }, } diff --git a/wgpu-core/src/hub.rs b/wgpu-core/src/hub.rs index 1e676e3c9d..cb367e0332 100644 --- a/wgpu-core/src/hub.rs +++ b/wgpu-core/src/hub.rs @@ -957,7 +957,6 @@ impl HalApi for hal::api::Metal { } } -/* #[cfg(dx12)] impl HalApi for hal::api::Dx12 { const VARIANT: Backend = Backend::Dx12; @@ -969,6 +968,7 @@ impl HalApi for hal::api::Dx12 { } } +/* #[cfg(dx11)] impl HalApi for hal::api::Dx11 { const VARIANT: Backend = Backend::Dx11; diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs index e237ccd719..7b004ebb41 100644 --- a/wgpu-core/src/instance.rs +++ b/wgpu-core/src/instance.rs @@ -110,9 +110,9 @@ impl Instance { #[cfg(metal)] metal: init::(backends), #[cfg(dx12)] - dx12: init(Backend::Dx12, backends), + dx12: init::(backends), #[cfg(dx11)] - dx11: init(Backend::Dx11, backends), + dx11: init::(backends), #[cfg(gl)] gl: init::(backends), } diff --git a/wgpu-core/src/lib.rs b/wgpu-core/src/lib.rs index 90e91f4462..968eff3e66 100644 --- a/wgpu-core/src/lib.rs +++ b/wgpu-core/src/lib.rs @@ -201,8 +201,8 @@ macro_rules! 
gfx_select { wgt::Backend::Vulkan => $global.$method::<$crate::api::Vulkan>( $($param),* ), #[cfg(all(not(target_arch = "wasm32"), any(target_os = "ios", target_os = "macos")))] wgt::Backend::Metal => $global.$method::<$crate::api::Metal>( $($param),* ), - //#[cfg(all(not(target_arch = "wasm32"), windows))] - //wgt::Backend::Dx12 => $global.$method::<$crate::api::Dx12>( $($param),* ), + #[cfg(all(not(target_arch = "wasm32"), windows))] + wgt::Backend::Dx12 => $global.$method::<$crate::api::Dx12>( $($param),* ), //#[cfg(all(not(target_arch = "wasm32"), windows))] //wgt::Backend::Dx11 => $global.$method::<$crate::api::Dx11>( $($param),* ), #[cfg(all(not(target_arch = "wasm32"), unix, not(any(target_os = "ios", target_os = "macos"))))] diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 984ed0f179..8285caf64c 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -1,7 +1,7 @@ use super::Resource; use std::ops::Range; -impl crate::CommandEncoder for super::Encoder { +impl crate::CommandEncoder for super::CommandEncoder { unsafe fn begin_encoding(&mut self, label: crate::Label) -> Result<(), crate::DeviceError> { Ok(()) } diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index b232907311..893d7142b6 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -7,7 +7,7 @@ use winapi::{ }; //TODO: remove this -use super::{Encoder, Resource}; +use super::Resource; type DeviceResult = Result; @@ -142,10 +142,10 @@ impl crate::Device for super::Device { unsafe fn create_command_encoder( &self, desc: &crate::CommandEncoderDescriptor, - ) -> DeviceResult { - Ok(Encoder) + ) -> Result { + Ok(super::CommandEncoder {}) } - unsafe fn destroy_command_encoder(&self, encoder: Encoder) {} + unsafe fn destroy_command_encoder(&self, encoder: super::CommandEncoder) {} unsafe fn create_bind_group_layout( &self, diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index c97a351f03..fcd1343dae 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -22,7 +22,6 @@ use winapi::{ #[derive(Clone)] pub struct Api; //TODO: remove these temporaries -pub struct Encoder; #[derive(Debug)] pub struct Resource; @@ -33,7 +32,7 @@ impl crate::Api for Api { type Device = Device; type Queue = Queue; - type CommandEncoder = Encoder; + type CommandEncoder = CommandEncoder; type CommandBuffer = Resource; type Buffer = Buffer; @@ -227,6 +226,8 @@ pub struct Buffer { unsafe impl Send for Buffer {} unsafe impl Sync for Buffer {} +pub struct CommandEncoder {} + impl crate::Instance for Instance { unsafe fn init(desc: &crate::InstanceDescriptor) -> Result { let lib_main = native::D3D12Lib::new().map_err(|_| crate::InstanceError)?; diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 58892a4f01..78dc99a747 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -42,7 +42,9 @@ )] #[cfg(all(feature = "metal", not(any(target_os = "macos", target_os = "ios"))))] -compile_error!("Metal backend enabled on non-Apple OS. If your project is not using resolver=\"2\" in Cargo.toml, it should."); +compile_error!("Metal API enabled on non-Apple OS. If your project is not using resolver=\"2\" in Cargo.toml, it should."); +#[cfg(all(feature = "dx12", not(windows)))] +compile_error!("DX12 API enabled on non-Windows OS. 
If your project is not using resolver=\"2\" in Cargo.toml, it should."); #[cfg(all(feature = "dx12", windows))] mod dx12; From 23615aa55d7645f52d243d83b0ac53784e3825cb Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Tue, 6 Jul 2021 00:45:47 -0400 Subject: [PATCH 06/33] hal/dx12: texture and view creation --- Cargo.lock | 1 + wgpu-hal/Cargo.toml | 4 +- wgpu-hal/src/dx12/adapter.rs | 22 +-- wgpu-hal/src/dx12/command.rs | 8 +- wgpu-hal/src/dx12/conv.rs | 31 ++++ wgpu-hal/src/dx12/descriptor.rs | 170 ++++++++++++++++++ wgpu-hal/src/dx12/device.rs | 301 +++++++++++++++++++++++++++++--- wgpu-hal/src/dx12/mod.rs | 79 ++++----- 8 files changed, 525 insertions(+), 91 deletions(-) create mode 100644 wgpu-hal/src/dx12/descriptor.rs diff --git a/Cargo.lock b/Cargo.lock index f1b6a07a92..c62c2e5bf4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1992,6 +1992,7 @@ version = "0.9.0" dependencies = [ "arrayvec", "ash", + "bit-set", "bitflags", "block", "core-graphics-types", diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml index 497c962db4..b19101ea06 100644 --- a/wgpu-hal/Cargo.toml +++ b/wgpu-hal/Cargo.toml @@ -16,7 +16,7 @@ default = [] metal = ["naga/msl-out", "block", "foreign-types"] vulkan = ["naga/spv-out", "ash", "gpu-alloc", "gpu-descriptor", "libloading", "inplace_it", "renderdoc-sys"] gles = ["naga/glsl-out", "glow", "egl", "libloading"] -dx12 = ["naga/hlsl-out", "native", "winapi/d3d12", "winapi/d3d12shader", "winapi/d3d12sdklayers", "winapi/dxgi1_6"] +dx12 = ["naga/hlsl-out", "native", "bit-set", "winapi/d3d12", "winapi/d3d12shader", "winapi/d3d12sdklayers", "winapi/dxgi1_6"] [dependencies] bitflags = "1.0" @@ -41,8 +41,8 @@ renderdoc-sys = { version = "0.7.1", optional = true } # backend: Gles glow = { git = "https://github.com/grovesNL/glow", rev = "0864897a28bbdd43f89f4fd8fdd4ed781b719f8a", optional = true } # backend: Dx12 +bit-set = { version = "0.5", optional = true } native = { package = "d3d12", version = "0.4", features = ["libloading"], optional = true } -#winapi = { version = "0.3", features = ["basetsd","d3dcommon","d3dcompiler","dxgi1_2","dxgi1_3","dxgi1_4","dxgi1_5","dxgi1_6","dxgidebug","dxgiformat","dxgitype","handleapi","minwindef","synchapi","unknwnbase","winbase","winerror","winnt"] } [target.'cfg(not(target_arch = "wasm32"))'.dependencies] egl = { package = "khronos-egl", version = "4.1", features = ["dynamic"], optional = true } diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 53874e7908..e733de4c92 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -3,7 +3,6 @@ use std::{mem, sync::Arc}; use winapi::{ shared::{dxgi, dxgi1_2, dxgi1_5, minwindef, windef, winerror}, um::{d3d12, winuser}, - Interface, }; impl Drop for super::Adapter { @@ -136,6 +135,7 @@ impl super::Adapter { let mut features = wgt::Features::empty() | wgt::Features::DEPTH_CLAMPING + | wgt::Features::MAPPABLE_PRIMARY_BUFFERS //TODO: Naga part //| wgt::Features::TEXTURE_BINDING_ARRAY //| wgt::Features::BUFFER_BINDING_ARRAY @@ -240,25 +240,9 @@ impl crate::Adapter for super::Adapter { ) .to_device_result("Queue creation")?; - let mut idle_fence = native::Fence::null(); - let hr = self.device.CreateFence( - 0, - d3d12::D3D12_FENCE_FLAG_NONE, - &d3d12::ID3D12Fence::uuidof(), - idle_fence.mut_void(), - ); - hr.to_device_result("Idle fence creation")?; - + let device = super::Device::new(self.device, queue, self.private_caps)?; Ok(crate::OpenDevice { - device: super::Device { - raw: self.device, - present_queue: queue, - idler: super::Idler { - 
fence: idle_fence, - event: native::Event::create(false, false), - }, - private_caps: self.private_caps, - }, + device, queue: super::Queue { raw: queue }, }) } diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 8285caf64c..76b2145c36 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -36,9 +36,9 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn copy_texture_to_texture( &mut self, - src: &Resource, + src: &super::Texture, src_usage: crate::TextureUses, - dst: &Resource, + dst: &super::Texture, regions: T, ) { } @@ -46,14 +46,14 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn copy_buffer_to_texture( &mut self, src: &super::Buffer, - dst: &Resource, + dst: &super::Texture, regions: T, ) { } unsafe fn copy_texture_to_buffer( &mut self, - src: &Resource, + src: &super::Texture, src_usage: crate::TextureUses, dst: &super::Buffer, regions: T, diff --git a/wgpu-hal/src/dx12/conv.rs b/wgpu-hal/src/dx12/conv.rs index 0363c024a8..42fb7c62b9 100644 --- a/wgpu-hal/src/dx12/conv.rs +++ b/wgpu-hal/src/dx12/conv.rs @@ -128,3 +128,34 @@ pub fn map_buffer_usage_to_resource_flags(usage: crate::BufferUses) -> d3d12::D3 } flags } + +pub fn map_texture_dimension(dim: wgt::TextureDimension) -> d3d12::D3D12_RESOURCE_DIMENSION { + match dim { + wgt::TextureDimension::D1 => d3d12::D3D12_RESOURCE_DIMENSION_TEXTURE1D, + wgt::TextureDimension::D2 => d3d12::D3D12_RESOURCE_DIMENSION_TEXTURE2D, + wgt::TextureDimension::D3 => d3d12::D3D12_RESOURCE_DIMENSION_TEXTURE3D, + } +} + +pub fn map_texture_usage_to_resource_flags( + usage: crate::TextureUses, +) -> d3d12::D3D12_RESOURCE_FLAGS { + let mut flags = 0; + + if usage.contains(crate::TextureUses::COLOR_TARGET) { + flags |= d3d12::D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + } + if usage.intersects( + crate::TextureUses::DEPTH_STENCIL_READ | crate::TextureUses::DEPTH_STENCIL_WRITE, + ) { + flags |= d3d12::D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + } + if usage.contains(crate::TextureUses::STORAGE_STORE) { + flags |= d3d12::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + } + if !usage.intersects(crate::TextureUses::SAMPLED | crate::TextureUses::STORAGE_LOAD) { + flags |= d3d12::D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; + } + + flags +} diff --git a/wgpu-hal/src/dx12/descriptor.rs b/wgpu-hal/src/dx12/descriptor.rs new file mode 100644 index 0000000000..da65b8df54 --- /dev/null +++ b/wgpu-hal/src/dx12/descriptor.rs @@ -0,0 +1,170 @@ +use bit_set::BitSet; +use std::fmt; + +const HEAP_SIZE_FIXED: usize = 64; + +#[derive(Copy, Clone)] +pub(super) struct DualHandle { + cpu: native::CpuDescriptor, + gpu: native::GpuDescriptor, + /// How large the block allocated to this handle is. + size: u64, +} + +type DescriptorIndex = u64; + +struct LinearHeap { + raw: native::DescriptorHeap, + handle_size: u64, + total_handles: u64, + start: DualHandle, +} + +impl LinearHeap { + pub(super) fn at(&self, index: DescriptorIndex, size: u64) -> DualHandle { + assert!(index < self.total_handles); + DualHandle { + cpu: self.cpu_descriptor_at(index), + gpu: self.gpu_descriptor_at(index), + size, + } + } + + pub(super) fn cpu_descriptor_at(&self, index: u64) -> native::CpuDescriptor { + native::CpuDescriptor { + ptr: self.start.cpu.ptr + (self.handle_size * index) as usize, + } + } + + pub(super) fn gpu_descriptor_at(&self, index: u64) -> native::GpuDescriptor { + native::GpuDescriptor { + ptr: self.start.gpu.ptr + self.handle_size * index, + } + } +} + +/// Fixed-size free-list allocator for CPU descriptors. 
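// The free-list bookkeeping used by FixedSizeHeap below fits in a single u64: a set bit
// means the slot is free, `trailing_zeros` picks the first free slot, and XOR toggles a
// slot's state in both alloc and free. Standalone sketch of just that bookkeeping (no
// D3D12 types involved; `SlotMask` is illustrative and not part of this patch):
struct SlotMask(u64); // start with !0: all 64 slots free

impl SlotMask {
    fn alloc(&mut self) -> Option<usize> {
        if self.0 == 0 {
            return None; // every slot occupied
        }
        let slot = self.0.trailing_zeros() as usize;
        self.0 ^= 1 << slot; // clear the bit: slot is now occupied
        Some(slot)
    }

    fn free(&mut self, slot: usize) {
        debug_assert_eq!(self.0 & (1 << slot), 0, "freeing a slot that is not in use");
        self.0 ^= 1 << slot; // set the bit: slot is free again
    }
}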
+struct FixedSizeHeap { + raw: native::DescriptorHeap, + /// Bit flag representation of available handles in the heap. + /// + /// 0 - Occupied + /// 1 - free + availability: u64, + handle_size: usize, + start: native::CpuDescriptor, +} + +impl FixedSizeHeap { + fn new(device: native::Device, ty: native::DescriptorHeapType) -> Self { + let (heap, _hr) = device.create_descriptor_heap( + HEAP_SIZE_FIXED as _, + ty, + native::DescriptorHeapFlags::empty(), + 0, + ); + + Self { + handle_size: device.get_descriptor_increment_size(ty) as _, + availability: !0, // all free! + start: heap.start_cpu_descriptor(), + raw: heap, + } + } + + fn alloc_handle(&mut self) -> native::CpuDescriptor { + // Find first free slot. + let slot = self.availability.trailing_zeros() as usize; + assert!(slot < HEAP_SIZE_FIXED); + // Set the slot as occupied. + self.availability ^= 1 << slot; + + native::CpuDescriptor { + ptr: self.start.ptr + self.handle_size * slot, + } + } + + fn free_handle(&mut self, handle: native::CpuDescriptor) { + let slot = (handle.ptr - self.start.ptr) / self.handle_size; + assert!(slot < HEAP_SIZE_FIXED); + assert_eq!(self.availability & (1 << slot), 0); + self.availability ^= 1 << slot; + } + + fn is_full(&self) -> bool { + self.availability == 0 + } + + unsafe fn destroy(&self) { + self.raw.destroy(); + } +} + +#[derive(Clone, Copy)] +pub(super) struct Handle { + pub raw: native::CpuDescriptor, + heap_index: usize, +} + +impl fmt::Debug for Handle { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("Handle") + .field("ptr", &self.raw.ptr) + .field("heap_index", &self.heap_index) + .finish() + } +} + +pub(super) struct CpuPool { + device: native::Device, + ty: native::DescriptorHeapType, + heaps: Vec, + avaliable_heap_indices: BitSet, +} + +impl CpuPool { + pub(super) fn new(device: native::Device, ty: native::DescriptorHeapType) -> Self { + Self { + device, + ty, + heaps: Vec::new(), + avaliable_heap_indices: BitSet::new(), + } + } + + pub(super) fn alloc_handle(&mut self) -> Handle { + let heap_index = self + .avaliable_heap_indices + .iter() + .next() + .unwrap_or_else(|| { + // Allocate a new heap + let id = self.heaps.len(); + self.heaps.push(FixedSizeHeap::new(self.device, self.ty)); + self.avaliable_heap_indices.insert(id); + id + }); + + let heap = &mut self.heaps[heap_index]; + let handle = Handle { + raw: heap.alloc_handle(), + heap_index, + }; + if heap.is_full() { + self.avaliable_heap_indices.remove(heap_index); + } + + handle + } + + pub(super) fn free_handle(&mut self, handle: Handle) { + self.heaps[handle.heap_index].free_handle(handle.raw); + self.avaliable_heap_indices.insert(handle.heap_index); + } + + pub(super) unsafe fn destroy(&self) { + for heap in &self.heaps { + heap.destroy(); + } + } +} diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 893d7142b6..badf407402 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -1,5 +1,6 @@ -use super::{conv, HResult as _}; -use std::ptr; +use super::{conv, descriptor, HResult as _}; +use parking_lot::Mutex; +use std::{iter, mem, ptr}; use winapi::{ shared::{dxgiformat, dxgitype}, um::{d3d12, d3d12sdklayers, synchapi, winbase}, @@ -11,7 +12,52 @@ use super::Resource; type DeviceResult = Result; +const D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING: u32 = 0x1688; + +fn wide_cstr(name: &str) -> Vec { + name.encode_utf16().chain(iter::once(0)).collect() +} + impl super::Device { + pub(super) fn new( + raw: native::Device, + present_queue: native::CommandQueue, 
+ private_caps: super::PrivateCapabilities, + ) -> Result { + let mut idle_fence = native::Fence::null(); + let hr = unsafe { + raw.CreateFence( + 0, + d3d12::D3D12_FENCE_FLAG_NONE, + &d3d12::ID3D12Fence::uuidof(), + idle_fence.mut_void(), + ) + }; + hr.to_device_result("Idle fence creation")?; + + Ok(super::Device { + raw, + present_queue, + idler: super::Idler { + fence: idle_fence, + event: native::Event::create(false, false), + }, + private_caps, + rtv_pool: Mutex::new(descriptor::CpuPool::new( + raw, + native::DescriptorHeapType::Rtv, + )), + dsv_pool: Mutex::new(descriptor::CpuPool::new( + raw, + native::DescriptorHeapType::Dsv, + )), + srv_uav_pool: Mutex::new(descriptor::CpuPool::new( + raw, + native::DescriptorHeapType::CbvSrvUav, + )), + }) + } + pub(super) unsafe fn wait_idle(&self) -> Result<(), crate::DeviceError> { let value = self.idler.fence.get_value() + 1; log::info!("Waiting for idle with value {}", value); @@ -24,15 +70,125 @@ impl super::Device { synchapi::WaitForSingleObject(self.idler.event.0, winbase::INFINITE); Ok(()) } + + unsafe fn view_texture_as_shader_resource( + &self, + texture: &super::Texture, + desc: &crate::TextureViewDescriptor, + ) -> descriptor::Handle { + let mut raw_desc = d3d12::D3D12_SHADER_RESOURCE_VIEW_DESC { + Format: conv::map_texture_format(desc.format), + ViewDimension: 0, + Shader4ComponentMapping: D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + u: mem::zeroed(), + }; + + #[allow(non_snake_case)] + let MipLevels = match desc.range.mip_level_count { + Some(count) => count.get(), + None => !0, + }; + #[allow(non_snake_case)] + let ArraySize = match desc.range.array_layer_count { + Some(count) => count.get(), + None => texture.size.depth_or_array_layers - desc.range.base_array_layer, + }; + + match desc.dimension { + wgt::TextureViewDimension::D1 => { + raw_desc.ViewDimension = d3d12::D3D12_SRV_DIMENSION_TEXTURE1D; + *raw_desc.u.Texture1D_mut() = d3d12::D3D12_TEX1D_SRV { + MostDetailedMip: desc.range.base_mip_level, + MipLevels, + ResourceMinLODClamp: 0.0, + } + } + /* + wgt::TextureViewDimension::D1Array => { + raw_desc.ViewDimension = d3d12::D3D12_SRV_DIMENSION_TEXTURE1DARRAY; + *raw_desc.u.Texture1DArray_mut() = d3d12::D3D12_TEX1D_ARRAY_SRV { + MostDetailedMip: desc.range.base_mip_level, + MipLevels, + FirstArraySlice: desc.range.base_array_layer, + ArraySize, + ResourceMinLODClamp: 0.0, + } + }*/ + wgt::TextureViewDimension::D2 if texture.sample_count > 1 => { + raw_desc.ViewDimension = d3d12::D3D12_SRV_DIMENSION_TEXTURE2DMS; + *raw_desc.u.Texture2DMS_mut() = d3d12::D3D12_TEX2DMS_SRV { + UnusedField_NothingToDefine: 0, + } + } + wgt::TextureViewDimension::D2 => { + raw_desc.ViewDimension = d3d12::D3D12_SRV_DIMENSION_TEXTURE2D; + *raw_desc.u.Texture2D_mut() = d3d12::D3D12_TEX2D_SRV { + MostDetailedMip: desc.range.base_mip_level, + MipLevels, + PlaneSlice: 0, + ResourceMinLODClamp: 0.0, + } + } + wgt::TextureViewDimension::D2Array if texture.sample_count > 1 => { + raw_desc.ViewDimension = d3d12::D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY; + *raw_desc.u.Texture2DMSArray_mut() = d3d12::D3D12_TEX2DMS_ARRAY_SRV { + FirstArraySlice: desc.range.base_array_layer, + ArraySize, + } + } + wgt::TextureViewDimension::D2Array => { + raw_desc.ViewDimension = d3d12::D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + *raw_desc.u.Texture2DArray_mut() = d3d12::D3D12_TEX2D_ARRAY_SRV { + MostDetailedMip: desc.range.base_mip_level, + MipLevels, + FirstArraySlice: desc.range.base_array_layer, + ArraySize, + PlaneSlice: 0, + ResourceMinLODClamp: 0.0, + } + } + 
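// The D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING value (0x1688) used for
// Shader4ComponentMapping above is the identity swizzle: destination R/G/B/A each select
// source component 0..3 in consecutive 3-bit fields, plus D3D12's "always set" marker at
// bit 12. Quick derivation (illustrative constant, not part of the patch):
const _IDENTITY_MAPPING: u32 = 0 | (1 << 3) | (2 << 6) | (3 << 9) | (1 << 12); // == 0x1688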
wgt::TextureViewDimension::D3 => { + raw_desc.ViewDimension = d3d12::D3D12_SRV_DIMENSION_TEXTURE3D; + *raw_desc.u.Texture3D_mut() = d3d12::D3D12_TEX3D_SRV { + MostDetailedMip: desc.range.base_mip_level, + MipLevels, + ResourceMinLODClamp: 0.0, + } + } + wgt::TextureViewDimension::Cube => { + raw_desc.ViewDimension = d3d12::D3D12_SRV_DIMENSION_TEXTURECUBE; + *raw_desc.u.TextureCube_mut() = d3d12::D3D12_TEXCUBE_SRV { + MostDetailedMip: desc.range.base_mip_level, + MipLevels, + ResourceMinLODClamp: 0.0, + } + } + wgt::TextureViewDimension::CubeArray => { + raw_desc.ViewDimension = d3d12::D3D12_SRV_DIMENSION_TEXTURECUBEARRAY; + *raw_desc.u.TextureCubeArray_mut() = d3d12::D3D12_TEXCUBE_ARRAY_SRV { + MostDetailedMip: desc.range.base_mip_level, + MipLevels, + First2DArrayFace: desc.range.base_array_layer, + NumCubes: ArraySize / 6, + ResourceMinLODClamp: 0.0, + } + } + } + + let handle = self.srv_uav_pool.lock().alloc_handle(); + self.raw + .CreateShaderResourceView(texture.resource.as_mut_ptr(), &raw_desc, handle.raw); + handle + } } impl crate::Device for super::Device { unsafe fn exit(self) { //self.heap_srv_cbv_uav.0.destroy(); //self.samplers.destroy(); - //self.rtv_pool.lock().destroy(); - //self.dsv_pool.lock().destroy(); - //self.srv_uav_pool.lock().destroy(); + self.rtv_pool.into_inner().destroy(); + self.dsv_pool.into_inner().destroy(); + self.srv_uav_pool.into_inner().destroy(); //self.descriptor_updater.lock().destroy(); @@ -76,10 +232,10 @@ impl crate::Device for super::Device { let heap_properties = d3d12::D3D12_HEAP_PROPERTIES { Type: d3d12::D3D12_HEAP_TYPE_CUSTOM, - CPUPageProperty: if is_cpu_write { - d3d12::D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE - } else if is_cpu_read { + CPUPageProperty: if is_cpu_read { d3d12::D3D12_CPU_PAGE_PROPERTY_WRITE_BACK + } else if is_cpu_write { + d3d12::D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE } else { d3d12::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE }, @@ -113,27 +269,132 @@ impl crate::Device for super::Device { &self, buffer: &super::Buffer, range: crate::MemoryRange, - ) -> DeviceResult { - Err(crate::DeviceError::Lost) + ) -> Result { + let mut ptr = ptr::null_mut(); + let hr = (*buffer.resource).Map(0, &d3d12::D3D12_RANGE { Begin: 0, End: 0 }, &mut ptr); + hr.to_device_result("Map buffer")?; + Ok(crate::BufferMapping { + ptr: ptr::NonNull::new(ptr.offset(range.start as isize) as *mut _).unwrap(), + //TODO: double-check this. Documentation is a bit misleading - + // it implies that Map/Unmap is needed to invalidate/flush memory. 
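// Because these buffers live in a CUSTOM heap with WRITE_BACK / WRITE_COMBINE CPU pages,
// the mapping is reported as coherent and flush/invalidate stay no-ops. A caller-side
// sketch of the contract this establishes (`upload_zeros` and its arguments are
// illustrative helpers, not part of wgpu-hal):
unsafe fn upload_zeros(
    device: &super::Device,
    buffer: &super::Buffer,
    size: wgt::BufferAddress,
) -> Result<(), crate::DeviceError> {
    use crate::Device as _; // bring the HAL trait methods into scope
    let mapping = device.map_buffer(buffer, 0..size)?;
    std::ptr::write_bytes(mapping.ptr.as_ptr(), 0, size as usize);
    if !mapping.is_coherent {
        // never taken on this backend, but required by the general HAL contract
        device.flush_mapped_ranges(buffer, std::iter::once(0..size));
    }
    device.unmap_buffer(buffer)
}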
+ is_coherent: true, + }) } - unsafe fn unmap_buffer(&self, buffer: &super::Buffer) -> DeviceResult<()> { + unsafe fn unmap_buffer(&self, buffer: &super::Buffer) -> Result<(), crate::DeviceError> { + (*buffer.resource).Unmap(0, &d3d12::D3D12_RANGE { Begin: 0, End: 0 }); Ok(()) } - unsafe fn flush_mapped_ranges(&self, buffer: &super::Buffer, ranges: I) {} - unsafe fn invalidate_mapped_ranges(&self, buffer: &super::Buffer, ranges: I) {} + unsafe fn flush_mapped_ranges(&self, _buffer: &super::Buffer, ranges: I) {} + unsafe fn invalidate_mapped_ranges(&self, _buffer: &super::Buffer, ranges: I) {} - unsafe fn create_texture(&self, desc: &crate::TextureDescriptor) -> DeviceResult { - Ok(Resource) + unsafe fn create_texture( + &self, + desc: &crate::TextureDescriptor, + ) -> Result { + let mut resource = native::Resource::null(); + + let raw_desc = d3d12::D3D12_RESOURCE_DESC { + Dimension: conv::map_texture_dimension(desc.dimension), + Alignment: 0, + Width: desc.size.width as u64, + Height: desc.size.height, + DepthOrArraySize: desc.size.depth_or_array_layers as u16, + MipLevels: desc.mip_level_count as u16, + //TODO: map to surface format to allow view casting + Format: conv::map_texture_format(desc.format), + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: desc.sample_count, + Quality: 0, + }, + Layout: d3d12::D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE, + Flags: conv::map_texture_usage_to_resource_flags(desc.usage), + }; + + let heap_properties = d3d12::D3D12_HEAP_PROPERTIES { + Type: d3d12::D3D12_HEAP_TYPE_CUSTOM, + CPUPageProperty: d3d12::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE, + MemoryPoolPreference: match self.private_caps.memory_architecture { + super::MemoryArchitecture::NonUnified => d3d12::D3D12_MEMORY_POOL_L1, + super::MemoryArchitecture::Unified { .. 
} => d3d12::D3D12_MEMORY_POOL_L0, + }, + CreationNodeMask: 0, + VisibleNodeMask: 0, + }; + + let hr = self.raw.CreateCommittedResource( + &heap_properties, + d3d12::D3D12_HEAP_FLAG_NONE, + &raw_desc, + d3d12::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), + &d3d12::ID3D12Resource::uuidof(), + resource.mut_void(), + ); + + if let Some(label) = desc.label { + let cwstr = wide_cstr(label); + resource.SetName(cwstr.as_ptr()); + } + + hr.to_device_result("Texture creation")?; + Ok(super::Texture { + resource, + size: desc.size, + sample_count: desc.sample_count, + }) + } + unsafe fn destroy_texture(&self, texture: super::Texture) { + texture.resource.destroy(); } - unsafe fn destroy_texture(&self, texture: Resource) {} + unsafe fn create_texture_view( &self, - texture: &Resource, + texture: &super::Texture, desc: &crate::TextureViewDescriptor, - ) -> DeviceResult { - Ok(Resource) + ) -> Result { + Ok(super::TextureView { + handle_srv: if desc + .usage + .intersects(crate::TextureUses::SAMPLED | crate::TextureUses::STORAGE_LOAD) + { + Some(self.view_texture_as_shader_resource(texture, desc)) + } else { + None + }, + handle_rtv: if desc.usage.intersects(crate::TextureUses::COLOR_TARGET) { + unimplemented!() + } else { + None + }, + handle_dsv: if desc.usage.intersects( + crate::TextureUses::DEPTH_STENCIL_READ | crate::TextureUses::DEPTH_STENCIL_WRITE, + ) { + unimplemented!() + } else { + None + }, + handle_uav: if desc.usage.intersects(crate::TextureUses::STORAGE_STORE) { + unimplemented!() + } else { + None + }, + }) } - unsafe fn destroy_texture_view(&self, view: Resource) {} + unsafe fn destroy_texture_view(&self, view: super::TextureView) { + if let Some(handle) = view.handle_srv { + self.srv_uav_pool.lock().free_handle(handle); + } + if let Some(handle) = view.handle_uav { + self.srv_uav_pool.lock().free_handle(handle); + } + if let Some(handle) = view.handle_rtv { + self.rtv_pool.lock().free_handle(handle); + } + if let Some(handle) = view.handle_dsv { + self.dsv_pool.lock().free_handle(handle); + } + } + unsafe fn create_sampler(&self, desc: &crate::SamplerDescriptor) -> DeviceResult { Ok(Resource) } diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index fcd1343dae..de1d751a76 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -10,8 +10,10 @@ mod adapter; mod command; mod conv; +mod descriptor; mod device; +use parking_lot::Mutex; use std::{borrow::Cow, ptr, sync::Arc}; use winapi::{ shared::{dxgi, dxgi1_2, dxgi1_4, dxgi1_6, dxgitype, windef, winerror}, @@ -36,9 +38,9 @@ impl crate::Api for Api { type CommandBuffer = Resource; type Buffer = Buffer; - type Texture = Resource; - type SurfaceTexture = Resource; - type TextureView = Resource; + type Texture = Texture; + type SurfaceTexture = Texture; + type TextureView = TextureView; type Sampler = Resource; type QuerySet = Resource; type Fence = Resource; @@ -107,46 +109,6 @@ impl Drop for Instance { unsafe impl Send for Instance {} unsafe impl Sync for Instance {} -#[derive(Copy, Clone)] -struct DualHandle { - cpu: native::CpuDescriptor, - gpu: native::GpuDescriptor, - /// How large the block allocated to this handle is. 
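// Every texture-view path added in this patch follows the same cycle: take a CPU-only
// handle from the matching descriptor pool, write the descriptor through the
// corresponding ID3D12Device::Create*View call, and keep the handle on the TextureView
// so destroy_texture_view can return it to the pool. Condensed restatement of the SRV
// case from view_texture_as_shader_resource above (no new API, just the pattern):
let handle = self.srv_uav_pool.lock().alloc_handle();
self.raw
    .CreateShaderResourceView(texture.resource.as_mut_ptr(), &raw_desc, handle.raw);
// ... and on teardown, in destroy_texture_view:
self.srv_uav_pool.lock().free_handle(handle);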
- size: u64, -} - -type DescriptorIndex = u64; - -struct DescriptorHeap { - raw: native::DescriptorHeap, - handle_size: u64, - total_handles: u64, - start: DualHandle, -} - -impl DescriptorHeap { - fn at(&self, index: DescriptorIndex, size: u64) -> DualHandle { - assert!(index < self.total_handles); - DualHandle { - cpu: self.cpu_descriptor_at(index), - gpu: self.gpu_descriptor_at(index), - size, - } - } - - fn cpu_descriptor_at(&self, index: u64) -> native::CpuDescriptor { - native::CpuDescriptor { - ptr: self.start.cpu.ptr + (self.handle_size * index) as usize, - } - } - - fn gpu_descriptor_at(&self, index: u64) -> native::GpuDescriptor { - native::GpuDescriptor { - ptr: self.start.gpu.ptr + self.handle_size * index, - } - } -} - struct SwapChain { raw: native::WeakPtr, // need to associate raw image pointers with the swapchain so they can be properly released @@ -206,6 +168,10 @@ pub struct Device { present_queue: native::CommandQueue, idler: Idler, private_caps: PrivateCapabilities, + // CPU only pools + rtv_pool: Mutex, + dsv_pool: Mutex, + srv_uav_pool: Mutex, } unsafe impl Send for Device {} @@ -218,6 +184,8 @@ pub struct Queue { unsafe impl Send for Queue {} unsafe impl Sync for Queue {} +pub struct CommandEncoder {} + #[derive(Debug)] pub struct Buffer { resource: native::Resource, @@ -226,7 +194,26 @@ pub struct Buffer { unsafe impl Send for Buffer {} unsafe impl Sync for Buffer {} -pub struct CommandEncoder {} +#[derive(Debug)] +pub struct Texture { + resource: native::Resource, + size: wgt::Extent3d, + sample_count: u32, +} + +unsafe impl Send for Texture {} +unsafe impl Sync for Texture {} + +#[derive(Debug)] +pub struct TextureView { + handle_srv: Option, + handle_rtv: Option, + handle_dsv: Option, + handle_uav: Option, +} + +unsafe impl Send for TextureView {} +unsafe impl Sync for TextureView {} impl crate::Instance for Instance { unsafe fn init(desc: &crate::InstanceDescriptor) -> Result { @@ -530,7 +517,7 @@ impl crate::Surface for Surface { ) -> Result>, crate::SurfaceError> { Ok(None) } - unsafe fn discard_texture(&mut self, texture: Resource) {} + unsafe fn discard_texture(&mut self, texture: Texture) {} } impl crate::Queue for Queue { @@ -544,7 +531,7 @@ impl crate::Queue for Queue { unsafe fn present( &mut self, surface: &mut Surface, - texture: Resource, + texture: Texture, ) -> Result<(), crate::SurfaceError> { Ok(()) } From 925ece8b40ece67ea7fb909d498a2a030e3780cd Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Tue, 6 Jul 2021 17:37:25 -0400 Subject: [PATCH 07/33] hal/dx12: creation of RTV/DSV/UAV --- wgpu-hal/src/dx12/device.rs | 292 +++++++++++++++++++++++++++++++++--- wgpu-hal/src/dx12/mod.rs | 5 +- 2 files changed, 276 insertions(+), 21 deletions(-) diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index badf407402..3b9e497ede 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -88,8 +88,7 @@ impl super::Device { Some(count) => count.get(), None => !0, }; - #[allow(non_snake_case)] - let ArraySize = match desc.range.array_layer_count { + let array_size = match desc.range.array_layer_count { Some(count) => count.get(), None => texture.size.depth_or_array_layers - desc.range.base_array_layer, }; @@ -133,7 +132,7 @@ impl super::Device { raw_desc.ViewDimension = d3d12::D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY; *raw_desc.u.Texture2DMSArray_mut() = d3d12::D3D12_TEX2DMS_ARRAY_SRV { FirstArraySlice: desc.range.base_array_layer, - ArraySize, + ArraySize: array_size, } } wgt::TextureViewDimension::D2Array => { @@ -142,7 
+141,7 @@ impl super::Device { MostDetailedMip: desc.range.base_mip_level, MipLevels, FirstArraySlice: desc.range.base_array_layer, - ArraySize, + ArraySize: array_size, PlaneSlice: 0, ResourceMinLODClamp: 0.0, } @@ -169,7 +168,7 @@ impl super::Device { MostDetailedMip: desc.range.base_mip_level, MipLevels, First2DArrayFace: desc.range.base_array_layer, - NumCubes: ArraySize / 6, + NumCubes: array_size / 6, ResourceMinLODClamp: 0.0, } } @@ -180,6 +179,243 @@ impl super::Device { .CreateShaderResourceView(texture.resource.as_mut_ptr(), &raw_desc, handle.raw); handle } + + unsafe fn view_texture_as_unoredered_access( + &self, + texture: &super::Texture, + desc: &crate::TextureViewDescriptor, + ) -> descriptor::Handle { + let mut raw_desc = d3d12::D3D12_UNORDERED_ACCESS_VIEW_DESC { + Format: conv::map_texture_format(desc.format), + ViewDimension: 0, + u: mem::zeroed(), + }; + + let array_size = match desc.range.array_layer_count { + Some(count) => count.get(), + None => texture.size.depth_or_array_layers - desc.range.base_array_layer, + }; + + match desc.dimension { + wgt::TextureViewDimension::D1 => { + raw_desc.ViewDimension = d3d12::D3D12_UAV_DIMENSION_TEXTURE1D; + *raw_desc.u.Texture1D_mut() = d3d12::D3D12_TEX1D_UAV { + MipSlice: desc.range.base_mip_level, + } + } + /* + wgt::TextureViewDimension::D1Array => { + raw_desc.ViewDimension = d3d12::D3D12_UAV_DIMENSION_TEXTURE1DARRAY; + *raw_desc.u.Texture1DArray_mut() = d3d12::D3D12_TEX1D_ARRAY_UAV { + MipSlice: desc.range.base_mip_level, + FirstArraySlice: desc.range.base_array_layer, + ArraySize, + } + }*/ + wgt::TextureViewDimension::D2 => { + raw_desc.ViewDimension = d3d12::D3D12_UAV_DIMENSION_TEXTURE2D; + *raw_desc.u.Texture2D_mut() = d3d12::D3D12_TEX2D_UAV { + MipSlice: desc.range.base_mip_level, + PlaneSlice: 0, + } + } + wgt::TextureViewDimension::D2Array => { + raw_desc.ViewDimension = d3d12::D3D12_UAV_DIMENSION_TEXTURE2DARRAY; + *raw_desc.u.Texture2DArray_mut() = d3d12::D3D12_TEX2D_ARRAY_UAV { + MipSlice: desc.range.base_mip_level, + FirstArraySlice: desc.range.base_array_layer, + ArraySize: array_size, + PlaneSlice: 0, + } + } + wgt::TextureViewDimension::D3 => { + raw_desc.ViewDimension = d3d12::D3D12_UAV_DIMENSION_TEXTURE3D; + *raw_desc.u.Texture3D_mut() = d3d12::D3D12_TEX3D_UAV { + MipSlice: desc.range.base_mip_level, + FirstWSlice: desc.range.base_array_layer, + WSize: array_size, + } + } + wgt::TextureViewDimension::Cube | wgt::TextureViewDimension::CubeArray => { + panic!("Unable to view texture as cube UAV") + } + } + + let handle = self.srv_uav_pool.lock().alloc_handle(); + self.raw.CreateUnorderedAccessView( + texture.resource.as_mut_ptr(), + ptr::null_mut(), + &raw_desc, + handle.raw, + ); + handle + } + + unsafe fn view_texture_as_render_target( + &self, + texture: &super::Texture, + desc: &crate::TextureViewDescriptor, + ) -> descriptor::Handle { + let mut raw_desc = d3d12::D3D12_RENDER_TARGET_VIEW_DESC { + Format: conv::map_texture_format(desc.format), + ViewDimension: 0, + u: mem::zeroed(), + }; + + let array_size = match desc.range.array_layer_count { + Some(count) => count.get(), + None => texture.size.depth_or_array_layers - desc.range.base_array_layer, + }; + + match desc.dimension { + wgt::TextureViewDimension::D1 => { + raw_desc.ViewDimension = d3d12::D3D12_RTV_DIMENSION_TEXTURE1D; + *raw_desc.u.Texture1D_mut() = d3d12::D3D12_TEX1D_RTV { + MipSlice: desc.range.base_mip_level, + } + } + /* + wgt::TextureViewDimension::D1Array => { + raw_desc.ViewDimension = d3d12::D3D12_RTV_DIMENSION_TEXTURE1DARRAY; + 
*raw_desc.u.Texture1DArray_mut() = d3d12::D3D12_TEX1D_ARRAY_RTV { + MipSlice: desc.range.base_mip_level, + FirstArraySlice: desc.range.base_array_layer, + ArraySize, + } + }*/ + wgt::TextureViewDimension::D2 if texture.sample_count > 1 => { + raw_desc.ViewDimension = d3d12::D3D12_RTV_DIMENSION_TEXTURE2DMS; + *raw_desc.u.Texture2DMS_mut() = d3d12::D3D12_TEX2DMS_RTV { + UnusedField_NothingToDefine: 0, + } + } + wgt::TextureViewDimension::D2 => { + raw_desc.ViewDimension = d3d12::D3D12_RTV_DIMENSION_TEXTURE2D; + *raw_desc.u.Texture2D_mut() = d3d12::D3D12_TEX2D_RTV { + MipSlice: desc.range.base_mip_level, + PlaneSlice: 0, + } + } + wgt::TextureViewDimension::D2Array if texture.sample_count > 1 => { + raw_desc.ViewDimension = d3d12::D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY; + *raw_desc.u.Texture2DMSArray_mut() = d3d12::D3D12_TEX2DMS_ARRAY_RTV { + FirstArraySlice: desc.range.base_array_layer, + ArraySize: array_size, + } + } + wgt::TextureViewDimension::D2Array => { + raw_desc.ViewDimension = d3d12::D3D12_RTV_DIMENSION_TEXTURE2DARRAY; + *raw_desc.u.Texture2DArray_mut() = d3d12::D3D12_TEX2D_ARRAY_RTV { + MipSlice: desc.range.base_mip_level, + FirstArraySlice: desc.range.base_array_layer, + ArraySize: array_size, + PlaneSlice: 0, + } + } + wgt::TextureViewDimension::D3 => { + raw_desc.ViewDimension = d3d12::D3D12_RTV_DIMENSION_TEXTURE3D; + *raw_desc.u.Texture3D_mut() = d3d12::D3D12_TEX3D_RTV { + MipSlice: desc.range.base_mip_level, + FirstWSlice: desc.range.base_array_layer, + WSize: array_size, + } + } + wgt::TextureViewDimension::Cube | wgt::TextureViewDimension::CubeArray => { + panic!("Unable to view texture as cube RTV") + } + } + + let handle = self.rtv_pool.lock().alloc_handle(); + self.raw + .CreateRenderTargetView(texture.resource.as_mut_ptr(), &raw_desc, handle.raw); + handle + } + + unsafe fn view_texture_as_depth_stencil( + &self, + texture: &super::Texture, + desc: &crate::TextureViewDescriptor, + read_only: bool, + ) -> descriptor::Handle { + let mut raw_desc = d3d12::D3D12_DEPTH_STENCIL_VIEW_DESC { + Format: conv::map_texture_format(desc.format), + ViewDimension: 0, + Flags: if read_only { + let aspects = crate::FormatAspects::from(desc.format); + let mut flags = 0; + if aspects.contains(crate::FormatAspects::DEPTH) { + flags |= d3d12::D3D12_DSV_FLAG_READ_ONLY_DEPTH; + } + if aspects.contains(crate::FormatAspects::STENCIL) { + flags |= d3d12::D3D12_DSV_FLAG_READ_ONLY_STENCIL; + } + flags + } else { + d3d12::D3D12_DSV_FLAG_NONE + }, + u: mem::zeroed(), + }; + + let array_size = match desc.range.array_layer_count { + Some(count) => count.get(), + None => texture.size.depth_or_array_layers - desc.range.base_array_layer, + }; + + match desc.dimension { + wgt::TextureViewDimension::D1 => { + raw_desc.ViewDimension = d3d12::D3D12_DSV_DIMENSION_TEXTURE1D; + *raw_desc.u.Texture1D_mut() = d3d12::D3D12_TEX1D_DSV { + MipSlice: desc.range.base_mip_level, + } + } + /* + wgt::TextureViewDimension::D1Array => { + raw_desc.ViewDimension = d3d12::D3D12_DSV_DIMENSION_TEXTURE1DARRAY; + *raw_desc.u.Texture1DArray_mut() = d3d12::D3D12_TEX1D_ARRAY_DSV { + MipSlice: desc.range.base_mip_level, + FirstArraySlice: desc.range.base_array_layer, + ArraySize, + } + }*/ + wgt::TextureViewDimension::D2 if texture.sample_count > 1 => { + raw_desc.ViewDimension = d3d12::D3D12_DSV_DIMENSION_TEXTURE2DMS; + *raw_desc.u.Texture2DMS_mut() = d3d12::D3D12_TEX2DMS_DSV { + UnusedField_NothingToDefine: 0, + } + } + wgt::TextureViewDimension::D2 => { + raw_desc.ViewDimension = d3d12::D3D12_DSV_DIMENSION_TEXTURE2D; + 
*raw_desc.u.Texture2D_mut() = d3d12::D3D12_TEX2D_DSV { + MipSlice: desc.range.base_mip_level, + } + } + wgt::TextureViewDimension::D2Array if texture.sample_count > 1 => { + raw_desc.ViewDimension = d3d12::D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY; + *raw_desc.u.Texture2DMSArray_mut() = d3d12::D3D12_TEX2DMS_ARRAY_DSV { + FirstArraySlice: desc.range.base_array_layer, + ArraySize: array_size, + } + } + wgt::TextureViewDimension::D2Array => { + raw_desc.ViewDimension = d3d12::D3D12_DSV_DIMENSION_TEXTURE2DARRAY; + *raw_desc.u.Texture2DArray_mut() = d3d12::D3D12_TEX2D_ARRAY_DSV { + MipSlice: desc.range.base_mip_level, + FirstArraySlice: desc.range.base_array_layer, + ArraySize: array_size, + } + } + wgt::TextureViewDimension::D3 + | wgt::TextureViewDimension::Cube + | wgt::TextureViewDimension::CubeArray => { + panic!("Unable to view texture as cube or 3D RTV") + } + } + + let handle = self.dsv_pool.lock().alloc_handle(); + self.raw + .CreateDepthStencilView(texture.resource.as_mut_ptr(), &raw_desc, handle.raw); + handle + } } impl crate::Device for super::Device { @@ -361,37 +597,55 @@ impl crate::Device for super::Device { } else { None }, + handle_uav: if desc.usage.intersects(crate::TextureUses::STORAGE_STORE) { + Some(self.view_texture_as_unoredered_access(texture, desc)) + } else { + None + }, handle_rtv: if desc.usage.intersects(crate::TextureUses::COLOR_TARGET) { - unimplemented!() + Some(self.view_texture_as_render_target(texture, desc)) } else { None }, - handle_dsv: if desc.usage.intersects( - crate::TextureUses::DEPTH_STENCIL_READ | crate::TextureUses::DEPTH_STENCIL_WRITE, - ) { - unimplemented!() + handle_dsv_ro: if desc + .usage + .intersects(crate::TextureUses::DEPTH_STENCIL_READ) + { + Some(self.view_texture_as_depth_stencil(texture, desc, true)) } else { None }, - handle_uav: if desc.usage.intersects(crate::TextureUses::STORAGE_STORE) { - unimplemented!() + handle_dsv_rw: if desc + .usage + .intersects(crate::TextureUses::DEPTH_STENCIL_WRITE) + { + Some(self.view_texture_as_depth_stencil(texture, desc, false)) } else { None }, }) } unsafe fn destroy_texture_view(&self, view: super::TextureView) { - if let Some(handle) = view.handle_srv { - self.srv_uav_pool.lock().free_handle(handle); - } - if let Some(handle) = view.handle_uav { - self.srv_uav_pool.lock().free_handle(handle); + if view.handle_srv.is_some() || view.handle_uav.is_some() { + let mut pool = self.srv_uav_pool.lock(); + if let Some(handle) = view.handle_srv { + pool.free_handle(handle); + } + if let Some(handle) = view.handle_uav { + pool.free_handle(handle); + } } if let Some(handle) = view.handle_rtv { self.rtv_pool.lock().free_handle(handle); } - if let Some(handle) = view.handle_dsv { - self.dsv_pool.lock().free_handle(handle); + if view.handle_dsv_ro.is_some() || view.handle_dsv_rw.is_some() { + let mut pool = self.dsv_pool.lock(); + if let Some(handle) = view.handle_dsv_ro { + pool.free_handle(handle); + } + if let Some(handle) = view.handle_dsv_rw { + pool.free_handle(handle); + } } } diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index de1d751a76..dc978a6a15 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -207,9 +207,10 @@ unsafe impl Sync for Texture {} #[derive(Debug)] pub struct TextureView { handle_srv: Option, - handle_rtv: Option, - handle_dsv: Option, handle_uav: Option, + handle_rtv: Option, + handle_dsv_ro: Option, + handle_dsv_rw: Option, } unsafe impl Send for TextureView {} From c7519432ee70455f673a1d596937c692d6ea5bc6 Mon Sep 17 00:00:00 2001 From: Dzmitry 
Malyshau Date: Tue, 6 Jul 2021 17:54:04 -0400 Subject: [PATCH 08/33] Filter texture usages on view creation --- wgpu-core/src/device/mod.rs | 22 +++++++++++++++++++++- wgpu-hal/src/vulkan/device.rs | 2 +- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 59de610407..f418783970 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -728,11 +728,31 @@ impl Device { }); } + // filter the usages based on the other criteria + let usage = { + let mask_copy = !(hal::TextureUses::COPY_SRC | hal::TextureUses::COPY_DST); + let mask_dimension = match view_dim { + wgt::TextureViewDimension::Cube | + wgt::TextureViewDimension::CubeArray => hal::TextureUses::SAMPLED, + wgt::TextureViewDimension::D3 => { + hal::TextureUses::SAMPLED | hal::TextureUses::STORAGE_LOAD | hal::TextureUses::STORAGE_STORE + } + _ => hal::TextureUses::all(), + }; + let mask_mip_level = if end_layer != desc.range.base_array_layer + 1 { + hal::TextureUses::SAMPLED + } else { + hal::TextureUses::all() + }; + texture.hal_usage & mask_copy & mask_dimension & mask_mip_level + }; + + log::debug!("Create view for texture {:?} filters usages to {:?}", texture_id, usage); let hal_desc = hal::TextureViewDescriptor { label: desc.label.borrow_option(), format, dimension: view_dim, - usage: texture.hal_usage, // pass-through + usage, range: desc.range.clone(), }; diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index a1a34a2555..47fc9a45f4 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -725,7 +725,7 @@ impl crate::Device for super::Device { .subresource_range(conv::map_subresource_range(&desc.range, texture.aspects)); let mut image_view_info; - if self.shared.private_caps.image_view_usage { + if self.shared.private_caps.image_view_usage && !desc.usage.is_empty() { image_view_info = vk::ImageViewUsageCreateInfo::builder() .usage(conv::map_texture_usage(desc.usage)) .build(); From 855689462c2c8c686102e51b1e7a96605e4b02f2 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Tue, 6 Jul 2021 22:32:57 -0400 Subject: [PATCH 09/33] hal/dx12: fences --- wgpu-hal/src/dx12/command.rs | 10 +++--- wgpu-hal/src/dx12/device.rs | 69 +++++++++++++++++++++++++----------- wgpu-hal/src/dx12/mod.rs | 25 ++++++++++--- 3 files changed, 75 insertions(+), 29 deletions(-) diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 76b2145c36..ef4e175cdb 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -60,13 +60,13 @@ impl crate::CommandEncoder for super::CommandEncoder { ) { } - unsafe fn begin_query(&mut self, set: &Resource, index: u32) {} - unsafe fn end_query(&mut self, set: &Resource, index: u32) {} - unsafe fn write_timestamp(&mut self, set: &Resource, index: u32) {} - unsafe fn reset_queries(&mut self, set: &Resource, range: Range) {} + unsafe fn begin_query(&mut self, set: &super::QuerySet, index: u32) {} + unsafe fn end_query(&mut self, set: &super::QuerySet, index: u32) {} + unsafe fn write_timestamp(&mut self, set: &super::QuerySet, index: u32) {} + unsafe fn reset_queries(&mut self, set: &super::QuerySet, range: Range) {} unsafe fn copy_query_results( &mut self, - set: &Resource, + set: &super::QuerySet, range: Range, buffer: &super::Buffer, offset: wgt::BufferAddress, diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 3b9e497ede..7c5aeccb39 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -2,7 
+2,7 @@ use super::{conv, descriptor, HResult as _}; use parking_lot::Mutex; use std::{iter, mem, ptr}; use winapi::{ - shared::{dxgiformat, dxgitype}, + shared::{dxgiformat, dxgitype, winerror}, um::{d3d12, d3d12sdklayers, synchapi, winbase}, Interface, }; @@ -10,8 +10,6 @@ use winapi::{ //TODO: remove this use super::Resource; -type DeviceResult = Result; - const D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING: u32 = 0x1688; fn wide_cstr(name: &str) -> Vec { @@ -649,10 +647,13 @@ impl crate::Device for super::Device { } } - unsafe fn create_sampler(&self, desc: &crate::SamplerDescriptor) -> DeviceResult { - Ok(Resource) + unsafe fn create_sampler( + &self, + desc: &crate::SamplerDescriptor, + ) -> Result { + Ok(super::Sampler {}) } - unsafe fn destroy_sampler(&self, sampler: Resource) {} + unsafe fn destroy_sampler(&self, sampler: super::Sampler) {} unsafe fn create_command_encoder( &self, @@ -665,21 +666,21 @@ impl crate::Device for super::Device { unsafe fn create_bind_group_layout( &self, desc: &crate::BindGroupLayoutDescriptor, - ) -> DeviceResult { + ) -> Result { Ok(Resource) } unsafe fn destroy_bind_group_layout(&self, bg_layout: Resource) {} unsafe fn create_pipeline_layout( &self, desc: &crate::PipelineLayoutDescriptor, - ) -> DeviceResult { + ) -> Result { Ok(Resource) } unsafe fn destroy_pipeline_layout(&self, pipeline_layout: Resource) {} unsafe fn create_bind_group( &self, desc: &crate::BindGroupDescriptor, - ) -> DeviceResult { + ) -> Result { Ok(Resource) } unsafe fn destroy_bind_group(&self, group: Resource) {} @@ -710,24 +711,52 @@ impl crate::Device for super::Device { unsafe fn create_query_set( &self, desc: &wgt::QuerySetDescriptor, - ) -> DeviceResult { - Ok(Resource) + ) -> Result { + Ok(super::QuerySet {}) } - unsafe fn destroy_query_set(&self, set: Resource) {} - unsafe fn create_fence(&self) -> DeviceResult { - Ok(Resource) + unsafe fn destroy_query_set(&self, set: super::QuerySet) {} + + unsafe fn create_fence(&self) -> Result { + let mut raw = native::Fence::null(); + let hr = self.raw.CreateFence( + 0, + d3d12::D3D12_FENCE_FLAG_NONE, + &d3d12::ID3D12Fence::uuidof(), + raw.mut_void(), + ); + hr.to_device_result("Fence creation")?; + Ok(super::Fence { raw }) } - unsafe fn destroy_fence(&self, fence: Resource) {} - unsafe fn get_fence_value(&self, fence: &Resource) -> DeviceResult { - Ok(0) + unsafe fn destroy_fence(&self, fence: super::Fence) { + fence.raw.destroy(); + } + unsafe fn get_fence_value( + &self, + fence: &super::Fence, + ) -> Result { + Ok(fence.raw.GetCompletedValue()) } unsafe fn wait( &self, - fence: &Resource, + fence: &super::Fence, value: crate::FenceValue, timeout_ms: u32, - ) -> DeviceResult { - Ok(true) + ) -> Result { + if fence.raw.GetCompletedValue() >= value { + return Ok(true); + } + let hr = fence.raw.set_event_on_completion(self.idler.event, value); + hr.to_device_result("Set event")?; + + match synchapi::WaitForSingleObject(self.idler.event.0, timeout_ms) { + winbase::WAIT_ABANDONED | winbase::WAIT_FAILED => Err(crate::DeviceError::Lost), + winbase::WAIT_OBJECT_0 => Ok(true), + winerror::WAIT_TIMEOUT => Ok(false), + other => { + log::error!("Unexpected wait status: 0x{:x}", other); + Err(crate::DeviceError::Lost) + } + } } unsafe fn start_capture(&self) -> bool { diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index dc978a6a15..8b4fa8157b 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -41,9 +41,9 @@ impl crate::Api for Api { type Texture = Texture; type SurfaceTexture = Texture; type TextureView = 
TextureView; - type Sampler = Resource; - type QuerySet = Resource; - type Fence = Resource; + type Sampler = Sampler; + type QuerySet = QuerySet; + type Fence = Fence; type BindGroupLayout = Resource; type BindGroup = Resource; @@ -216,6 +216,23 @@ pub struct TextureView { unsafe impl Send for TextureView {} unsafe impl Sync for TextureView {} +#[derive(Debug)] +pub struct Sampler {} + +unsafe impl Send for Sampler {} +unsafe impl Sync for Sampler {} + +#[derive(Debug)] +pub struct QuerySet {} + +#[derive(Debug)] +pub struct Fence { + raw: native::Fence, +} + +unsafe impl Send for Fence {} +unsafe impl Sync for Fence {} + impl crate::Instance for Instance { unsafe fn init(desc: &crate::InstanceDescriptor) -> Result { let lib_main = native::D3D12Lib::new().map_err(|_| crate::InstanceError)?; @@ -525,7 +542,7 @@ impl crate::Queue for Queue { unsafe fn submit( &mut self, command_buffers: &[&Resource], - signal_fence: Option<(&mut Resource, crate::FenceValue)>, + signal_fence: Option<(&mut Fence, crate::FenceValue)>, ) -> Result<(), crate::DeviceError> { Ok(()) } From 04ca3212de8429d98653ab184cd914dd277d6674 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Wed, 7 Jul 2021 00:00:21 -0400 Subject: [PATCH 10/33] hal/dx12: intercept debug output --- wgpu-hal/examples/halmark/main.rs | 14 +- wgpu-hal/src/dx12/instance.rs | 220 ++++++++++++++++++++++++++++++ wgpu-hal/src/dx12/mod.rs | 164 +--------------------- wgpu-hal/src/vulkan/instance.rs | 10 +- 4 files changed, 240 insertions(+), 168 deletions(-) create mode 100644 wgpu-hal/src/dx12/instance.rs diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index e738ea892e..047d0599d3 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -699,7 +699,19 @@ type Api = hal::api::Metal; type Api = hal::api::Vulkan; #[cfg(all(feature = "gles", not(feature = "metal"), not(feature = "vulkan")))] type Api = hal::api::Gles; -#[cfg(not(any(feature = "metal", feature = "vulkan", feature = "gles")))] +#[cfg(all( + feature = "dx12", + not(feature = "metal"), + not(feature = "vulkan"), + not(feature = "gles") +))] +type Api = hal::api::Dx12; +#[cfg(not(any( + feature = "metal", + feature = "vulkan", + feature = "gles", + feature = "dx12" +)))] type Api = hal::api::Empty; fn main() { diff --git a/wgpu-hal/src/dx12/instance.rs b/wgpu-hal/src/dx12/instance.rs new file mode 100644 index 0000000000..68dd256d39 --- /dev/null +++ b/wgpu-hal/src/dx12/instance.rs @@ -0,0 +1,220 @@ +use super::HResult as _; +use std::{borrow::Cow, slice, sync::Arc}; +use winapi::{ + shared::{dxgi, dxgi1_2, dxgi1_6, winerror}, + um::{errhandlingapi, winnt}, + vc::excpt, + Interface, +}; + +const MESSAGE_PREFIXES: &[(&str, log::Level)] = &[ + ("CORRUPTION", log::Level::Error), + ("ERROR", log::Level::Error), + ("WARNING", log::Level::Warn), + ("INFO", log::Level::Info), + ("MESSAGE", log::Level::Debug), +]; + +unsafe extern "system" fn output_debug_string_handler( + exception_info: *mut winnt::EXCEPTION_POINTERS, +) -> i32 { + // See https://stackoverflow.com/a/41480827 + let record = &*(*exception_info).ExceptionRecord; + if record.NumberParameters != 2 { + return excpt::EXCEPTION_CONTINUE_SEARCH; + } + let message = match record.ExceptionCode { + winnt::DBG_PRINTEXCEPTION_C => String::from_utf8_lossy(slice::from_raw_parts( + record.ExceptionInformation[1] as *const u8, + record.ExceptionInformation[0], + )), + winnt::DBG_PRINTEXCEPTION_WIDE_C => { + Cow::Owned(String::from_utf16_lossy(slice::from_raw_parts( + 
record.ExceptionInformation[1] as *const u16, + record.ExceptionInformation[0], + ))) + } + _ => return excpt::EXCEPTION_CONTINUE_SEARCH, + }; + + let (message, level) = match message.strip_prefix("D3D12 ") { + Some(msg) => { + match MESSAGE_PREFIXES + .iter() + .find(|&(prefix, level)| msg.starts_with(prefix)) + { + Some(&(prefix, level)) => (&msg[prefix.len() + 2..], level), + None => (msg, log::Level::Debug), + } + } + None => return excpt::EXCEPTION_CONTINUE_SEARCH, + }; + + log::log!(level, "D3D12: {}", message,); + excpt::EXCEPTION_CONTINUE_EXECUTION +} + +impl Drop for super::Instance { + fn drop(&mut self) { + unsafe { + self.factory.destroy(); + errhandlingapi::RemoveVectoredExceptionHandler(output_debug_string_handler as *mut _); + } + } +} + +impl crate::Instance for super::Instance { + unsafe fn init(desc: &crate::InstanceDescriptor) -> Result { + let lib_main = native::D3D12Lib::new().map_err(|_| crate::InstanceError)?; + + let lib_dxgi = native::DxgiLib::new().map_err(|_| crate::InstanceError)?; + let mut factory_flags = native::FactoryCreationFlags::empty(); + + if desc.flags.contains(crate::InstanceFlags::VALIDATION) { + // Enable debug layer + match lib_main.get_debug_interface() { + Ok(pair) => match pair.to_result() { + Ok(debug_controller) => { + debug_controller.enable_layer(); + debug_controller.Release(); + } + Err(err) => { + log::warn!("Unable to enable D3D12 debug interface: {}", err); + } + }, + Err(err) => { + log::warn!("Debug interface function for D3D12 not found: {:?}", err); + } + } + + // The `DXGI_CREATE_FACTORY_DEBUG` flag is only allowed to be passed to + // `CreateDXGIFactory2` if the debug interface is actually available. So + // we check for whether it exists first. + match lib_dxgi.get_debug_interface1() { + Ok(pair) => match pair.to_result() { + Ok(debug_controller) => { + debug_controller.destroy(); + factory_flags |= native::FactoryCreationFlags::DEBUG; + } + Err(err) => { + log::warn!("Unable to enable DXGI debug interface: {}", err); + } + }, + Err(err) => { + log::warn!("Debug interface function for DXGI not found: {:?}", err); + } + } + + // Intercept `OutputDebugString` calls + errhandlingapi::AddVectoredExceptionHandler(0, Some(output_debug_string_handler)); + } + + // Create DXGI factory + let factory = match lib_dxgi.create_factory2(factory_flags) { + Ok(pair) => match pair.to_result() { + Ok(factory) => factory, + Err(err) => { + log::warn!("Failed to create DXGI factory: {}", err); + return Err(crate::InstanceError); + } + }, + Err(err) => { + log::warn!("Factory creation function for DXGI not found: {:?}", err); + return Err(crate::InstanceError); + } + }; + + Ok(Self { + factory, + library: Arc::new(lib_main), + lib_dxgi, + }) + } + + unsafe fn create_surface( + &self, + has_handle: &impl raw_window_handle::HasRawWindowHandle, + ) -> Result { + match has_handle.raw_window_handle() { + raw_window_handle::RawWindowHandle::Windows(handle) => Ok(super::Surface { + factory: self.factory, + wnd_handle: handle.hwnd as *mut _, + swap_chain: None, + }), + _ => Err(crate::InstanceError), + } + } + unsafe fn destroy_surface(&self, _surface: super::Surface) { + // just drop + } + + unsafe fn enumerate_adapters(&self) -> Vec> { + // Try to use high performance order by default (returns None on Windows < 1803) + let factory6 = match self.factory.cast::().to_result() { + Ok(f6) => { + // It's okay to decrement the refcount here because we + // have another reference to the factory already owned by `self`. 
+ f6.destroy(); + Some(f6) + } + Err(err) => { + log::info!("Failed to cast DXGI to 1.6: {}", err); + None + } + }; + + // Enumerate adapters + let mut adapters = Vec::new(); + for cur_index in 0.. { + let raw = match factory6 { + Some(factory) => { + let mut adapter2 = native::WeakPtr::::null(); + let hr = factory.EnumAdapterByGpuPreference( + cur_index, + dxgi1_6::DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, + &dxgi1_2::IDXGIAdapter2::uuidof(), + adapter2.mut_void(), + ); + + if hr == winerror::DXGI_ERROR_NOT_FOUND { + break; + } + if let Err(err) = hr.to_result() { + log::error!("Failed enumerating adapters: {}", err); + break; + } + + adapter2 + } + None => { + let mut adapter1 = native::WeakPtr::::null(); + let hr = self + .factory + .EnumAdapters1(cur_index, adapter1.mut_void() as *mut *mut _); + + if hr == winerror::DXGI_ERROR_NOT_FOUND { + break; + } + if let Err(err) = hr.to_result() { + log::error!("Failed enumerating adapters: {}", err); + break; + } + + match adapter1.cast::().to_result() { + Ok(adapter2) => { + adapter1.destroy(); + adapter2 + } + Err(err) => { + log::error!("Failed casting to Adapter2: {}", err); + break; + } + } + } + }; + + adapters.extend(super::Adapter::expose(raw, &self.library)); + } + adapters + } +} diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 8b4fa8157b..6cec7e7fc9 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -12,11 +12,12 @@ mod command; mod conv; mod descriptor; mod device; +mod instance; use parking_lot::Mutex; use std::{borrow::Cow, ptr, sync::Arc}; use winapi::{ - shared::{dxgi, dxgi1_2, dxgi1_4, dxgi1_6, dxgitype, windef, winerror}, + shared::{dxgi, dxgi1_2, dxgi1_4, dxgitype, windef, winerror}, um::{d3d12, synchapi, winbase, winnt}, Interface as _, }; @@ -98,14 +99,6 @@ pub struct Instance { lib_dxgi: native::DxgiLib, } -impl Drop for Instance { - fn drop(&mut self) { - unsafe { - self.factory.destroy(); - } - } -} - unsafe impl Send for Instance {} unsafe impl Sync for Instance {} @@ -233,159 +226,6 @@ pub struct Fence { unsafe impl Send for Fence {} unsafe impl Sync for Fence {} -impl crate::Instance for Instance { - unsafe fn init(desc: &crate::InstanceDescriptor) -> Result { - let lib_main = native::D3D12Lib::new().map_err(|_| crate::InstanceError)?; - - let lib_dxgi = native::DxgiLib::new().map_err(|_| crate::InstanceError)?; - let mut factory_flags = native::FactoryCreationFlags::empty(); - - if desc.flags.contains(crate::InstanceFlags::VALIDATION) { - // Enable debug layer - match lib_main.get_debug_interface() { - Ok(pair) => match pair.to_result() { - Ok(debug_controller) => { - debug_controller.enable_layer(); - debug_controller.Release(); - } - Err(err) => { - log::warn!("Unable to enable D3D12 debug interface: {}", err); - } - }, - Err(err) => { - log::warn!("Debug interface function for D3D12 not found: {:?}", err); - } - } - - // The `DXGI_CREATE_FACTORY_DEBUG` flag is only allowed to be passed to - // `CreateDXGIFactory2` if the debug interface is actually available. So - // we check for whether it exists first. 
- match lib_dxgi.get_debug_interface1() { - Ok(pair) => match pair.to_result() { - Ok(debug_controller) => { - debug_controller.destroy(); - factory_flags |= native::FactoryCreationFlags::DEBUG; - } - Err(err) => { - log::warn!("Unable to enable DXGI debug interface: {}", err); - } - }, - Err(err) => { - log::warn!("Debug interface function for DXGI not found: {:?}", err); - } - } - } - - // Create DXGI factory - let factory = match lib_dxgi.create_factory2(factory_flags) { - Ok(pair) => match pair.to_result() { - Ok(factory) => factory, - Err(err) => { - log::warn!("Failed to create DXGI factory: {}", err); - return Err(crate::InstanceError); - } - }, - Err(err) => { - log::warn!("Factory creation function for DXGI not found: {:?}", err); - return Err(crate::InstanceError); - } - }; - - Ok(Self { - factory, - library: Arc::new(lib_main), - lib_dxgi, - }) - } - - unsafe fn create_surface( - &self, - has_handle: &impl raw_window_handle::HasRawWindowHandle, - ) -> Result { - match has_handle.raw_window_handle() { - raw_window_handle::RawWindowHandle::Windows(handle) => Ok(Surface { - factory: self.factory, - wnd_handle: handle.hwnd as *mut _, - swap_chain: None, - }), - _ => Err(crate::InstanceError), - } - } - unsafe fn destroy_surface(&self, _surface: Surface) { - // just drop - } - - unsafe fn enumerate_adapters(&self) -> Vec> { - // Try to use high performance order by default (returns None on Windows < 1803) - let factory6 = match self.factory.cast::().to_result() { - Ok(f6) => { - // It's okay to decrement the refcount here because we - // have another reference to the factory already owned by `self`. - f6.destroy(); - Some(f6) - } - Err(err) => { - log::info!("Failed to cast DXGI to 1.6: {}", err); - None - } - }; - - // Enumerate adapters - let mut adapters = Vec::new(); - for cur_index in 0.. 
{ - let raw = match factory6 { - Some(factory) => { - let mut adapter2 = native::WeakPtr::::null(); - let hr = factory.EnumAdapterByGpuPreference( - cur_index, - dxgi1_6::DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, - &dxgi1_2::IDXGIAdapter2::uuidof(), - adapter2.mut_void(), - ); - - if hr == winerror::DXGI_ERROR_NOT_FOUND { - break; - } - if let Err(err) = hr.to_result() { - log::error!("Failed enumerating adapters: {}", err); - break; - } - - adapter2 - } - None => { - let mut adapter1 = native::WeakPtr::::null(); - let hr = self - .factory - .EnumAdapters1(cur_index, adapter1.mut_void() as *mut *mut _); - - if hr == winerror::DXGI_ERROR_NOT_FOUND { - break; - } - if let Err(err) = hr.to_result() { - log::error!("Failed enumerating adapters: {}", err); - break; - } - - match adapter1.cast::().to_result() { - Ok(adapter2) => { - adapter1.destroy(); - adapter2 - } - Err(err) => { - log::error!("Failed casting to Adapter2: {}", err); - break; - } - } - } - }; - - adapters.extend(Adapter::expose(raw, &self.library)); - } - adapters - } -} - impl SwapChain { unsafe fn release_resources(self) -> native::WeakPtr { for resource in self.resources { diff --git a/wgpu-hal/src/vulkan/instance.rs b/wgpu-hal/src/vulkan/instance.rs index d19a946772..ae3c441034 100644 --- a/wgpu-hal/src/vulkan/instance.rs +++ b/wgpu-hal/src/vulkan/instance.rs @@ -23,7 +23,7 @@ unsafe extern "system" fn debug_utils_messenger_callback( return vk::FALSE; } - let message_severity = match message_severity { + let level = match message_severity { vk::DebugUtilsMessageSeverityFlagsEXT::ERROR => log::Level::Error, vk::DebugUtilsMessageSeverityFlagsEXT::WARNING => log::Level::Warn, vk::DebugUtilsMessageSeverityFlagsEXT::INFO => log::Level::Info, @@ -45,7 +45,7 @@ unsafe extern "system" fn debug_utils_messenger_callback( }; log::log!( - message_severity, + level, "{:?} [{} (0x{:x})]\n\t{}", message_type, message_id_name, @@ -64,7 +64,7 @@ unsafe extern "system" fn debug_utils_messenger_callback( .map(|lbl| CStr::from_ptr(lbl).to_string_lossy()) }) .collect::>(); - log::log!(message_severity, "\tqueues: {}", names.join(", ")); + log::log!(level, "\tqueues: {}", names.join(", ")); } if cd.cmd_buf_label_count != 0 { @@ -78,7 +78,7 @@ unsafe extern "system" fn debug_utils_messenger_callback( .map(|lbl| CStr::from_ptr(lbl).to_string_lossy()) }) .collect::>(); - log::log!(message_severity, "\tcommand buffers: {}", names.join(", ")); + log::log!(level, "\tcommand buffers: {}", names.join(", ")); } if cd.object_count != 0 { @@ -99,7 +99,7 @@ unsafe extern "system" fn debug_utils_messenger_callback( ) }) .collect::>(); - log::log!(message_severity, "\tobjects: {}", names.join(", ")); + log::log!(level, "\tobjects: {}", names.join(", ")); } vk::FALSE From 904621ee19ddc3b5f8d36d77a26c28c6a5e8a0fa Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Wed, 7 Jul 2021 00:34:11 -0400 Subject: [PATCH 11/33] hal/dx12: sampler and query set creation --- wgpu-hal/src/dx12/conv.rs | 41 ++++++++++++++++++++ wgpu-hal/src/dx12/device.rs | 76 +++++++++++++++++++++++++++++++++---- wgpu-hal/src/dx12/mod.rs | 21 ++++++++-- 3 files changed, 127 insertions(+), 11 deletions(-) diff --git a/wgpu-hal/src/dx12/conv.rs b/wgpu-hal/src/dx12/conv.rs index 42fb7c62b9..a4266f235a 100644 --- a/wgpu-hal/src/dx12/conv.rs +++ b/wgpu-hal/src/dx12/conv.rs @@ -159,3 +159,44 @@ pub fn map_texture_usage_to_resource_flags( flags } + +pub fn map_address_mode(mode: wgt::AddressMode) -> d3d12::D3D12_TEXTURE_ADDRESS_MODE { + use wgt::AddressMode as Am; + match mode { + Am::Repeat => 
d3d12::D3D12_TEXTURE_ADDRESS_MODE_WRAP, + Am::MirrorRepeat => d3d12::D3D12_TEXTURE_ADDRESS_MODE_MIRROR, + Am::ClampToEdge => d3d12::D3D12_TEXTURE_ADDRESS_MODE_CLAMP, + Am::ClampToBorder => d3d12::D3D12_TEXTURE_ADDRESS_MODE_BORDER, + //Am::MirrorClamp => d3d12::D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE, + } +} + +pub fn map_filter_mode(mode: wgt::FilterMode) -> d3d12::D3D12_FILTER_TYPE { + match mode { + wgt::FilterMode::Nearest => d3d12::D3D12_FILTER_TYPE_POINT, + wgt::FilterMode::Linear => d3d12::D3D12_FILTER_TYPE_LINEAR, + } +} + +pub fn map_comparison(func: wgt::CompareFunction) -> d3d12::D3D12_COMPARISON_FUNC { + use wgt::CompareFunction as Cf; + match func { + Cf::Never => d3d12::D3D12_COMPARISON_FUNC_NEVER, + Cf::Less => d3d12::D3D12_COMPARISON_FUNC_LESS, + Cf::LessEqual => d3d12::D3D12_COMPARISON_FUNC_LESS_EQUAL, + Cf::Equal => d3d12::D3D12_COMPARISON_FUNC_EQUAL, + Cf::GreaterEqual => d3d12::D3D12_COMPARISON_FUNC_GREATER_EQUAL, + Cf::Greater => d3d12::D3D12_COMPARISON_FUNC_GREATER, + Cf::NotEqual => d3d12::D3D12_COMPARISON_FUNC_NOT_EQUAL, + Cf::Always => d3d12::D3D12_COMPARISON_FUNC_ALWAYS, + } +} + +pub fn map_border_color(border_color: Option) -> [f32; 4] { + use wgt::SamplerBorderColor as Sbc; + match border_color { + Some(Sbc::TransparentBlack) | None => [0.0; 4], + Some(Sbc::OpaqueBlack) => [0.0, 0.0, 0.0, 1.0], + Some(Sbc::OpaqueWhite) => [1.0; 4], + } +} diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 7c5aeccb39..9b489c3374 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -53,6 +53,10 @@ impl super::Device { raw, native::DescriptorHeapType::CbvSrvUav, )), + sampler_pool: Mutex::new(descriptor::CpuPool::new( + raw, + native::DescriptorHeapType::Sampler, + )), }) } @@ -418,11 +422,10 @@ impl super::Device { impl crate::Device for super::Device { unsafe fn exit(self) { - //self.heap_srv_cbv_uav.0.destroy(); - //self.samplers.destroy(); self.rtv_pool.into_inner().destroy(); self.dsv_pool.into_inner().destroy(); self.srv_uav_pool.into_inner().destroy(); + self.sampler_pool.into_inner().destroy(); //self.descriptor_updater.lock().destroy(); @@ -651,17 +654,61 @@ impl crate::Device for super::Device { &self, desc: &crate::SamplerDescriptor, ) -> Result { - Ok(super::Sampler {}) + let handle = self.sampler_pool.lock().alloc_handle(); + + let reduction = match desc.compare { + Some(_) => d3d12::D3D12_FILTER_REDUCTION_TYPE_COMPARISON, + None => d3d12::D3D12_FILTER_REDUCTION_TYPE_STANDARD, + }; + let filter = conv::map_filter_mode(desc.min_filter) << d3d12::D3D12_MIN_FILTER_SHIFT + | conv::map_filter_mode(desc.mag_filter) << d3d12::D3D12_MAG_FILTER_SHIFT + | conv::map_filter_mode(desc.mipmap_filter) << d3d12::D3D12_MIP_FILTER_SHIFT + | reduction << d3d12::D3D12_FILTER_REDUCTION_TYPE_SHIFT + | desc + .anisotropy_clamp + .map_or(0, |_| d3d12::D3D12_FILTER_ANISOTROPIC); + + self.raw.create_sampler( + handle.raw, + filter, + [ + conv::map_address_mode(desc.address_modes[0]), + conv::map_address_mode(desc.address_modes[1]), + conv::map_address_mode(desc.address_modes[2]), + ], + 0.0, + desc.anisotropy_clamp.map_or(0, |aniso| aniso.get() as u32), + conv::map_comparison(desc.compare.unwrap_or(wgt::CompareFunction::Always)), + conv::map_border_color(desc.border_color), + desc.lod_clamp.clone().unwrap_or(0.0..16.0), + ); + + Ok(super::Sampler { handle }) + } + unsafe fn destroy_sampler(&self, sampler: super::Sampler) { + self.sampler_pool.lock().free_handle(sampler.handle); } - unsafe fn destroy_sampler(&self, sampler: super::Sampler) {} 
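
For reference, the filter word assembled in create_sampler above can be sketched in isolation. This is an illustrative helper only (compose_filter is a hypothetical name, not part of the patch); it assumes the same winapi `d3d12` constants and the `conv::map_filter_mode` helper that the hunk itself uses, plus the crate-wide `wgt` alias, and it mirrors how the min/mag/mip filter types, the reduction type, and the anisotropy override are packed into one D3D12_FILTER value:

use std::num::NonZeroU8;
use winapi::um::d3d12;
use super::conv; // the dx12 conv module added in this patch

/// Hypothetical helper mirroring the filter composition in create_sampler.
fn compose_filter(
    min: wgt::FilterMode,
    mag: wgt::FilterMode,
    mip: wgt::FilterMode,
    compare: Option<wgt::CompareFunction>,
    anisotropy_clamp: Option<NonZeroU8>,
) -> d3d12::D3D12_FILTER {
    // Comparison samplers select the "comparison" reduction type; everything else
    // uses the standard reduction.
    let reduction = match compare {
        Some(_) => d3d12::D3D12_FILTER_REDUCTION_TYPE_COMPARISON,
        None => d3d12::D3D12_FILTER_REDUCTION_TYPE_STANDARD,
    };
    // Each filter type occupies its own bit field of the packed filter word.
    let mut filter = conv::map_filter_mode(min) << d3d12::D3D12_MIN_FILTER_SHIFT
        | conv::map_filter_mode(mag) << d3d12::D3D12_MAG_FILTER_SHIFT
        | conv::map_filter_mode(mip) << d3d12::D3D12_MIP_FILTER_SHIFT
        | reduction << d3d12::D3D12_FILTER_REDUCTION_TYPE_SHIFT;
    // Requesting any anisotropy switches the whole filter to anisotropic mode;
    // the clamp value itself ends up in MaxAnisotropy on the sampler description.
    if anisotropy_clamp.is_some() {
        filter |= d3d12::D3D12_FILTER_ANISOTROPIC;
    }
    filter
}

For example, all-linear filtering with no comparison and no anisotropy composes to D3D12_FILTER_MIN_MAG_MIP_LINEAR (0x15).
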
unsafe fn create_command_encoder( &self, desc: &crate::CommandEncoderDescriptor, ) -> Result { - Ok(super::CommandEncoder {}) + let allocator = self + .raw + .create_command_allocator(native::CmdListType::Direct) + .to_device_result("Command allocator creation")?; + Ok(super::CommandEncoder { + allocator, + list: None, + }) + } + unsafe fn destroy_command_encoder(&self, encoder: super::CommandEncoder) { + if let Some(list) = encoder.list { + list.close(); + list.destroy(); + } + encoder.allocator.destroy(); } - unsafe fn destroy_command_encoder(&self, encoder: super::CommandEncoder) {} unsafe fn create_bind_group_layout( &self, @@ -712,9 +759,22 @@ impl crate::Device for super::Device { &self, desc: &wgt::QuerySetDescriptor, ) -> Result { - Ok(super::QuerySet {}) + let heap_ty = match desc.ty { + wgt::QueryType::Occlusion => native::QueryHeapType::Occlusion, + wgt::QueryType::PipelineStatistics(_) => native::QueryHeapType::PipelineStatistics, + wgt::QueryType::Timestamp => native::QueryHeapType::Timestamp, + }; + + let raw = self + .raw + .create_query_heap(heap_ty, desc.count, 0) + .to_device_result("Query heap creation")?; + + Ok(super::QuerySet { raw, ty: desc.ty }) + } + unsafe fn destroy_query_set(&self, set: super::QuerySet) { + set.raw.destroy(); } - unsafe fn destroy_query_set(&self, set: super::QuerySet) {} unsafe fn create_fence(&self) -> Result { let mut raw = native::Fence::null(); diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 6cec7e7fc9..62a620c3ec 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -165,6 +165,7 @@ pub struct Device { rtv_pool: Mutex, dsv_pool: Mutex, srv_uav_pool: Mutex, + sampler_pool: Mutex, } unsafe impl Send for Device {} @@ -177,7 +178,13 @@ pub struct Queue { unsafe impl Send for Queue {} unsafe impl Sync for Queue {} -pub struct CommandEncoder {} +pub struct CommandEncoder { + allocator: native::CommandAllocator, + list: Option, +} + +unsafe impl Send for CommandEncoder {} +unsafe impl Sync for CommandEncoder {} #[derive(Debug)] pub struct Buffer { @@ -210,13 +217,21 @@ unsafe impl Send for TextureView {} unsafe impl Sync for TextureView {} #[derive(Debug)] -pub struct Sampler {} +pub struct Sampler { + handle: descriptor::Handle, +} unsafe impl Send for Sampler {} unsafe impl Sync for Sampler {} #[derive(Debug)] -pub struct QuerySet {} +pub struct QuerySet { + raw: native::QueryHeap, + ty: wgt::QueryType, +} + +unsafe impl Send for QuerySet {} +unsafe impl Sync for QuerySet {} #[derive(Debug)] pub struct Fence { From e128021aee0e2f9f157e34944f23c9f6ad03e83e Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Wed, 7 Jul 2021 01:39:32 -0400 Subject: [PATCH 12/33] hal/dx12: pipeline layout --- wgpu-hal/src/dx12/adapter.rs | 2 +- wgpu-hal/src/dx12/command.rs | 6 +- wgpu-hal/src/dx12/conv.rs | 33 +++++ wgpu-hal/src/dx12/device.rs | 235 +++++++++++++++++++++++++++++++++-- wgpu-hal/src/dx12/mod.rs | 44 ++++++- 5 files changed, 303 insertions(+), 17 deletions(-) diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index e733de4c92..00220b1619 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -240,7 +240,7 @@ impl crate::Adapter for super::Adapter { ) .to_device_result("Queue creation")?; - let device = super::Device::new(self.device, queue, self.private_caps)?; + let device = super::Device::new(self.device, queue, self.private_caps, &self.library)?; Ok(crate::OpenDevice { device, queue: super::Queue { raw: queue }, diff --git a/wgpu-hal/src/dx12/command.rs 
b/wgpu-hal/src/dx12/command.rs index ef4e175cdb..8eef9aff33 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -81,15 +81,15 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn set_bind_group( &mut self, - layout: &Resource, + layout: &super::PipelineLayout, index: u32, - group: &Resource, + group: &super::BindGroup, dynamic_offsets: &[wgt::DynamicOffset], ) { } unsafe fn set_push_constants( &mut self, - layout: &Resource, + layout: &super::PipelineLayout, stages: wgt::ShaderStages, offset: u32, data: &[u32], diff --git a/wgpu-hal/src/dx12/conv.rs b/wgpu-hal/src/dx12/conv.rs index a4266f235a..c268695863 100644 --- a/wgpu-hal/src/dx12/conv.rs +++ b/wgpu-hal/src/dx12/conv.rs @@ -200,3 +200,36 @@ pub fn map_border_color(border_color: Option) -> [f32; Some(Sbc::OpaqueWhite) => [1.0; 4], } } + +pub fn map_visibility(visibility: wgt::ShaderStages) -> native::ShaderVisibility { + match visibility { + wgt::ShaderStages::VERTEX => native::ShaderVisibility::VS, + wgt::ShaderStages::FRAGMENT => native::ShaderVisibility::PS, + _ => native::ShaderVisibility::All, + } +} + +pub fn map_binding_type(ty: &wgt::BindingType) -> native::DescriptorRangeType { + use wgt::BindingType as Bt; + match *ty { + Bt::Sampler { .. } => native::DescriptorRangeType::Sampler, + Bt::Buffer { + ty: wgt::BufferBindingType::Uniform, + .. + } => native::DescriptorRangeType::CBV, + Bt::Buffer { + ty: wgt::BufferBindingType::Storage { read_only: true }, + .. + } + | Bt::Texture { .. } + | Bt::StorageTexture { + access: wgt::StorageTextureAccess::ReadOnly, + .. + } => native::DescriptorRangeType::SRV, + Bt::Buffer { + ty: wgt::BufferBindingType::Storage { read_only: false }, + .. + } + | Bt::StorageTexture { .. } => native::DescriptorRangeType::UAV, + } +} diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 9b489c3374..aaf62ecfd5 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -1,6 +1,6 @@ use super::{conv, descriptor, HResult as _}; use parking_lot::Mutex; -use std::{iter, mem, ptr}; +use std::{iter, mem, ptr, sync::Arc}; use winapi::{ shared::{dxgiformat, dxgitype, winerror}, um::{d3d12, d3d12sdklayers, synchapi, winbase}, @@ -21,6 +21,7 @@ impl super::Device { raw: native::Device, present_queue: native::CommandQueue, private_caps: super::PrivateCapabilities, + library: &Arc, ) -> Result { let mut idle_fence = native::Fence::null(); let hr = unsafe { @@ -57,6 +58,7 @@ impl super::Device { raw, native::DescriptorHeapType::Sampler, )), + library: Arc::clone(library), }) } @@ -713,24 +715,237 @@ impl crate::Device for super::Device { unsafe fn create_bind_group_layout( &self, desc: &crate::BindGroupLayoutDescriptor, - ) -> Result { - Ok(Resource) + ) -> Result { + Ok(super::BindGroupLayout { + entries: desc.entries.to_vec(), + }) + } + unsafe fn destroy_bind_group_layout(&self, _bg_layout: super::BindGroupLayout) { + // just drop } - unsafe fn destroy_bind_group_layout(&self, bg_layout: Resource) {} unsafe fn create_pipeline_layout( &self, desc: &crate::PipelineLayoutDescriptor, - ) -> Result { - Ok(Resource) + ) -> Result { + // Pipeline layouts are implemented as RootSignature for D3D12. + // + // Push Constants are implemented as root constants. + // + // Each descriptor set layout will be one table entry of the root signature. + // We have the additional restriction that SRV/CBV/UAV and samplers need to be + // separated, so each set layout will actually occupy up to 2 entries! 
+ // SRV/CBV/UAV tables are added to the signature first, then Sampler tables, + // and finally dynamic uniform descriptors. + // + // Dynamic uniform buffers are implemented as root descriptors. + // This allows to handle the dynamic offsets properly, which would not be feasible + // with a combination of root constant and descriptor table. + // + // Root signature layout: + // Root Constants: Register: Offest/4, Space: 0 + // ... + // DescriptorTable0: Space: 1 (SrvCbvUav) + // DescriptorTable0: Space: 1 (Sampler) + // Root Descriptors 0 + // DescriptorTable1: Space: 2 (SrvCbvUav) + // Root Descriptors 1 + // ... + + let mut root_offset = 0u32; + let root_constants: &[()] = &[]; + + // Number of elements in the root signature. + let total_parameters = root_constants.len() + desc.bind_group_layouts.len() * 2; + // Guarantees that no re-allocation is done, and our pointers are valid + let mut parameters = Vec::with_capacity(total_parameters); + let mut parameter_offsets = Vec::with_capacity(total_parameters); + + let root_space_offset = if !root_constants.is_empty() { 1 } else { 0 }; + // Collect the whole number of bindings we will create upfront. + // It allows us to preallocate enough storage to avoid reallocation, + // which could cause invalid pointers. + let total_non_dynamic_entries = desc + .bind_group_layouts + .iter() + .flat_map(|bgl| { + bgl.entries.iter().map(|entry| match entry.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } => 0, + _ => 1, + }) + }) + .sum(); + let mut ranges = Vec::with_capacity(total_non_dynamic_entries); + + let mut root_elements = + arrayvec::ArrayVec::<[super::RootElement; crate::MAX_BIND_GROUPS]>::default(); + for (index, bgl) in desc.bind_group_layouts.iter().enumerate() { + let space = root_space_offset + index as u32; + let mut types = super::TableTypes::empty(); + let root_table_offset = root_offset as usize; + + let mut visibility_view_static = wgt::ShaderStages::empty(); + let mut visibility_view_dynamic = wgt::ShaderStages::empty(); + let mut visibility_sampler = wgt::ShaderStages::empty(); + for entry in bgl.entries.iter() { + match entry.ty { + wgt::BindingType::Sampler { .. } => visibility_sampler |= entry.visibility, + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } => visibility_view_dynamic |= entry.visibility, + _ => visibility_view_static |= entry.visibility, + } + } + + // SRV/CBV/UAV descriptor tables + let mut range_base = ranges.len(); + for entry in bgl.entries.iter() { + let range_ty = match entry.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } + | wgt::BindingType::Sampler { .. } => continue, + ref other => conv::map_binding_type(other), + }; + ranges.push(native::DescriptorRange::new( + range_ty, + entry.count.map_or(1, |count| count.get()), + native::Binding { + register: entry.binding, + space, + }, + d3d12::D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND, + )); + } + if ranges.len() > range_base { + parameter_offsets.push(root_offset); + parameters.push(native::RootParameter::descriptor_table( + conv::map_visibility(visibility_view_static), + &ranges[range_base..], + )); + types |= super::TableTypes::SRV_CBV_UAV; + root_offset += 1; + } + + // Sampler descriptor tables + range_base = ranges.len(); + for entry in bgl.entries.iter() { + let range_ty = match entry.ty { + wgt::BindingType::Sampler { .. 
} => native::DescriptorRangeType::Sampler, + _ => continue, + }; + ranges.push(native::DescriptorRange::new( + range_ty, + entry.count.map_or(1, |count| count.get()), + native::Binding { + register: entry.binding, + space, + }, + d3d12::D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND, + )); + } + if ranges.len() > range_base { + parameter_offsets.push(root_offset); + parameters.push(native::RootParameter::descriptor_table( + conv::map_visibility(visibility_sampler), + &ranges[range_base..], + )); + types |= super::TableTypes::SAMPLERS; + root_offset += 1; + } + + // Root (dynamic) descriptor tables + let dynamic_buffers_visibility = conv::map_visibility(visibility_view_dynamic); + for entry in bgl.entries.iter() { + let buffer_ty = match entry.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + ty, + .. + } => ty, + _ => continue, + }; + let binding = native::Binding { + register: entry.binding, + space, + }; + let param = match buffer_ty { + wgt::BufferBindingType::Uniform => { + native::RootParameter::cbv_descriptor(dynamic_buffers_visibility, binding) + } + wgt::BufferBindingType::Storage { read_only: true } => { + native::RootParameter::srv_descriptor(dynamic_buffers_visibility, binding) + } + wgt::BufferBindingType::Storage { read_only: false } => { + native::RootParameter::uav_descriptor(dynamic_buffers_visibility, binding) + } + }; + parameter_offsets.push(root_offset); + parameters.push(param); + root_offset += 2; // root view costs 2 words + } + + root_elements.push(super::RootElement { + types, + offset: root_table_offset, + }); + } + + // Ensure that we didn't reallocate! + debug_assert_eq!(ranges.len(), total_non_dynamic_entries); + assert_eq!(parameters.len(), parameter_offsets.len()); + + let (blob, error) = self + .library + .serialize_root_signature( + native::RootSignatureVersion::V1_0, + ¶meters, + &[], + native::RootSignatureFlags::ALLOW_IA_INPUT_LAYOUT, + ) + .map_err(|e| { + log::error!("Unable to find serialization function: {:?}", e); + crate::DeviceError::Lost + })? 
+ .to_device_result("Root signature serialization")?; + + if !error.is_null() { + log::error!( + "Root signature serialization error: {:?}", + error.as_c_str().to_str().unwrap() + ); + error.destroy(); + return Err(crate::DeviceError::Lost); + } + + let raw = self + .raw + .create_root_signature(blob, 0) + .to_device_result("Root signature creation")?; + blob.destroy(); + + Ok(super::PipelineLayout { + raw, + parameter_offsets, + total_slots: root_offset, + elements: root_elements, + }) + } + unsafe fn destroy_pipeline_layout(&self, pipeline_layout: super::PipelineLayout) { + pipeline_layout.raw.destroy(); } - unsafe fn destroy_pipeline_layout(&self, pipeline_layout: Resource) {} + unsafe fn create_bind_group( &self, desc: &crate::BindGroupDescriptor, - ) -> Result { - Ok(Resource) + ) -> Result { + Ok(super::BindGroup {}) } - unsafe fn destroy_bind_group(&self, group: Resource) {} + unsafe fn destroy_bind_group(&self, group: super::BindGroup) {} unsafe fn create_shader_module( &self, diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 62a620c3ec..4ace6d4dee 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -46,9 +46,9 @@ impl crate::Api for Api { type QuerySet = QuerySet; type Fence = Fence; - type BindGroupLayout = Resource; - type BindGroup = Resource; - type PipelineLayout = Resource; + type BindGroupLayout = BindGroupLayout; + type BindGroup = BindGroup; + type PipelineLayout = PipelineLayout; type ShaderModule = Resource; type RenderPipeline = Resource; type ComputePipeline = Resource; @@ -166,6 +166,8 @@ pub struct Device { dsv_pool: Mutex, srv_uav_pool: Mutex, sampler_pool: Mutex, + // library + library: Arc, } unsafe impl Send for Device {} @@ -241,6 +243,42 @@ pub struct Fence { unsafe impl Send for Fence {} unsafe impl Sync for Fence {} +pub struct BindGroupLayout { + /// Sorted list of entries. + entries: Vec, +} + +#[derive(Debug)] +pub struct BindGroup {} + +bitflags::bitflags! { + struct TableTypes: u8 { + const SRV_CBV_UAV = 0x1; + const SAMPLERS = 0x2; + } +} + +type RootSignatureOffset = usize; + +pub struct RootElement { + types: TableTypes, + offset: RootSignatureOffset, +} + +pub struct PipelineLayout { + raw: native::RootSignature, + /// A root offset per parameter. + parameter_offsets: Vec, + /// Total number of root slots occupied by the layout. + total_slots: u32, + // Storing for each associated bind group, which tables we created + // in the root signature. This is required for binding descriptor sets. 
+ elements: arrayvec::ArrayVec<[RootElement; crate::MAX_BIND_GROUPS]>, +} + +unsafe impl Send for PipelineLayout {} +unsafe impl Sync for PipelineLayout {} + impl SwapChain { unsafe fn release_resources(self) -> native::WeakPtr { for resource in self.resources { From 43a39a229e0fc04cc4b9f8c45db9c34debce09c2 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Wed, 7 Jul 2021 02:03:54 -0400 Subject: [PATCH 13/33] hal/dx12: command pooling --- wgpu-hal/src/dx12/command.rs | 42 +++++++++++++++++++++++++++++++----- wgpu-hal/src/dx12/conv.rs | 5 +++++ wgpu-hal/src/dx12/device.rs | 13 +++++------ wgpu-hal/src/dx12/mod.rs | 13 +++++++++-- 4 files changed, 60 insertions(+), 13 deletions(-) diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 8eef9aff33..7bd58f7de9 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -1,15 +1,47 @@ -use super::Resource; +use super::{conv, HResult as _, Resource}; use std::ops::Range; impl crate::CommandEncoder for super::CommandEncoder { unsafe fn begin_encoding(&mut self, label: crate::Label) -> Result<(), crate::DeviceError> { + let list = match self.free_lists.pop() { + Some(list) => { + list.reset(self.allocator, native::PipelineState::null()); + list + } + None => self + .device + .create_graphics_command_list( + native::CmdListType::Direct, + self.allocator, + native::PipelineState::null(), + 0, + ) + .to_device_result("Create command list")?, + }; + + if let Some(label) = label { + let cwstr = conv::map_label(label); + list.SetName(cwstr.as_ptr()); + } + self.list = Some(list); Ok(()) } - unsafe fn discard_encoding(&mut self) {} - unsafe fn end_encoding(&mut self) -> Result { - Ok(Resource) + unsafe fn discard_encoding(&mut self) { + if let Some(list) = self.list.take() { + list.close(); + self.free_lists.push(list); + } + } + unsafe fn end_encoding(&mut self) -> Result { + let raw = self.list.take().unwrap(); + raw.close(); + Ok(super::CommandBuffer { raw }) + } + unsafe fn reset_all>(&mut self, command_buffers: I) { + for cmd_buf in command_buffers { + self.free_lists.push(cmd_buf.raw); + } } - unsafe fn reset_all(&mut self, command_buffers: I) {} unsafe fn transition_buffers<'a, T>(&mut self, barriers: T) where diff --git a/wgpu-hal/src/dx12/conv.rs b/wgpu-hal/src/dx12/conv.rs index c268695863..ab30e9116a 100644 --- a/wgpu-hal/src/dx12/conv.rs +++ b/wgpu-hal/src/dx12/conv.rs @@ -1,3 +1,4 @@ +use std::iter; use winapi::{ shared::{dxgi1_2, dxgiformat}, um::d3d12, @@ -233,3 +234,7 @@ pub fn map_binding_type(ty: &wgt::BindingType) -> native::DescriptorRangeType { | Bt::StorageTexture { .. 
} => native::DescriptorRangeType::UAV, } } + +pub fn map_label(name: &str) -> Vec { + name.encode_utf16().chain(iter::once(0)).collect() +} diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index aaf62ecfd5..3750d0b3da 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -1,6 +1,6 @@ use super::{conv, descriptor, HResult as _}; use parking_lot::Mutex; -use std::{iter, mem, ptr, sync::Arc}; +use std::{mem, ptr, sync::Arc}; use winapi::{ shared::{dxgiformat, dxgitype, winerror}, um::{d3d12, d3d12sdklayers, synchapi, winbase}, @@ -12,10 +12,6 @@ use super::Resource; const D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING: u32 = 0x1688; -fn wide_cstr(name: &str) -> Vec { - name.encode_utf16().chain(iter::once(0)).collect() -} - impl super::Device { pub(super) fn new( raw: native::Device, @@ -571,7 +567,7 @@ impl crate::Device for super::Device { ); if let Some(label) = desc.label { - let cwstr = wide_cstr(label); + let cwstr = conv::map_label(label); resource.SetName(cwstr.as_ptr()); } @@ -701,7 +697,9 @@ impl crate::Device for super::Device { .to_device_result("Command allocator creation")?; Ok(super::CommandEncoder { allocator, + device: self.raw, list: None, + free_lists: Vec::new(), }) } unsafe fn destroy_command_encoder(&self, encoder: super::CommandEncoder) { @@ -709,6 +707,9 @@ impl crate::Device for super::Device { list.close(); list.destroy(); } + for list in encoder.free_lists { + list.destroy(); + } encoder.allocator.destroy(); } diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 4ace6d4dee..7b83c223c7 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -36,7 +36,7 @@ impl crate::Api for Api { type Queue = Queue; type CommandEncoder = CommandEncoder; - type CommandBuffer = Resource; + type CommandBuffer = CommandBuffer; type Buffer = Buffer; type Texture = Texture; @@ -182,12 +182,21 @@ unsafe impl Sync for Queue {} pub struct CommandEncoder { allocator: native::CommandAllocator, + device: native::Device, list: Option, + free_lists: Vec, } unsafe impl Send for CommandEncoder {} unsafe impl Sync for CommandEncoder {} +pub struct CommandBuffer { + raw: native::GraphicsCommandList, +} + +unsafe impl Send for CommandBuffer {} +unsafe impl Sync for CommandBuffer {} + #[derive(Debug)] pub struct Buffer { resource: native::Resource, @@ -434,7 +443,7 @@ impl crate::Surface for Surface { impl crate::Queue for Queue { unsafe fn submit( &mut self, - command_buffers: &[&Resource], + command_buffers: &[&CommandBuffer], signal_fence: Option<(&mut Fence, crate::FenceValue)>, ) -> Result<(), crate::DeviceError> { Ok(()) From 4f6812fc36e037b44f9ec454a17d4cd5fc56834c Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Wed, 7 Jul 2021 19:51:26 -0400 Subject: [PATCH 14/33] hal/dx12: resource barriers --- wgpu-hal/src/dx12/command.rs | 106 +++++++++++++++++++++++++++++++++- wgpu-hal/src/dx12/conv.rs | 63 ++++++++++++++++++++ wgpu-hal/src/dx12/device.rs | 8 +++ wgpu-hal/src/dx12/instance.rs | 7 ++- wgpu-hal/src/dx12/mod.rs | 20 +++++++ wgpu-hal/src/lib.rs | 1 + 6 files changed, 203 insertions(+), 2 deletions(-) diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 7bd58f7de9..d299099c24 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -1,5 +1,6 @@ use super::{conv, HResult as _, Resource}; -use std::ops::Range; +use std::{mem, ops::Range}; +use winapi::um::d3d12; impl crate::CommandEncoder for super::CommandEncoder { unsafe fn begin_encoding(&mut self, label: 
crate::Label) -> Result<(), crate::DeviceError> { @@ -24,6 +25,7 @@ impl crate::CommandEncoder for super::CommandEncoder { list.SetName(cwstr.as_ptr()); } self.list = Some(list); + self.temp.clear(); Ok(()) } unsafe fn discard_encoding(&mut self) { @@ -47,12 +49,114 @@ impl crate::CommandEncoder for super::CommandEncoder { where T: Iterator>, { + self.temp.barriers.clear(); + + for barrier in barriers { + let s0 = conv::map_buffer_usage_to_state(barrier.usage.start); + let s1 = conv::map_buffer_usage_to_state(barrier.usage.end); + if s0 != s1 { + let mut raw = d3d12::D3D12_RESOURCE_BARRIER { + Type: d3d12::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + Flags: d3d12::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: mem::zeroed(), + }; + *raw.u.Transition_mut() = d3d12::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: barrier.buffer.resource.as_mut_ptr(), + Subresource: d3d12::D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + StateBefore: s0, + StateAfter: s1, + }; + self.temp.barriers.push(raw); + } else if barrier.usage.start == crate::BufferUses::STORAGE_STORE { + let mut raw = d3d12::D3D12_RESOURCE_BARRIER { + Type: d3d12::D3D12_RESOURCE_BARRIER_TYPE_UAV, + Flags: d3d12::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: mem::zeroed(), + }; + *raw.u.UAV_mut() = d3d12::D3D12_RESOURCE_UAV_BARRIER { + pResource: barrier.buffer.resource.as_mut_ptr(), + }; + self.temp.barriers.push(raw); + } + } + + if !self.temp.barriers.is_empty() { + self.list + .unwrap() + .ResourceBarrier(self.temp.barriers.len() as u32, self.temp.barriers.as_ptr()); + } } unsafe fn transition_textures<'a, T>(&mut self, barriers: T) where T: Iterator>, { + self.temp.barriers.clear(); + + for barrier in barriers { + let s0 = conv::map_texture_usage_to_state(barrier.usage.start); + let s1 = conv::map_texture_usage_to_state(barrier.usage.end); + if s0 != s1 { + let mut raw = d3d12::D3D12_RESOURCE_BARRIER { + Type: d3d12::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + Flags: d3d12::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: mem::zeroed(), + }; + *raw.u.Transition_mut() = d3d12::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: barrier.texture.resource.as_mut_ptr(), + Subresource: d3d12::D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + StateBefore: s0, + StateAfter: s1, + }; + + let mip_level_count = match barrier.range.mip_level_count { + Some(count) => count.get(), + None => barrier.texture.mip_level_count - barrier.range.base_mip_level, + }; + let array_layer_count = match barrier.range.array_layer_count { + Some(count) => count.get(), + None => barrier.texture.array_layer_count - barrier.range.base_array_layer, + }; + + if barrier.range.aspect == wgt::TextureAspect::All + && barrier.range.base_mip_level + mip_level_count + == barrier.texture.mip_level_count + && barrier.range.base_array_layer + array_layer_count + == barrier.texture.array_layer_count + { + // Only one barrier if it affects the whole image. + self.temp.barriers.push(raw); + } else { + // Generate barrier for each layer/level combination. 
+ for rel_mip_level in 0..mip_level_count { + for rel_array_layer in 0..array_layer_count { + raw.u.Transition_mut().Subresource = barrier.texture.calc_subresource( + barrier.range.base_mip_level + rel_mip_level, + barrier.range.base_array_layer + rel_array_layer, + 0, + ); + } + } + self.temp.barriers.push(raw); + } + } else if barrier.usage.start == crate::TextureUses::STORAGE_STORE { + let mut raw = d3d12::D3D12_RESOURCE_BARRIER { + Type: d3d12::D3D12_RESOURCE_BARRIER_TYPE_UAV, + Flags: d3d12::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: mem::zeroed(), + }; + *raw.u.UAV_mut() = d3d12::D3D12_RESOURCE_UAV_BARRIER { + pResource: barrier.texture.resource.as_mut_ptr(), + }; + self.temp.barriers.push(raw); + } + } + + if !self.temp.barriers.is_empty() { + self.list + .unwrap() + .ResourceBarrier(self.temp.barriers.len() as u32, self.temp.barriers.as_ptr()); + } } unsafe fn fill_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange, value: u8) { diff --git a/wgpu-hal/src/dx12/conv.rs b/wgpu-hal/src/dx12/conv.rs index ab30e9116a..01aa76b5a2 100644 --- a/wgpu-hal/src/dx12/conv.rs +++ b/wgpu-hal/src/dx12/conv.rs @@ -238,3 +238,66 @@ pub fn map_binding_type(ty: &wgt::BindingType) -> native::DescriptorRangeType { pub fn map_label(name: &str) -> Vec { name.encode_utf16().chain(iter::once(0)).collect() } + +pub fn map_buffer_usage_to_state(usage: crate::BufferUses) -> d3d12::D3D12_RESOURCE_STATES { + use crate::BufferUses as Bu; + let mut state = d3d12::D3D12_RESOURCE_STATE_COMMON; + + if usage.intersects(Bu::COPY_SRC) { + state |= d3d12::D3D12_RESOURCE_STATE_COPY_SOURCE; + } + if usage.intersects(Bu::COPY_DST) { + state |= d3d12::D3D12_RESOURCE_STATE_COPY_DEST; + } + if usage.intersects(Bu::INDEX) { + state |= d3d12::D3D12_RESOURCE_STATE_INDEX_BUFFER; + } + if usage.intersects(Bu::VERTEX | Bu::UNIFORM) { + state |= d3d12::D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER; + } + if usage.intersects(Bu::STORAGE_LOAD) { + state |= d3d12::D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE + | d3d12::D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + } + if usage.intersects(Bu::STORAGE_STORE) { + state |= d3d12::D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + } + if usage.intersects(Bu::INDIRECT) { + state |= d3d12::D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT; + } + state +} + +pub fn map_texture_usage_to_state(usage: crate::TextureUses) -> d3d12::D3D12_RESOURCE_STATES { + use crate::TextureUses as Tu; + let mut state = d3d12::D3D12_RESOURCE_STATE_COMMON; + //Note: `RESOLVE_SOURCE` and `RESOLVE_DEST` are not used here + //Note: `PRESENT` is the same as `COMMON` + if usage == crate::TextureUses::UNINITIALIZED { + return state; + } + + if usage.intersects(Tu::COPY_SRC) { + state |= d3d12::D3D12_RESOURCE_STATE_COPY_SOURCE; + } + if usage.intersects(Tu::COPY_DST) { + state |= d3d12::D3D12_RESOURCE_STATE_COPY_DEST; + } + if usage.intersects(Tu::SAMPLED | Tu::STORAGE_LOAD) { + state |= d3d12::D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE + | d3d12::D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + } + if usage.intersects(Tu::COLOR_TARGET) { + state |= d3d12::D3D12_RESOURCE_STATE_RENDER_TARGET; + } + if usage.intersects(Tu::DEPTH_STENCIL_READ) { + state |= d3d12::D3D12_RESOURCE_STATE_DEPTH_READ; + } + if usage.intersects(Tu::DEPTH_STENCIL_WRITE) { + state |= d3d12::D3D12_RESOURCE_STATE_DEPTH_WRITE; + } + if usage.intersects(Tu::STORAGE_STORE) { + state |= d3d12::D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + } + state +} diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 3750d0b3da..a3fdefd87a 100644 --- 
a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -575,6 +575,13 @@ impl crate::Device for super::Device { Ok(super::Texture { resource, size: desc.size, + array_layer_count: match desc.dimension { + wgt::TextureDimension::D1 | wgt::TextureDimension::D2 => { + desc.size.depth_or_array_layers + } + wgt::TextureDimension::D3 => 1, + }, + mip_level_count: desc.mip_level_count, sample_count: desc.sample_count, }) } @@ -700,6 +707,7 @@ impl crate::Device for super::Device { device: self.raw, list: None, free_lists: Vec::new(), + temp: super::Temp::default(), }) } unsafe fn destroy_command_encoder(&self, encoder: super::CommandEncoder) { diff --git a/wgpu-hal/src/dx12/instance.rs b/wgpu-hal/src/dx12/instance.rs index 68dd256d39..8d5ff2b84e 100644 --- a/wgpu-hal/src/dx12/instance.rs +++ b/wgpu-hal/src/dx12/instance.rs @@ -50,7 +50,12 @@ unsafe extern "system" fn output_debug_string_handler( None => return excpt::EXCEPTION_CONTINUE_SEARCH, }; - log::log!(level, "D3D12: {}", message,); + log::log!(level, "{}", message,); + + if cfg!(debug_assertions) && level == log::Level::Error { + std::process::exit(1); + } + excpt::EXCEPTION_CONTINUE_EXECUTION } diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 7b83c223c7..eab2fd98c2 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -180,11 +180,23 @@ pub struct Queue { unsafe impl Send for Queue {} unsafe impl Sync for Queue {} +#[derive(Default)] +struct Temp { + barriers: Vec, +} + +impl Temp { + fn clear(&mut self) { + self.barriers.clear(); + } +} + pub struct CommandEncoder { allocator: native::CommandAllocator, device: native::Device, list: Option, free_lists: Vec, + temp: Temp, } unsafe impl Send for CommandEncoder {} @@ -209,12 +221,20 @@ unsafe impl Sync for Buffer {} pub struct Texture { resource: native::Resource, size: wgt::Extent3d, + array_layer_count: u32, + mip_level_count: u32, sample_count: u32, } unsafe impl Send for Texture {} unsafe impl Sync for Texture {} +impl Texture { + fn calc_subresource(&self, mip_level: u32, array_layer: u32, plane: u32) -> u32 { + mip_level + (array_layer + plane * self.array_layer_count) * self.mip_level_count + } +} + #[derive(Debug)] pub struct TextureView { handle_srv: Option, diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 78dc99a747..088acdc67c 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -620,6 +620,7 @@ bitflags::bitflags! { /// If a usage is not ordered, then even if it doesn't change between draw calls, there /// still need to be pipeline barriers inserted for synchronization. 
const ORDERED = Self::READ_ALL.bits | Self::COPY_DST.bits | Self::COLOR_TARGET.bits | Self::DEPTH_STENCIL_WRITE.bits; + //TODO: remove this const UNINITIALIZED = 0xFFFF; } } From f79c3781c3a001c54478f925e0cc2106e041a451 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Wed, 7 Jul 2021 22:36:27 -0400 Subject: [PATCH 15/33] hal/dx12: buffer-buffer and texture-texture copies --- wgpu-hal/src/dx12/command.rs | 115 ++++++++++++++++++++++++++++++++--- wgpu-hal/src/dx12/device.rs | 7 +-- wgpu-hal/src/dx12/mod.rs | 13 +++- wgpu-hal/src/lib.rs | 3 + 4 files changed, 122 insertions(+), 16 deletions(-) diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index d299099c24..6068be44e5 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -115,14 +115,14 @@ impl crate::CommandEncoder for super::CommandEncoder { }; let array_layer_count = match barrier.range.array_layer_count { Some(count) => count.get(), - None => barrier.texture.array_layer_count - barrier.range.base_array_layer, + None => barrier.texture.array_layer_count() - barrier.range.base_array_layer, }; if barrier.range.aspect == wgt::TextureAspect::All && barrier.range.base_mip_level + mip_level_count == barrier.texture.mip_level_count && barrier.range.base_array_layer + array_layer_count - == barrier.texture.array_layer_count + == barrier.texture.array_layer_count() { // Only one barrier if it affects the whole image. self.temp.barriers.push(raw); @@ -159,7 +159,13 @@ impl crate::CommandEncoder for super::CommandEncoder { } } - unsafe fn fill_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange, value: u8) { + unsafe fn fill_buffer( + &mut self, + _buffer: &super::Buffer, + _range: crate::MemoryRange, + _value: u8, + ) { + //TODO } unsafe fn copy_buffer_to_buffer( @@ -167,16 +173,97 @@ impl crate::CommandEncoder for super::CommandEncoder { src: &super::Buffer, dst: &super::Buffer, regions: T, - ) { + ) where + T: Iterator, + { + let list = self.list.unwrap(); + for r in regions { + list.CopyBufferRegion( + dst.resource.as_mut_ptr(), + r.dst_offset, + src.resource.as_mut_ptr(), + r.src_offset, + r.size.get(), + ); + } } unsafe fn copy_texture_to_texture( &mut self, src: &super::Texture, - src_usage: crate::TextureUses, + _src_usage: crate::TextureUses, dst: &super::Texture, regions: T, - ) { + ) where + T: Iterator, + { + let list = self.list.unwrap(); + let mut src_location = d3d12::D3D12_TEXTURE_COPY_LOCATION { + pResource: src.resource.as_mut_ptr(), + Type: d3d12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + u: mem::zeroed(), + }; + let mut dst_location = d3d12::D3D12_TEXTURE_COPY_LOCATION { + pResource: dst.resource.as_mut_ptr(), + Type: d3d12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + u: mem::zeroed(), + }; + + for r in regions { + let ( + depth, + array_layer_count, + src_z, + src_base_array_layer, + dst_z, + dst_base_array_layer, + ) = match src.dimension { + wgt::TextureDimension::D1 | wgt::TextureDimension::D2 => ( + 1, + r.size.depth_or_array_layers, + 0, + r.src_base.origin.z, + 0, + r.dst_base.origin.z, + ), + wgt::TextureDimension::D3 => ( + r.size.depth_or_array_layers, + 1, + r.src_base.origin.z, + 0, + r.dst_base.origin.z, + 0, + ), + }; + let src_box = d3d12::D3D12_BOX { + left: r.src_base.origin.x, + top: r.src_base.origin.y, + right: r.src_base.origin.x + r.size.width, + bottom: r.src_base.origin.y + r.size.height, + front: src_z, + back: src_z + depth, + }; + for rel_array_layer in 0..array_layer_count { + *src_location.u.SubresourceIndex_mut() = 
src.calc_subresource( + r.src_base.mip_level, + src_base_array_layer + rel_array_layer, + 0, + ); + *dst_location.u.SubresourceIndex_mut() = dst.calc_subresource( + r.dst_base.mip_level, + dst_base_array_layer + rel_array_layer, + 0, + ); + list.CopyTextureRegion( + &dst_location, + r.dst_base.origin.x, + r.dst_base.origin.y, + dst_z, + &src_location, + &src_box, + ); + } + } } unsafe fn copy_buffer_to_texture( @@ -184,7 +271,9 @@ impl crate::CommandEncoder for super::CommandEncoder { src: &super::Buffer, dst: &super::Texture, regions: T, - ) { + ) where + T: Iterator, + { } unsafe fn copy_texture_to_buffer( @@ -193,7 +282,17 @@ impl crate::CommandEncoder for super::CommandEncoder { src_usage: crate::TextureUses, dst: &super::Buffer, regions: T, - ) { + ) where + T: Iterator, + { + for r in regions { + let (_base_array_layer, _array_layer_count) = match src.dimension { + wgt::TextureDimension::D1 | wgt::TextureDimension::D2 => { + (r.texture_base.origin.z, r.size.depth_or_array_layers) + } + wgt::TextureDimension::D3 => (0, 1), + }; + } } unsafe fn begin_query(&mut self, set: &super::QuerySet, index: u32) {} diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index a3fdefd87a..f2747672b8 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -574,13 +574,8 @@ impl crate::Device for super::Device { hr.to_device_result("Texture creation")?; Ok(super::Texture { resource, + dimension: desc.dimension, size: desc.size, - array_layer_count: match desc.dimension { - wgt::TextureDimension::D1 | wgt::TextureDimension::D2 => { - desc.size.depth_or_array_layers - } - wgt::TextureDimension::D3 => 1, - }, mip_level_count: desc.mip_level_count, sample_count: desc.sample_count, }) diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index eab2fd98c2..542c845907 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -220,8 +220,8 @@ unsafe impl Sync for Buffer {} #[derive(Debug)] pub struct Texture { resource: native::Resource, + dimension: wgt::TextureDimension, size: wgt::Extent3d, - array_layer_count: u32, mip_level_count: u32, sample_count: u32, } @@ -230,8 +230,17 @@ unsafe impl Send for Texture {} unsafe impl Sync for Texture {} impl Texture { + fn array_layer_count(&self) -> u32 { + match self.dimension { + wgt::TextureDimension::D1 | wgt::TextureDimension::D2 => { + self.size.depth_or_array_layers + } + wgt::TextureDimension::D3 => 1, + } + } + fn calc_subresource(&self, mip_level: u32, array_layer: u32, plane: u32) -> u32 { - mip_level + (array_layer + plane * self.array_layer_count) * self.mip_level_count + mip_level + (array_layer + plane * self.array_layer_count()) * self.mip_level_count } } diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 088acdc67c..fefee510e7 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -1011,6 +1011,9 @@ pub struct TextureCopyBase { pub aspect: FormatAspects, } +//TODO: all the copy operations really want to separate +// array layers from Z, so this should not use `wgt::Extent3d`, +// and potentially work with a single layer at a time. 
#[derive(Clone, Debug)] pub struct TextureCopy { pub src_base: TextureCopyBase, From 35ee65707ff9b77d3f875cdc39611a021d78cd92 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Wed, 7 Jul 2021 23:34:59 -0400 Subject: [PATCH 16/33] hal: make copy to work on one array layer at a time --- wgpu-core/src/command/transfer.rs | 165 ++++++++++++---------- wgpu-core/src/device/mod.rs | 15 +- wgpu-core/src/device/queue.rs | 40 +++--- wgpu-hal/examples/halmark/main.rs | 7 +- wgpu-hal/src/dx12/command.rs | 70 ++-------- wgpu-hal/src/gles/queue.rs | 221 +++++++++++++----------------- wgpu-hal/src/lib.rs | 25 +++- wgpu-hal/src/metal/command.rs | 92 ++++++------- wgpu-hal/src/metal/conv.rs | 38 ++--- wgpu-hal/src/vulkan/command.rs | 13 +- wgpu-hal/src/vulkan/conv.rs | 56 ++------ wgpu-hal/src/vulkan/device.rs | 13 +- wgpu-hal/src/vulkan/instance.rs | 1 - wgpu-hal/src/vulkan/mod.rs | 1 - 14 files changed, 346 insertions(+), 411 deletions(-) diff --git a/wgpu-core/src/command/transfer.rs b/wgpu-core/src/command/transfer.rs index ef974c15bf..5b679c40e0 100644 --- a/wgpu-core/src/command/transfer.rs +++ b/wgpu-core/src/command/transfer.rs @@ -120,27 +120,34 @@ pub(crate) fn extract_texture_selector( }); } - let layers = match texture.desc.dimension { - wgt::TextureDimension::D1 | wgt::TextureDimension::D2 => { - copy_texture.origin.z..copy_texture.origin.z + copy_size.depth_or_array_layers - } - wgt::TextureDimension::D3 => 0..1, - }; - let selector = TextureSelector { - levels: copy_texture.mip_level..copy_texture.mip_level + 1, - layers, + let (layers, origin_z) = match texture.desc.dimension { + wgt::TextureDimension::D1 | wgt::TextureDimension::D2 => ( + copy_texture.origin.z..copy_texture.origin.z + copy_size.depth_or_array_layers, + 0, + ), + wgt::TextureDimension::D3 => (0..1, copy_texture.origin.z), }; let base = hal::TextureCopyBase { - origin: copy_texture.origin, + origin: wgt::Origin3d { + x: copy_texture.origin.x, + y: copy_texture.origin.y, + z: origin_z, + }, + // this value will be incremented per copied layer + array_layer: layers.start, mip_level: copy_texture.mip_level, aspect: copy_aspect, }; + let selector = TextureSelector { + levels: copy_texture.mip_level..copy_texture.mip_level + 1, + layers, + }; Ok((selector, base, format)) } /// Function copied with some modifications from webgpu standard -/// If successful, returns number of buffer bytes required for this copy. +/// If successful, returns (number of buffer bytes required for this copy, number of bytes between array layers). 
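/// For example, copying a 256x256 region across 2 array layers of an RGBA8 texture (4 bytes per texel)
/// with `bytes_per_row = 1024` and `rows_per_image = 256` gives `bytes_per_image = 1024 * 256 = 262144`
/// and a required buffer size of `262144 * (2 - 1) + 1024 * (256 - 1) + 1024 = 524288` bytes.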
pub(crate) fn validate_linear_texture_data( layout: &wgt::ImageDataLayout, format: wgt::TextureFormat, @@ -149,7 +156,7 @@ pub(crate) fn validate_linear_texture_data( bytes_per_block: BufferAddress, copy_size: &Extent3d, need_copy_aligned_rows: bool, -) -> Result { +) -> Result<(BufferAddress, BufferAddress), TransferError> { // Convert all inputs to BufferAddress (u64) to prevent overflow issues let copy_width = copy_size.width as BufferAddress; let copy_height = copy_size.height as BufferAddress; @@ -202,10 +209,10 @@ pub(crate) fn validate_linear_texture_data( } let bytes_in_last_row = block_size * width_in_blocks; + let bytes_per_image = bytes_per_row * block_rows_per_image; let required_bytes_in_copy = if copy_width == 0 || copy_height == 0 || copy_depth == 0 { 0 } else { - let bytes_per_image = bytes_per_row * block_rows_per_image; let bytes_in_last_slice = bytes_per_row * (height_in_blocks - 1) + bytes_in_last_row; bytes_per_image * (copy_depth - 1) + bytes_in_last_slice }; @@ -227,17 +234,17 @@ pub(crate) fn validate_linear_texture_data( if copy_height > 1 && bytes_per_row < bytes_in_last_row { return Err(TransferError::InvalidBytesPerRow); } - Ok(required_bytes_in_copy) + Ok((required_bytes_in_copy, bytes_per_image)) } /// Function copied with minor modifications from webgpu standard -/// Returns the (virtual) mip level extent. +/// Returns the HAL copy extent and the layer count. pub(crate) fn validate_texture_copy_range( texture_copy_view: &ImageCopyTexture, desc: &wgt::TextureDescriptor<()>, texture_side: CopySide, copy_size: &Extent3d, -) -> Result { +) -> Result<(hal::CopyExtent, u32), TransferError> { let (block_width, block_height) = desc.format.describe().block_dimensions; let block_width = block_width as u32; let block_height = block_height as u32; @@ -295,7 +302,28 @@ pub(crate) fn validate_texture_copy_range( return Err(TransferError::UnalignedCopyHeight); } - Ok(extent_virtual) + let (depth, array_layer_count) = match desc.dimension { + wgt::TextureDimension::D1 | wgt::TextureDimension::D2 => { + (1, copy_size.depth_or_array_layers) + } + wgt::TextureDimension::D3 => ( + copy_size + .depth_or_array_layers + .min(extent_virtual.depth_or_array_layers), + 1, + ), + }; + + // WebGPU uses the physical size of the texture for copies whereas vulkan uses + // the virtual size. We have passed validation, so it's safe to use the + // image extent data directly. We want the provided copy size to be no larger than + // the virtual size. + let copy_extent = hal::CopyExtent { + width: copy_size.width.min(extent_virtual.width), + height: copy_size.width.min(extent_virtual.height), + depth, + }; + Ok((copy_extent, array_layer_count)) } impl Global { @@ -505,13 +533,13 @@ impl Global { let dst_barriers = dst_pending.map(|pending| pending.into_hal(dst_texture)); let format_desc = dst_texture.desc.format.describe(); - let max_image_extent = validate_texture_copy_range( + let (hal_copy_size, array_layer_count) = validate_texture_copy_range( destination, &dst_texture.desc, CopySide::Destination, copy_size, )?; - let required_buffer_bytes_in_copy = validate_linear_texture_data( + let (required_buffer_bytes_in_copy, bytes_per_array_layer) = validate_linear_texture_data( &source.layout, dst_texture.desc.format, src_buffer.size, @@ -538,24 +566,22 @@ impl Global { ); } - // WebGPU uses the physical size of the texture for copies whereas vulkan uses - // the virtual size. We have passed validation, so it's safe to use the - // image extent data directly. 
We want the provided copy size to be no larger than - // the virtual size. - let region = hal::BufferTextureCopy { - buffer_layout: source.layout, - texture_base: dst_base, - size: Extent3d { - width: copy_size.width.min(max_image_extent.width), - height: copy_size.height.min(max_image_extent.height), - depth_or_array_layers: copy_size.depth_or_array_layers, - }, - }; + let regions = (0..array_layer_count).map(|rel_array_layer| { + let mut texture_base = dst_base.clone(); + texture_base.array_layer += rel_array_layer; + let mut buffer_layout = source.layout; + buffer_layout.offset += rel_array_layer as u64 * bytes_per_array_layer; + hal::BufferTextureCopy { + buffer_layout, + texture_base, + size: hal_copy_size, + } + }); let cmd_buf_raw = cmd_buf.encoder.open(); unsafe { cmd_buf_raw.transition_buffers(src_barriers); cmd_buf_raw.transition_textures(dst_barriers); - cmd_buf_raw.copy_buffer_to_texture(src_raw, dst_raw, iter::once(region)); + cmd_buf_raw.copy_buffer_to_texture(src_raw, dst_raw, regions); } Ok(()) } @@ -635,9 +661,9 @@ impl Global { let dst_barriers = dst_pending.map(|pending| pending.into_hal(dst_buffer)); let format_desc = src_texture.desc.format.describe(); - let max_image_extent = + let (hal_copy_size, array_layer_count) = validate_texture_copy_range(source, &src_texture.desc, CopySide::Source, copy_size)?; - let required_buffer_bytes_in_copy = validate_linear_texture_data( + let (required_buffer_bytes_in_copy, bytes_per_array_layer) = validate_linear_texture_data( &destination.layout, src_texture.desc.format, dst_buffer.size, @@ -667,19 +693,17 @@ impl Global { }), ); - // WebGPU uses the physical size of the texture for copies whereas vulkan uses - // the virtual size. We have passed validation, so it's safe to use the - // image extent data directly. We want the provided copy size to be no larger than - // the virtual size. 
- let region = hal::BufferTextureCopy { - buffer_layout: destination.layout, - texture_base: src_base, - size: Extent3d { - width: copy_size.width.min(max_image_extent.width), - height: copy_size.height.min(max_image_extent.height), - depth_or_array_layers: copy_size.depth_or_array_layers, - }, - }; + let regions = (0..array_layer_count).map(|rel_array_layer| { + let mut texture_base = src_base.clone(); + texture_base.array_layer += rel_array_layer; + let mut buffer_layout = destination.layout; + buffer_layout.offset += rel_array_layer as u64 * bytes_per_array_layer; + hal::BufferTextureCopy { + buffer_layout, + texture_base, + size: hal_copy_size, + } + }); let cmd_buf_raw = cmd_buf.encoder.open(); unsafe { cmd_buf_raw.transition_buffers(dst_barriers); @@ -688,7 +712,7 @@ impl Global { src_raw, hal::TextureUses::COPY_SRC, dst_raw, - iter::once(region), + regions, ); } Ok(()) @@ -725,11 +749,11 @@ impl Global { return Ok(()); } - let (src_range, src_base, _) = + let (src_range, src_tex_base, _) = extract_texture_selector(source, copy_size, &*texture_guard)?; - let (dst_range, dst_base, _) = + let (dst_range, dst_tex_base, _) = extract_texture_selector(destination, copy_size, &*texture_guard)?; - if src_base.aspect != dst_base.aspect { + if src_tex_base.aspect != dst_tex_base.aspect { return Err(TransferError::MismatchedAspects.into()); } @@ -777,32 +801,31 @@ impl Global { } barriers.extend(dst_pending.map(|pending| pending.into_hal(dst_texture))); - let max_src_image_extent = + let (src_copy_size, array_layer_count) = validate_texture_copy_range(source, &src_texture.desc, CopySide::Source, copy_size)?; - let max_dst_image_extent = validate_texture_copy_range( + let (dst_copy_size, _) = validate_texture_copy_range( destination, &dst_texture.desc, CopySide::Destination, copy_size, )?; - // WebGPU uses the physical size of the texture for copies whereas vulkan uses - // the virtual size. We have passed validation, so it's safe to use the - // image extent data directly. We want the provided copy size to be no larger than - // the virtual size. 
- let region = hal::TextureCopy { - src_base, - dst_base, - size: Extent3d { - width: copy_size - .width - .min(max_src_image_extent.width.min(max_dst_image_extent.width)), - height: copy_size - .height - .min(max_src_image_extent.height.min(max_dst_image_extent.height)), - depth_or_array_layers: copy_size.depth_or_array_layers, - }, + let hal_copy_size = hal::CopyExtent { + width: src_copy_size.width.min(dst_copy_size.width), + height: src_copy_size.height.min(dst_copy_size.height), + depth: src_copy_size.depth.min(dst_copy_size.depth), }; + let regions = (0..array_layer_count).map(|rel_array_layer| { + let mut src_base = src_tex_base.clone(); + let mut dst_base = dst_tex_base.clone(); + src_base.array_layer += rel_array_layer; + dst_base.array_layer += rel_array_layer; + hal::TextureCopy { + src_base, + dst_base, + size: hal_copy_size, + } + }); let cmd_buf_raw = cmd_buf.encoder.open(); unsafe { cmd_buf_raw.transition_textures(barriers.into_iter()); @@ -810,7 +833,7 @@ impl Global { src_raw, hal::TextureUses::COPY_SRC, dst_raw, - iter::once(region), + regions, ); } Ok(()) diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index f418783970..a315458180 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -732,10 +732,13 @@ impl Device { let usage = { let mask_copy = !(hal::TextureUses::COPY_SRC | hal::TextureUses::COPY_DST); let mask_dimension = match view_dim { - wgt::TextureViewDimension::Cube | - wgt::TextureViewDimension::CubeArray => hal::TextureUses::SAMPLED, + wgt::TextureViewDimension::Cube | wgt::TextureViewDimension::CubeArray => { + hal::TextureUses::SAMPLED + } wgt::TextureViewDimension::D3 => { - hal::TextureUses::SAMPLED | hal::TextureUses::STORAGE_LOAD | hal::TextureUses::STORAGE_STORE + hal::TextureUses::SAMPLED + | hal::TextureUses::STORAGE_LOAD + | hal::TextureUses::STORAGE_STORE } _ => hal::TextureUses::all(), }; @@ -747,7 +750,11 @@ impl Device { texture.hal_usage & mask_copy & mask_dimension & mask_mip_level }; - log::debug!("Create view for texture {:?} filters usages to {:?}", texture_id, usage); + log::debug!( + "Create view for texture {:?} filters usages to {:?}", + texture_id, + usage + ); let hal_desc = hal::TextureViewDescriptor { label: desc.label.borrow_option(), format, diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index a68766a835..e302bd8c6a 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -433,10 +433,10 @@ impl Global { } let (texture_guard, _) = hub.textures.read(&mut token); - let (selector, texture_base, texture_format) = + let (selector, dst_base, texture_format) = extract_texture_selector(destination, size, &*texture_guard)?; let format_desc = texture_format.describe(); - validate_linear_texture_data( + let (_, bytes_per_array_layer) = validate_linear_texture_data( data_layout, texture_format, data.len() as wgt::BufferAddress, @@ -495,7 +495,7 @@ impl Global { TransferError::MissingCopyDstUsageFlag(None, Some(destination.texture)).into(), ); } - let max_image_extent = + let (hal_copy_size, array_layer_count) = validate_texture_copy_range(destination, &dst.desc, CopySide::Destination, size)?; dst.life_guard.use_at(device.active_submission_index + 1); @@ -542,33 +542,29 @@ impl Global { .map_err(DeviceError::from)?; } - // WebGPU uses the physical size of the texture for copies whereas vulkan uses - // the virtual size. We have passed validation, so it's safe to use the - // image extent data directly. 
We want the provided copy size to be no larger than - // the virtual size. - let region = hal::BufferTextureCopy { - buffer_layout: wgt::ImageDataLayout { - offset: 0, - bytes_per_row: NonZeroU32::new(stage_bytes_per_row), - rows_per_image: NonZeroU32::new(block_rows_per_image), - }, - texture_base, - size: wgt::Extent3d { - width: size.width.min(max_image_extent.width), - height: size.height.min(max_image_extent.height), - depth_or_array_layers: size.depth_or_array_layers, - }, - }; - + let regions = (0..array_layer_count).map(|rel_array_layer| { + let mut texture_base = dst_base.clone(); + texture_base.array_layer += rel_array_layer; + hal::BufferTextureCopy { + buffer_layout: wgt::ImageDataLayout { + offset: rel_array_layer as u64 * bytes_per_array_layer, + bytes_per_row: NonZeroU32::new(stage_bytes_per_row), + rows_per_image: NonZeroU32::new(block_rows_per_image), + }, + texture_base, + size: hal_copy_size, + } + }); let barrier = hal::BufferBarrier { buffer: &stage.buffer, usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC, }; + let encoder = device.pending_writes.activate(); unsafe { encoder.transition_buffers(iter::once(barrier)); encoder.transition_textures(transition.map(|pending| pending.into_hal(dst))); - encoder.copy_buffer_to_texture(&stage.buffer, dst_raw, iter::once(region)); + encoder.copy_buffer_to_texture(&stage.buffer, dst_raw, regions); } device.pending_writes.consume(stage); diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index 047d0599d3..fb6c102028 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -302,9 +302,14 @@ impl Example { texture_base: hal::TextureCopyBase { origin: wgt::Origin3d::ZERO, mip_level: 0, + array_layer: 0, aspect: hal::FormatAspects::COLOR, }, - size: texture_desc.size, + size: hal::CopyExtent { + width: 1, + height: 1, + depth: 1, + }, }; unsafe { cmd_encoder.transition_buffers(iter::once(buffer_barrier)); diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 6068be44e5..642e734bad 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -210,59 +210,26 @@ impl crate::CommandEncoder for super::CommandEncoder { }; for r in regions { - let ( - depth, - array_layer_count, - src_z, - src_base_array_layer, - dst_z, - dst_base_array_layer, - ) = match src.dimension { - wgt::TextureDimension::D1 | wgt::TextureDimension::D2 => ( - 1, - r.size.depth_or_array_layers, - 0, - r.src_base.origin.z, - 0, - r.dst_base.origin.z, - ), - wgt::TextureDimension::D3 => ( - r.size.depth_or_array_layers, - 1, - r.src_base.origin.z, - 0, - r.dst_base.origin.z, - 0, - ), - }; let src_box = d3d12::D3D12_BOX { left: r.src_base.origin.x, top: r.src_base.origin.y, right: r.src_base.origin.x + r.size.width, bottom: r.src_base.origin.y + r.size.height, - front: src_z, - back: src_z + depth, + front: r.src_base.origin.z, + back: r.src_base.origin.z + r.size.depth, }; - for rel_array_layer in 0..array_layer_count { - *src_location.u.SubresourceIndex_mut() = src.calc_subresource( - r.src_base.mip_level, - src_base_array_layer + rel_array_layer, - 0, - ); - *dst_location.u.SubresourceIndex_mut() = dst.calc_subresource( - r.dst_base.mip_level, - dst_base_array_layer + rel_array_layer, - 0, - ); - list.CopyTextureRegion( - &dst_location, - r.dst_base.origin.x, - r.dst_base.origin.y, - dst_z, - &src_location, - &src_box, - ); - } + *src_location.u.SubresourceIndex_mut() = + src.calc_subresource(r.src_base.mip_level, r.src_base.array_layer, 0); + 
*dst_location.u.SubresourceIndex_mut() = + dst.calc_subresource(r.dst_base.mip_level, r.dst_base.array_layer, 0); + list.CopyTextureRegion( + &dst_location, + r.dst_base.origin.x, + r.dst_base.origin.y, + r.dst_base.origin.z, + &src_location, + &src_box, + ); } } @@ -285,14 +252,7 @@ impl crate::CommandEncoder for super::CommandEncoder { ) where T: Iterator, { - for r in regions { - let (_base_array_layer, _array_layer_count) = match src.dimension { - wgt::TextureDimension::D1 | wgt::TextureDimension::D2 => { - (r.texture_base.origin.z, r.size.depth_or_array_layers) - } - wgt::TextureDimension::D3 => (0, 1), - }; - } + for _r in regions {} } unsafe fn begin_query(&mut self, set: &super::QuerySet, index: u32) {} diff --git a/wgpu-hal/src/gles/queue.rs b/wgpu-hal/src/gles/queue.rs index 9e4cf285cd..2b0dbc522a 100644 --- a/wgpu-hal/src/gles/queue.rs +++ b/wgpu-hal/src/gles/queue.rs @@ -201,52 +201,51 @@ impl super::Queue { ref copy, } => { //TODO: cubemaps - //TODO: how is depth handled? + //TODO: handle 3D copies gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(self.copy_fbo)); - for layer in 0..copy.size.depth_or_array_layers as i32 { - if is_3d_target(src_target) { - //TODO: handle GLES without framebuffer_texture_3d - gl.framebuffer_texture_layer( - glow::READ_FRAMEBUFFER, - glow::COLOR_ATTACHMENT0, - Some(src), - copy.src_base.mip_level as i32, - copy.src_base.origin.z as i32 + layer, - ); - } else { - gl.framebuffer_texture_2d( - glow::READ_FRAMEBUFFER, - glow::COLOR_ATTACHMENT0, - src_target, - Some(src), - copy.src_base.mip_level as i32, - ); - } - gl.bind_texture(dst_target, Some(dst)); - if is_3d_target(dst_target) { - gl.copy_tex_sub_image_3d( - dst_target, - copy.dst_base.mip_level as i32, - copy.dst_base.origin.x as i32, - copy.dst_base.origin.y as i32, - copy.dst_base.origin.z as i32 + layer, - copy.src_base.origin.x as i32, - copy.src_base.origin.y as i32, - copy.size.width as i32, - copy.size.height as i32, - ); - } else { - gl.copy_tex_sub_image_2d( - dst_target, - copy.dst_base.mip_level as i32, - copy.dst_base.origin.x as i32, - copy.dst_base.origin.y as i32, - copy.src_base.origin.x as i32, - copy.src_base.origin.y as i32, - copy.size.width as i32, - copy.size.height as i32, - ); - } + if is_3d_target(src_target) { + //TODO: handle GLES without framebuffer_texture_3d + gl.framebuffer_texture_layer( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + Some(src), + copy.src_base.mip_level as i32, + copy.src_base.array_layer as i32, + ); + } else { + gl.framebuffer_texture_2d( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + src_target, + Some(src), + copy.src_base.mip_level as i32, + ); + } + + gl.bind_texture(dst_target, Some(dst)); + if is_3d_target(dst_target) { + gl.copy_tex_sub_image_3d( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + copy.dst_base.origin.z as i32, + copy.src_base.origin.x as i32, + copy.src_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + ); + } else { + gl.copy_tex_sub_image_2d( + dst_target, + copy.dst_base.mip_level as i32, + copy.dst_base.origin.x as i32, + copy.dst_base.origin.y as i32, + copy.src_base.origin.x as i32, + copy.src_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + ); } } C::CopyBufferToTexture { @@ -286,7 +285,7 @@ impl super::Queue { copy.texture_base.origin.z as i32, copy.size.width as i32, copy.size.height as i32, - copy.size.depth_or_array_layers as i32, + copy.size.depth as i32, 
format_desc.external, format_desc.data_type, unpack_data, @@ -306,26 +305,18 @@ impl super::Queue { ); } glow::TEXTURE_CUBE_MAP => { - let mut offset = copy.buffer_layout.offset as u32; - for face_index in 0..copy.size.depth_or_array_layers { - gl.tex_sub_image_2d( - CUBEMAP_FACES - [(copy.texture_base.origin.z + face_index) as usize], - copy.texture_base.mip_level as i32, - copy.texture_base.origin.x as i32, - copy.texture_base.origin.y as i32, - copy.size.width as i32, - copy.size.height as i32, - format_desc.external, - format_desc.data_type, - glow::PixelUnpackData::BufferOffset(offset), - ); - offset += copy - .buffer_layout - .rows_per_image - .map_or(0, |rpi| rpi.get()) - * copy.buffer_layout.bytes_per_row.map_or(0, |bpr| bpr.get()); - } + let offset = copy.buffer_layout.offset as u32; + gl.tex_sub_image_2d( + CUBEMAP_FACES[copy.texture_base.array_layer as usize], + copy.texture_base.mip_level as i32, + copy.texture_base.origin.x as i32, + copy.texture_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + format_desc.external, + format_desc.data_type, + glow::PixelUnpackData::BufferOffset(offset), + ); } glow::TEXTURE_CUBE_MAP_ARRAY => { //Note: not sure if this is correct! @@ -337,7 +328,7 @@ impl super::Queue { copy.texture_base.origin.z as i32, copy.size.width as i32, copy.size.height as i32, - copy.size.depth_or_array_layers as i32, + copy.size.depth as i32, format_desc.external, format_desc.data_type, unpack_data, @@ -349,10 +340,9 @@ impl super::Queue { let bytes_per_image = copy.buffer_layout.rows_per_image.map_or(1, |rpi| rpi.get()) * copy.buffer_layout.bytes_per_row.map_or(1, |bpr| bpr.get()); - let offset_end = copy.buffer_layout.offset as u32 - + bytes_per_image * copy.size.depth_or_array_layers; + let offset = copy.buffer_layout.offset as u32; let unpack_data = glow::CompressedPixelUnpackData::BufferRange( - copy.buffer_layout.offset as u32..offset_end, + offset..offset + bytes_per_image, ); match dst_target { glow::TEXTURE_3D | glow::TEXTURE_2D_ARRAY => { @@ -364,7 +354,7 @@ impl super::Queue { copy.texture_base.origin.z as i32, copy.size.width as i32, copy.size.height as i32, - copy.size.depth_or_array_layers as i32, + copy.size.depth as i32, format_desc.internal, unpack_data, ); @@ -382,23 +372,18 @@ impl super::Queue { ); } glow::TEXTURE_CUBE_MAP => { - let mut offset = copy.buffer_layout.offset as u32; - for face_index in 0..copy.size.depth_or_array_layers { - gl.compressed_tex_sub_image_2d( - CUBEMAP_FACES - [(copy.texture_base.origin.z + face_index) as usize], - copy.texture_base.mip_level as i32, - copy.texture_base.origin.x as i32, - copy.texture_base.origin.y as i32, - copy.size.width as i32, - copy.size.height as i32, - format_desc.internal, - glow::CompressedPixelUnpackData::BufferRange( - offset..offset + bytes_per_image, - ), - ); - offset += bytes_per_image; - } + gl.compressed_tex_sub_image_2d( + CUBEMAP_FACES[copy.texture_base.array_layer as usize], + copy.texture_base.mip_level as i32, + copy.texture_base.origin.x as i32, + copy.texture_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + format_desc.internal, + glow::CompressedPixelUnpackData::BufferRange( + offset..offset + bytes_per_image, + ), + ); } glow::TEXTURE_CUBE_MAP_ARRAY => { //Note: not sure if this is correct! 
@@ -410,7 +395,7 @@ impl super::Queue { copy.texture_base.origin.z as i32, copy.size.width as i32, copy.size.height as i32, - copy.size.depth_or_array_layers as i32, + copy.size.depth as i32, format_desc.internal, unpack_data, ); @@ -445,45 +430,37 @@ impl super::Queue { .map_or(copy.size.width, |bpr| { bpr.get() / format_info.block_size as u32 }); - let column_texels = copy - .buffer_layout - .rows_per_image - .map_or(copy.size.height, |rpi| rpi.get()); gl.pixel_store_i32(glow::PACK_ROW_LENGTH, row_texels as i32); gl.bind_buffer(glow::PIXEL_PACK_BUFFER, Some(dst)); gl.bind_framebuffer(glow::READ_FRAMEBUFFER, Some(self.copy_fbo)); - for layer in 0..copy.size.depth_or_array_layers { - let offset = copy.buffer_layout.offset as u32 - + layer * column_texels * row_texels * format_info.block_size as u32; - if is_3d_target(src_target) { - //TODO: handle GLES without framebuffer_texture_3d - gl.framebuffer_texture_layer( - glow::READ_FRAMEBUFFER, - glow::COLOR_ATTACHMENT0, - Some(src), - copy.texture_base.mip_level as i32, - copy.texture_base.origin.z as i32 + layer as i32, - ); - } else { - gl.framebuffer_texture_2d( - glow::READ_FRAMEBUFFER, - glow::COLOR_ATTACHMENT0, - src_target, - Some(src), - copy.texture_base.mip_level as i32, - ); - } - gl.read_pixels( - copy.texture_base.origin.x as i32, - copy.texture_base.origin.y as i32, - copy.size.width as i32, - copy.size.height as i32, - format_desc.external, - format_desc.data_type, - glow::PixelPackData::BufferOffset(offset), + if is_3d_target(src_target) { + //TODO: handle GLES without framebuffer_texture_3d + gl.framebuffer_texture_layer( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + Some(src), + copy.texture_base.mip_level as i32, + copy.texture_base.array_layer as i32, + ); + } else { + gl.framebuffer_texture_2d( + glow::READ_FRAMEBUFFER, + glow::COLOR_ATTACHMENT0, + src_target, + Some(src), + copy.texture_base.mip_level as i32, ); } + gl.read_pixels( + copy.texture_base.origin.x as i32, + copy.texture_base.origin.y as i32, + copy.size.width as i32, + copy.size.height as i32, + format_desc.external, + format_desc.data_type, + glow::PixelPackData::BufferOffset(copy.buffer_layout.offset as u32), + ); } C::SetIndexBuffer(buffer) => { gl.bind_buffer(glow::ELEMENT_ARRAY_BUFFER, Some(buffer)); diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index fefee510e7..97ae478821 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -351,6 +351,8 @@ pub trait CommandEncoder: Send + Sync { where T: Iterator; + /// Copy from one texture to another. + /// Works with a single array layer. /// Note: `dst` current usage has to be `TextureUses::COPY_DST`. unsafe fn copy_texture_to_texture( &mut self, @@ -361,11 +363,15 @@ pub trait CommandEncoder: Send + Sync { ) where T: Iterator; + /// Copy from buffer to texture. + /// Works with a single array layer. /// Note: `dst` current usage has to be `TextureUses::COPY_DST`. unsafe fn copy_buffer_to_texture(&mut self, src: &A::Buffer, dst: &A::Texture, regions: T) where T: Iterator; + /// Copy from texture to buffer. + /// Works with a single array layer. unsafe fn copy_texture_to_buffer( &mut self, src: &A::Texture, @@ -1006,26 +1012,33 @@ pub struct BufferCopy { #[derive(Clone, Debug)] pub struct TextureCopyBase { - pub origin: wgt::Origin3d, pub mip_level: u32, + pub array_layer: u32, + /// Origin within a texture. + /// Note: for 1D and 2D textures, Z must be 0. 
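    /// For 1D and 2D array textures the layer is selected by `array_layer` above,
    /// not by `origin.z`.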
+ pub origin: wgt::Origin3d, pub aspect: FormatAspects, } -//TODO: all the copy operations really want to separate -// array layers from Z, so this should not use `wgt::Extent3d`, -// and potentially work with a single layer at a time. +#[derive(Clone, Copy, Debug)] +pub struct CopyExtent { + pub width: u32, + pub height: u32, + pub depth: u32, +} + #[derive(Clone, Debug)] pub struct TextureCopy { pub src_base: TextureCopyBase, pub dst_base: TextureCopyBase, - pub size: wgt::Extent3d, + pub size: CopyExtent, } #[derive(Clone, Debug)] pub struct BufferTextureCopy { pub buffer_layout: wgt::ImageDataLayout, pub texture_base: TextureCopyBase, - pub size: wgt::Extent3d, + pub size: CopyExtent, } #[derive(Debug)] diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index 9fbaf3c042..e395f9320c 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -156,22 +156,20 @@ impl crate::CommandEncoder for super::CommandEncoder { { let encoder = self.enter_blit(); for copy in regions { - let (src_slice, src_origin) = conv::map_origin(©.src_base.origin, src.raw_type); - let (dst_slice, dst_origin) = conv::map_origin(©.dst_base.origin, dst.raw_type); - let (slice_count, extent) = conv::map_extent(©.size, src.raw_type); - for slice in 0..slice_count { - encoder.copy_from_texture( - &src.raw, - src_slice + slice, - copy.src_base.mip_level as u64, - src_origin, - extent, - &dst.raw, - dst_slice + slice, - copy.dst_base.mip_level as u64, - dst_origin, - ); - } + let src_origin = conv::map_origin(©.src_base.origin); + let dst_origin = conv::map_origin(©.dst_base.origin); + let extent = conv::map_copy_extent(©.size); + encoder.copy_from_texture( + &src.raw, + copy.src_base.array_layer as u64, + copy.src_base.mip_level as u64, + src_origin, + extent, + &dst.raw, + copy.dst_base.array_layer as u64, + copy.dst_base.mip_level as u64, + dst_origin, + ); } } @@ -185,8 +183,8 @@ impl crate::CommandEncoder for super::CommandEncoder { { let encoder = self.enter_blit(); for copy in regions { - let (dst_slice, dst_origin) = conv::map_origin(©.texture_base.origin, dst.raw_type); - let (slice_count, extent) = conv::map_extent(©.size, dst.raw_type); + let dst_origin = conv::map_origin(©.texture_base.origin); + let extent = conv::map_copy_extent(©.size); let bytes_per_row = copy .buffer_layout .bytes_per_row @@ -195,21 +193,18 @@ impl crate::CommandEncoder for super::CommandEncoder { .buffer_layout .rows_per_image .map_or(0, |v| v.get() as u64 * bytes_per_row); - for slice in 0..slice_count { - let offset = copy.buffer_layout.offset + bytes_per_image * slice; - encoder.copy_from_buffer_to_texture( - &src.raw, - offset, - bytes_per_row, - bytes_per_image, - extent, - &dst.raw, - dst_slice + slice, - copy.texture_base.mip_level as u64, - dst_origin, - mtl::MTLBlitOption::empty(), - ); - } + encoder.copy_from_buffer_to_texture( + &src.raw, + copy.buffer_layout.offset, + bytes_per_row, + bytes_per_image, + extent, + &dst.raw, + copy.texture_base.array_layer as u64, + copy.texture_base.mip_level as u64, + dst_origin, + mtl::MTLBlitOption::empty(), + ); } } @@ -224,8 +219,8 @@ impl crate::CommandEncoder for super::CommandEncoder { { let encoder = self.enter_blit(); for copy in regions { - let (src_slice, src_origin) = conv::map_origin(©.texture_base.origin, src.raw_type); - let (slice_count, extent) = conv::map_extent(©.size, src.raw_type); + let src_origin = conv::map_origin(©.texture_base.origin); + let extent = conv::map_copy_extent(©.size); let bytes_per_row = copy .buffer_layout 
.bytes_per_row @@ -234,21 +229,18 @@ impl crate::CommandEncoder for super::CommandEncoder { .buffer_layout .rows_per_image .map_or(0, |v| v.get() as u64 * bytes_per_row); - for slice in 0..slice_count { - let offset = copy.buffer_layout.offset + bytes_per_image * slice; - encoder.copy_from_texture_to_buffer( - &src.raw, - src_slice + slice, - copy.texture_base.mip_level as u64, - src_origin, - extent, - &dst.raw, - offset, - bytes_per_row, - bytes_per_image, - mtl::MTLBlitOption::empty(), - ); - } + encoder.copy_from_texture_to_buffer( + &src.raw, + copy.texture_base.array_layer as u64, + copy.texture_base.mip_level as u64, + src_origin, + extent, + &dst.raw, + copy.buffer_layout.offset, + bytes_per_row, + bytes_per_image, + mtl::MTLBlitOption::empty(), + ); } } diff --git a/wgpu-hal/src/metal/conv.rs b/wgpu-hal/src/metal/conv.rs index 61ae6cab14..b1c7e0b303 100644 --- a/wgpu-hal/src/metal/conv.rs +++ b/wgpu-hal/src/metal/conv.rs @@ -261,34 +261,20 @@ pub fn map_range(range: &crate::MemoryRange) -> mtl::NSRange { } } -pub fn map_extent(extent: &wgt::Extent3d, raw_type: mtl::MTLTextureType) -> (u64, mtl::MTLSize) { - let (depth, array_layers) = match raw_type { - mtl::MTLTextureType::D3 => (extent.depth_or_array_layers as u64, 1), - _ => (1, extent.depth_or_array_layers as u64), - }; - ( - array_layers, - mtl::MTLSize { - width: extent.width as u64, - height: extent.height as u64, - depth, - }, - ) +pub fn map_copy_extent(extent: &crate::CopyExtent) -> mtl::MTLSize { + mtl::MTLSize { + width: extent.width as u64, + height: extent.height as u64, + depth: extent.depth as u64, + } } -pub fn map_origin(origin: &wgt::Origin3d, raw_type: mtl::MTLTextureType) -> (u64, mtl::MTLOrigin) { - let (z, slice) = match raw_type { - mtl::MTLTextureType::D3 => (origin.z as u64, 0), - _ => (0, origin.z as u64), - }; - ( - slice, - mtl::MTLOrigin { - x: origin.x as u64, - y: origin.y as u64, - z, - }, - ) +pub fn map_origin(origin: &wgt::Origin3d) -> mtl::MTLOrigin { + mtl::MTLOrigin { + x: origin.x as u64, + y: origin.y as u64, + z: origin.z as u64, + } } pub fn map_store_action(store: bool, resolve: bool) -> mtl::MTLStoreAction { diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index 9b2f7f4630..fded155bd2 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -18,13 +18,11 @@ impl super::Texture { where T: Iterator, { - let dim = self.dim; let aspects = self.aspects; let fi = self.format_info; regions.map(move |r| { - let (layer_count, image_extent) = conv::map_extent(r.size, dim); let (image_subresource, image_offset) = - conv::map_subresource_layers(&r.texture_base, dim, aspects, layer_count); + conv::map_subresource_layers(&r.texture_base, aspects); vk::BufferImageCopy { buffer_offset: r.buffer_layout.offset, buffer_row_length: r.buffer_layout.bytes_per_row.map_or(0, |bpr| { @@ -36,7 +34,7 @@ impl super::Texture { .map_or(0, |rpi| rpi.get() * fi.block_dimensions.1 as u32), image_subresource, image_offset, - image_extent, + image_extent: conv::map_copy_extent(&r.size), } }) } @@ -228,17 +226,16 @@ impl crate::CommandEncoder for super::CommandEncoder { let src_layout = conv::derive_image_layout(src_usage, src.aspects); let vk_regions_iter = regions.map(|r| { - let (layer_count, extent) = conv::map_extent(r.size, src.dim); let (src_subresource, src_offset) = - conv::map_subresource_layers(&r.src_base, src.dim, src.aspects, layer_count); + conv::map_subresource_layers(&r.src_base, src.aspects); let (dst_subresource, dst_offset) = - 
conv::map_subresource_layers(&r.dst_base, dst.dim, dst.aspects, layer_count); + conv::map_subresource_layers(&r.dst_base, dst.aspects); vk::ImageCopy { src_subresource, src_offset, dst_subresource, dst_offset, - extent, + extent: conv::map_copy_extent(&r.size), } }); diff --git a/wgpu-hal/src/vulkan/conv.rs b/wgpu-hal/src/vulkan/conv.rs index b6bb4ca10b..ffa38a4c18 100644 --- a/wgpu-hal/src/vulkan/conv.rs +++ b/wgpu-hal/src/vulkan/conv.rs @@ -352,42 +352,6 @@ pub fn map_aspects(aspects: crate::FormatAspects) -> vk::ImageAspectFlags { flags } -pub fn map_origin( - origin: wgt::Origin3d, - texture_dim: wgt::TextureDimension, -) -> (u32, vk::Offset3D) { - let (z, array_layer) = match texture_dim { - wgt::TextureDimension::D3 => (origin.z as i32, 0), - _ => (0, origin.z), - }; - ( - array_layer, - vk::Offset3D { - x: origin.x as i32, - y: origin.y as i32, - z, - }, - ) -} - -pub fn map_extent( - extent: wgt::Extent3d, - texture_dim: wgt::TextureDimension, -) -> (u32, vk::Extent3D) { - let (depth, array_layers) = match texture_dim { - wgt::TextureDimension::D3 => (extent.depth_or_array_layers, 1), - _ => (1, extent.depth_or_array_layers), - }; - ( - array_layers, - vk::Extent3D { - width: extent.width, - height: extent.height, - depth, - }, - ) -} - pub fn map_attachment_ops( op: crate::AttachmentOps, ) -> (vk::AttachmentLoadOp, vk::AttachmentStoreOp) { @@ -541,6 +505,14 @@ pub fn map_view_dimension(dim: wgt::TextureViewDimension) -> vk::ImageViewType { } } +pub fn map_copy_extent(extent: &crate::CopyExtent) -> vk::Extent3D { + vk::Extent3D { + width: extent.width, + height: extent.height, + depth: extent.depth, + } +} + pub fn map_subresource_range( range: &wgt::ImageSubresourceRange, texture_aspect: crate::FormatAspects, @@ -560,16 +532,18 @@ pub fn map_subresource_range( pub fn map_subresource_layers( base: &crate::TextureCopyBase, - texture_dim: wgt::TextureDimension, texture_aspect: crate::FormatAspects, - layer_count: u32, ) -> (vk::ImageSubresourceLayers, vk::Offset3D) { - let (base_array_layer, offset) = map_origin(base.origin, texture_dim); + let offset = vk::Offset3D { + x: base.origin.x as i32, + y: base.origin.y as i32, + z: base.origin.z as i32, + }; let subresource = vk::ImageSubresourceLayers { aspect_mask: map_aspects(base.aspect & texture_aspect), mip_level: base.mip_level, - base_array_layer, - layer_count, + base_array_layer: base.array_layer, + layer_count: 1, }; (subresource, offset) } diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 47fc9a45f4..64156cf218 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -654,7 +654,11 @@ impl crate::Device for super::Device { &self, desc: &crate::TextureDescriptor, ) -> Result { - let (array_layer_count, vk_extent) = conv::map_extent(desc.size, desc.dimension); + let (depth, array_layer_count) = match desc.dimension { + wgt::TextureDimension::D3 => (desc.size.depth_or_array_layers, 1), + _ => (1, desc.size.depth_or_array_layers), + }; + let mut raw_flags = vk::ImageCreateFlags::empty(); if desc.dimension == wgt::TextureDimension::D2 && desc.size.depth_or_array_layers % 6 == 0 { raw_flags |= vk::ImageCreateFlags::CUBE_COMPATIBLE; @@ -664,7 +668,11 @@ impl crate::Device for super::Device { .flags(raw_flags) .image_type(conv::map_texture_dimension(desc.dimension)) .format(self.shared.private_caps.map_texture_format(desc.format)) - .extent(vk_extent) + .extent(vk::Extent3D { + width: desc.size.width, + height: desc.size.height, + depth, + }) .mip_levels(desc.mip_level_count) 
.array_layers(array_layer_count) .samples(vk::SampleCountFlags::from_raw(desc.sample_count)) @@ -699,7 +707,6 @@ impl crate::Device for super::Device { raw, block: Some(block), usage: desc.usage, - dim: desc.dimension, aspects: crate::FormatAspects::from(desc.format), format_info: desc.format.describe(), raw_flags, diff --git a/wgpu-hal/src/vulkan/instance.rs b/wgpu-hal/src/vulkan/instance.rs index ae3c441034..39261edbed 100644 --- a/wgpu-hal/src/vulkan/instance.rs +++ b/wgpu-hal/src/vulkan/instance.rs @@ -647,7 +647,6 @@ impl crate::Surface for super::Surface { raw: sc.images[index as usize], block: None, usage: sc.config.usage, - dim: wgt::TextureDimension::D2, aspects: crate::FormatAspects::COLOR, format_info: sc.config.format.describe(), raw_flags: vk::ImageCreateFlags::empty(), diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index 4b5e17f97c..387d079fd9 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -255,7 +255,6 @@ pub struct Texture { raw: vk::Image, block: Option>, usage: crate::TextureUses, - dim: wgt::TextureDimension, aspects: crate::FormatAspects, format_info: wgt::TextureFormatInfo, raw_flags: vk::ImageCreateFlags, From 2c76b0e656864429e2d9f4807bb711d480071bf5 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Thu, 8 Jul 2021 02:02:03 -0400 Subject: [PATCH 17/33] hal/dx12: clippy fixes, buffer-texture copies --- wgpu-hal/src/dx12/adapter.rs | 20 +++---- wgpu-hal/src/dx12/command.rs | 103 +++++++++++++++++++++++++++++----- wgpu-hal/src/dx12/device.rs | 25 +++++---- wgpu-hal/src/dx12/instance.rs | 16 +++--- wgpu-hal/src/dx12/mod.rs | 29 ++++++---- 5 files changed, 136 insertions(+), 57 deletions(-) diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 00220b1619..17dc1a3910 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -21,7 +21,7 @@ impl super::Adapter { ) -> Option> { // Create the device so that we can get the capabilities. 
let device = match library.create_device(adapter, native::FeatureLevel::L11_0) { - Ok(pair) => match pair.to_result() { + Ok(pair) => match pair.into_result() { Ok(device) => device, Err(err) => { log::warn!("Device creation failed: {}", err); @@ -180,15 +180,11 @@ impl super::Adapter { .max_dynamic_storage_buffers_per_pipeline_layout, max_sampled_textures_per_shader_stage: match options.ResourceBindingTier { d3d12::D3D12_RESOURCE_BINDING_TIER_1 => 128, - d3d12::D3D12_RESOURCE_BINDING_TIER_2 - | d3d12::D3D12_RESOURCE_BINDING_TIER_3 - | _ => full_heap_count, + _ => full_heap_count, }, max_samplers_per_shader_stage: match options.ResourceBindingTier { d3d12::D3D12_RESOURCE_BINDING_TIER_1 => 16, - d3d12::D3D12_RESOURCE_BINDING_TIER_2 - | d3d12::D3D12_RESOURCE_BINDING_TIER_3 - | _ => d3d12::D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE, + _ => d3d12::D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE, }, // these both account towards `uav_count`, but we can't express the limit as as sum max_storage_buffers_per_shader_stage: base.max_storage_buffers_per_shader_stage, @@ -238,7 +234,7 @@ impl crate::Adapter for super::Adapter { native::CommandQueueFlags::empty(), 0, ) - .to_device_result("Queue creation")?; + .into_device_result("Queue creation")?; let device = super::Device::new(self.device, queue, self.private_caps, &self.library)?; Ok(crate::OpenDevice { @@ -328,7 +324,11 @@ impl crate::Adapter for super::Adapter { let mut present_modes = vec![wgt::PresentMode::Fifo]; #[allow(trivial_casts)] - if let Ok(factory5) = surface.factory.cast::().to_result() { + if let Ok(factory5) = surface + .factory + .cast::() + .into_result() + { let mut allow_tearing: minwindef::BOOL = minwindef::FALSE; let hr = factory5.CheckFeatureSupport( dxgi1_5::DXGI_FEATURE_PRESENT_ALLOW_TEARING, @@ -337,7 +337,7 @@ impl crate::Adapter for super::Adapter { ); factory5.destroy(); - match hr.to_result() { + match hr.into_result() { Err(err) => log::warn!("Unable to check for tearing support: {}", err), Ok(()) => present_modes.push(wgt::PresentMode::Immediate), } diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 642e734bad..ecd3a89ead 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -2,6 +2,17 @@ use super::{conv, HResult as _, Resource}; use std::{mem, ops::Range}; use winapi::um::d3d12; +fn make_box(origin: &wgt::Origin3d, size: &crate::CopyExtent) -> d3d12::D3D12_BOX { + d3d12::D3D12_BOX { + left: origin.x, + top: origin.y, + right: origin.x + size.width, + bottom: origin.y + size.height, + front: origin.z, + back: origin.z + size.depth, + } +} + impl crate::CommandEncoder for super::CommandEncoder { unsafe fn begin_encoding(&mut self, label: crate::Label) -> Result<(), crate::DeviceError> { let list = match self.free_lists.pop() { @@ -17,7 +28,7 @@ impl crate::CommandEncoder for super::CommandEncoder { native::PipelineState::null(), 0, ) - .to_device_result("Create command list")?, + .into_device_result("Create command list")?, }; if let Some(label) = label { @@ -135,9 +146,9 @@ impl crate::CommandEncoder for super::CommandEncoder { barrier.range.base_array_layer + rel_array_layer, 0, ); + self.temp.barriers.push(raw); } } - self.temp.barriers.push(raw); } } else if barrier.usage.start == crate::TextureUses::STORAGE_STORE { let mut raw = d3d12::D3D12_RESOURCE_BARRIER { @@ -210,18 +221,10 @@ impl crate::CommandEncoder for super::CommandEncoder { }; for r in regions { - let src_box = d3d12::D3D12_BOX { - left: r.src_base.origin.x, - top: r.src_base.origin.y, - right: 
r.src_base.origin.x + r.size.width, - bottom: r.src_base.origin.y + r.size.height, - front: r.src_base.origin.z, - back: r.src_base.origin.z + r.size.depth, - }; - *src_location.u.SubresourceIndex_mut() = - src.calc_subresource(r.src_base.mip_level, r.src_base.array_layer, 0); - *dst_location.u.SubresourceIndex_mut() = - dst.calc_subresource(r.dst_base.mip_level, r.dst_base.array_layer, 0); + let src_box = make_box(&r.src_base.origin, &r.size); + *src_location.u.SubresourceIndex_mut() = src.calc_subresource_for_copy(&r.src_base); + *dst_location.u.SubresourceIndex_mut() = dst.calc_subresource_for_copy(&r.dst_base); + list.CopyTextureRegion( &dst_location, r.dst_base.origin.x, @@ -241,6 +244,45 @@ impl crate::CommandEncoder for super::CommandEncoder { ) where T: Iterator, { + let list = self.list.unwrap(); + let mut src_location = d3d12::D3D12_TEXTURE_COPY_LOCATION { + pResource: src.resource.as_mut_ptr(), + Type: d3d12::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + u: mem::zeroed(), + }; + let mut dst_location = d3d12::D3D12_TEXTURE_COPY_LOCATION { + pResource: dst.resource.as_mut_ptr(), + Type: d3d12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + u: mem::zeroed(), + }; + let raw_format = conv::map_texture_format(dst.format); + + for r in regions { + let src_box = make_box(&wgt::Origin3d::ZERO, &r.size); + *src_location.u.PlacedFootprint_mut() = d3d12::D3D12_PLACED_SUBRESOURCE_FOOTPRINT { + Offset: r.buffer_layout.offset, + Footprint: d3d12::D3D12_SUBRESOURCE_FOOTPRINT { + Format: raw_format, + Width: r.size.width, + Height: r + .buffer_layout + .rows_per_image + .map_or(r.size.height, |count| count.get()), + Depth: r.size.depth, + RowPitch: r.buffer_layout.bytes_per_row.map_or(0, |count| count.get()), + }, + }; + *dst_location.u.SubresourceIndex_mut() = dst.calc_subresource_for_copy(&r.texture_base); + + list.CopyTextureRegion( + &dst_location, + r.texture_base.origin.x, + r.texture_base.origin.y, + r.texture_base.origin.z, + &src_location, + &src_box, + ); + } } unsafe fn copy_texture_to_buffer( @@ -252,7 +294,38 @@ impl crate::CommandEncoder for super::CommandEncoder { ) where T: Iterator, { - for _r in regions {} + let list = self.list.unwrap(); + let mut src_location = d3d12::D3D12_TEXTURE_COPY_LOCATION { + pResource: src.resource.as_mut_ptr(), + Type: d3d12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + u: mem::zeroed(), + }; + let mut dst_location = d3d12::D3D12_TEXTURE_COPY_LOCATION { + pResource: dst.resource.as_mut_ptr(), + Type: d3d12::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + u: mem::zeroed(), + }; + let raw_format = conv::map_texture_format(src.format); + + for r in regions { + let dst_box = make_box(&r.texture_base.origin, &r.size); + *src_location.u.SubresourceIndex_mut() = src.calc_subresource_for_copy(&r.texture_base); + *dst_location.u.PlacedFootprint_mut() = d3d12::D3D12_PLACED_SUBRESOURCE_FOOTPRINT { + Offset: r.buffer_layout.offset, + Footprint: d3d12::D3D12_SUBRESOURCE_FOOTPRINT { + Format: raw_format, + Width: r.size.width, + Height: r + .buffer_layout + .rows_per_image + .map_or(r.size.height, |count| count.get()), + Depth: r.size.depth, + RowPitch: r.buffer_layout.bytes_per_row.map_or(0, |count| count.get()), + }, + }; + + list.CopyTextureRegion(&src_location, 0, 0, 0, &dst_location, &dst_box); + } } unsafe fn begin_query(&mut self, set: &super::QuerySet, index: u32) {} diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index f2747672b8..ab3b97671d 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -28,7 +28,7 @@ impl 
super::Device { idle_fence.mut_void(), ) }; - hr.to_device_result("Idle fence creation")?; + hr.into_device_result("Idle fence creation")?; Ok(super::Device { raw, @@ -66,7 +66,7 @@ impl super::Device { .idler .fence .set_event_on_completion(self.idler.event, value); - hr.to_device_result("Set event")?; + hr.into_device_result("Set event")?; synchapi::WaitForSingleObject(self.idler.event.0, winbase::INFINITE); Ok(()) } @@ -431,7 +431,7 @@ impl crate::Device for super::Device { if let Ok(debug_device) = self .raw .cast::() - .to_result() + .into_result() { debug_device.ReportLiveDeviceObjects(d3d12sdklayers::D3D12_RLDO_DETAIL); debug_device.destroy(); @@ -494,7 +494,7 @@ impl crate::Device for super::Device { resource.mut_void(), ); - hr.to_device_result("Buffer creation")?; + hr.into_device_result("Buffer creation")?; Ok(super::Buffer { resource }) } unsafe fn destroy_buffer(&self, buffer: super::Buffer) { @@ -507,7 +507,7 @@ impl crate::Device for super::Device { ) -> Result { let mut ptr = ptr::null_mut(); let hr = (*buffer.resource).Map(0, &d3d12::D3D12_RANGE { Begin: 0, End: 0 }, &mut ptr); - hr.to_device_result("Map buffer")?; + hr.into_device_result("Map buffer")?; Ok(crate::BufferMapping { ptr: ptr::NonNull::new(ptr.offset(range.start as isize) as *mut _).unwrap(), //TODO: double-check this. Documentation is a bit misleading - @@ -571,9 +571,10 @@ impl crate::Device for super::Device { resource.SetName(cwstr.as_ptr()); } - hr.to_device_result("Texture creation")?; + hr.into_device_result("Texture creation")?; Ok(super::Texture { resource, + format: desc.format, dimension: desc.dimension, size: desc.size, mip_level_count: desc.mip_level_count, @@ -696,7 +697,7 @@ impl crate::Device for super::Device { let allocator = self .raw .create_command_allocator(native::CmdListType::Direct) - .to_device_result("Command allocator creation")?; + .into_device_result("Command allocator creation")?; Ok(super::CommandEncoder { allocator, device: self.raw, @@ -915,7 +916,7 @@ impl crate::Device for super::Device { log::error!("Unable to find serialization function: {:?}", e); crate::DeviceError::Lost })? 
- .to_device_result("Root signature serialization")?; + .into_device_result("Root signature serialization")?; if !error.is_null() { log::error!( @@ -929,7 +930,7 @@ impl crate::Device for super::Device { let raw = self .raw .create_root_signature(blob, 0) - .to_device_result("Root signature creation")?; + .into_device_result("Root signature creation")?; blob.destroy(); Ok(super::PipelineLayout { @@ -987,7 +988,7 @@ impl crate::Device for super::Device { let raw = self .raw .create_query_heap(heap_ty, desc.count, 0) - .to_device_result("Query heap creation")?; + .into_device_result("Query heap creation")?; Ok(super::QuerySet { raw, ty: desc.ty }) } @@ -1003,7 +1004,7 @@ impl crate::Device for super::Device { &d3d12::ID3D12Fence::uuidof(), raw.mut_void(), ); - hr.to_device_result("Fence creation")?; + hr.into_device_result("Fence creation")?; Ok(super::Fence { raw }) } unsafe fn destroy_fence(&self, fence: super::Fence) { @@ -1025,7 +1026,7 @@ impl crate::Device for super::Device { return Ok(true); } let hr = fence.raw.set_event_on_completion(self.idler.event, value); - hr.to_device_result("Set event")?; + hr.into_device_result("Set event")?; match synchapi::WaitForSingleObject(self.idler.event.0, timeout_ms) { winbase::WAIT_ABANDONED | winbase::WAIT_FAILED => Err(crate::DeviceError::Lost), diff --git a/wgpu-hal/src/dx12/instance.rs b/wgpu-hal/src/dx12/instance.rs index 8d5ff2b84e..c493444fd8 100644 --- a/wgpu-hal/src/dx12/instance.rs +++ b/wgpu-hal/src/dx12/instance.rs @@ -41,7 +41,7 @@ unsafe extern "system" fn output_debug_string_handler( Some(msg) => { match MESSAGE_PREFIXES .iter() - .find(|&(prefix, level)| msg.starts_with(prefix)) + .find(|&&(prefix, level)| msg.starts_with(prefix)) { Some(&(prefix, level)) => (&msg[prefix.len() + 2..], level), None => (msg, log::Level::Debug), @@ -78,7 +78,7 @@ impl crate::Instance for super::Instance { if desc.flags.contains(crate::InstanceFlags::VALIDATION) { // Enable debug layer match lib_main.get_debug_interface() { - Ok(pair) => match pair.to_result() { + Ok(pair) => match pair.into_result() { Ok(debug_controller) => { debug_controller.enable_layer(); debug_controller.Release(); @@ -96,7 +96,7 @@ impl crate::Instance for super::Instance { // `CreateDXGIFactory2` if the debug interface is actually available. So // we check for whether it exists first. match lib_dxgi.get_debug_interface1() { - Ok(pair) => match pair.to_result() { + Ok(pair) => match pair.into_result() { Ok(debug_controller) => { debug_controller.destroy(); factory_flags |= native::FactoryCreationFlags::DEBUG; @@ -116,7 +116,7 @@ impl crate::Instance for super::Instance { // Create DXGI factory let factory = match lib_dxgi.create_factory2(factory_flags) { - Ok(pair) => match pair.to_result() { + Ok(pair) => match pair.into_result() { Ok(factory) => factory, Err(err) => { log::warn!("Failed to create DXGI factory: {}", err); @@ -155,7 +155,7 @@ impl crate::Instance for super::Instance { unsafe fn enumerate_adapters(&self) -> Vec> { // Try to use high performance order by default (returns None on Windows < 1803) - let factory6 = match self.factory.cast::().to_result() { + let factory6 = match self.factory.cast::().into_result() { Ok(f6) => { // It's okay to decrement the refcount here because we // have another reference to the factory already owned by `self`. 
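The to_result -> into_result renames running through these hunks all lean on one convention: a raw HRESULT (an i32) is non-negative on success, and a failure code gets mapped either to a readable message (into_result) or to a crate::DeviceError (into_device_result). Below is a minimal standalone sketch of that conversion; the two error codes are written out as local constants so the sketch builds without winapi, and the full trait (which also covers the (T, i32) pairs returned by the d3d12 crate) appears in the dx12 mod.rs hunk further down.

    use std::borrow::Cow;

    // Standard HRESULT values, spelled out so the sketch has no winapi dependency.
    const E_OUTOFMEMORY: i32 = 0x8007_000Eu32 as i32;
    const E_INVALIDARG: i32 = 0x8007_0057u32 as i32;

    trait HResult {
        fn into_result(self) -> Result<(), Cow<'static, str>>;
    }

    impl HResult for i32 {
        fn into_result(self) -> Result<(), Cow<'static, str>> {
            // Negative HRESULTs signal failure.
            if self >= 0 {
                return Ok(());
            }
            let description = match self {
                E_OUTOFMEMORY => "out of memory",
                E_INVALIDARG => "invalid argument",
                _ => "unknown error",
            };
            Err(Cow::Borrowed(description))
        }
    }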
@@ -184,7 +184,7 @@ impl crate::Instance for super::Instance { if hr == winerror::DXGI_ERROR_NOT_FOUND { break; } - if let Err(err) = hr.to_result() { + if let Err(err) = hr.into_result() { log::error!("Failed enumerating adapters: {}", err); break; } @@ -200,12 +200,12 @@ impl crate::Instance for super::Instance { if hr == winerror::DXGI_ERROR_NOT_FOUND { break; } - if let Err(err) = hr.to_result() { + if let Err(err) = hr.into_result() { log::error!("Failed enumerating adapters: {}", err); break; } - match adapter1.cast::().to_result() { + match adapter1.cast::().into_result() { Ok(adapter2) => { adapter1.destroy(); adapter2 diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 542c845907..99764062a1 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -55,11 +55,11 @@ impl crate::Api for Api { } trait HResult { - fn to_result(self) -> Result>; - fn to_device_result(self, description: &str) -> Result; + fn into_result(self) -> Result>; + fn into_device_result(self, description: &str) -> Result; } impl HResult<()> for i32 { - fn to_result(self) -> Result<(), Cow<'static, str>> { + fn into_result(self) -> Result<(), Cow<'static, str>> { if self >= 0 { return Ok(()); } @@ -72,8 +72,8 @@ impl HResult<()> for i32 { }; Err(Cow::Borrowed(description)) } - fn to_device_result(self, description: &str) -> Result<(), crate::DeviceError> { - self.to_result().map_err(|err| { + fn into_device_result(self, description: &str) -> Result<(), crate::DeviceError> { + self.into_result().map_err(|err| { log::error!("{} failed: {}", description, err); if self == winerror::E_OUTOFMEMORY { crate::DeviceError::OutOfMemory @@ -85,11 +85,11 @@ impl HResult<()> for i32 { } impl HResult for (T, i32) { - fn to_result(self) -> Result> { - self.1.to_result().map(|()| self.0) + fn into_result(self) -> Result> { + self.1.into_result().map(|()| self.0) } - fn to_device_result(self, description: &str) -> Result { - self.1.to_device_result(description).map(|()| self.0) + fn into_device_result(self, description: &str) -> Result { + self.1.into_device_result(description).map(|()| self.0) } } @@ -220,6 +220,7 @@ unsafe impl Sync for Buffer {} #[derive(Debug)] pub struct Texture { resource: native::Resource, + format: wgt::TextureFormat, dimension: wgt::TextureDimension, size: wgt::Extent3d, mip_level_count: u32, @@ -242,6 +243,10 @@ impl Texture { fn calc_subresource(&self, mip_level: u32, array_layer: u32, plane: u32) -> u32 { mip_level + (array_layer + plane * self.array_layer_count()) * self.mip_level_count } + + fn calc_subresource_for_copy(&self, base: &crate::TextureCopyBase) -> u32 { + self.calc_subresource(base.mip_level, base.array_layer, 0) + } } #[derive(Debug)] @@ -367,7 +372,7 @@ impl crate::Surface for Surface { non_srgb_format, flags, ); - if let Err(err) = result.to_result() { + if let Err(err) = result.into_result() { log::error!("ResizeBuffers failed: {}", err); return Err(crate::SurfaceError::Other("window is in use")); } @@ -402,12 +407,12 @@ impl crate::Surface for Surface { swap_chain1.mut_void() as *mut *mut _, ); - if let Err(err) = hr.to_result() { + if let Err(err) = hr.into_result() { log::error!("SwapChain creation error: {}", err); return Err(crate::SurfaceError::Other("swap chain creation")); } - match swap_chain1.cast::().to_result() { + match swap_chain1.cast::().into_result() { Ok(swap_chain3) => { swap_chain1.destroy(); swap_chain3 From 9069c2301fa4c642f2aa3617cd8541d0d66027e9 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Thu, 8 Jul 2021 18:02:54 
-0400 Subject: [PATCH 18/33] hal/dx12: implement fill_buffer with zero --- wgpu-hal/src/dx12/command.rs | 22 ++++++++++----- wgpu-hal/src/dx12/device.rs | 52 +++++++++++++++++++++++++++++++++++- wgpu-hal/src/dx12/mod.rs | 5 ++++ 3 files changed, 71 insertions(+), 8 deletions(-) diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index ecd3a89ead..64e5c0e57a 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -170,13 +170,21 @@ impl crate::CommandEncoder for super::CommandEncoder { } } - unsafe fn fill_buffer( - &mut self, - _buffer: &super::Buffer, - _range: crate::MemoryRange, - _value: u8, - ) { - //TODO + unsafe fn fill_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange, value: u8) { + assert_eq!(value, 0, "Only zero is supported!"); + let list = self.list.unwrap(); + let mut offset = range.start; + while offset < range.end { + let size = super::ZERO_BUFFER_SIZE.min(range.end - offset); + list.CopyBufferRegion( + buffer.resource.as_mut_ptr(), + offset, + self.zero_buffer.as_mut_ptr(), + 0, + size, + ); + offset += size; + } } unsafe fn copy_buffer_to_buffer( diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index ab3b97671d..cebf587262 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -1,6 +1,6 @@ use super::{conv, descriptor, HResult as _}; use parking_lot::Mutex; -use std::{mem, ptr, sync::Arc}; +use std::{mem, ptr, slice, sync::Arc}; use winapi::{ shared::{dxgiformat, dxgitype, winerror}, um::{d3d12, d3d12sdklayers, synchapi, winbase}, @@ -30,6 +30,53 @@ impl super::Device { }; hr.into_device_result("Idle fence creation")?; + let mut zero_buffer = native::Resource::null(); + unsafe { + let raw_desc = d3d12::D3D12_RESOURCE_DESC { + Dimension: d3d12::D3D12_RESOURCE_DIMENSION_BUFFER, + Alignment: 0, + Width: super::ZERO_BUFFER_SIZE, + Height: 1, + DepthOrArraySize: 1, + MipLevels: 1, + Format: dxgiformat::DXGI_FORMAT_UNKNOWN, + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: 1, + Quality: 0, + }, + Layout: d3d12::D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + Flags: d3d12::D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE, + }; + + let heap_properties = d3d12::D3D12_HEAP_PROPERTIES { + Type: d3d12::D3D12_HEAP_TYPE_CUSTOM, + CPUPageProperty: d3d12::D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE, + MemoryPoolPreference: d3d12::D3D12_MEMORY_POOL_L0, + CreationNodeMask: 0, + VisibleNodeMask: 0, + }; + + let hr = raw.CreateCommittedResource( + &heap_properties, + d3d12::D3D12_HEAP_FLAG_NONE, + &raw_desc, + d3d12::D3D12_RESOURCE_STATE_COMMON, + ptr::null(), + &d3d12::ID3D12Resource::uuidof(), + zero_buffer.mut_void(), + ); + + hr.into_device_result("Zero buffer creation")?; + + let range = d3d12::D3D12_RANGE { Begin: 0, End: 0 }; + let mut ptr = std::ptr::null_mut(); + (*zero_buffer) + .Map(0, &range, &mut ptr) + .into_device_result("Map zero buffer")?; + slice::from_raw_parts_mut(ptr as *mut u8, super::ZERO_BUFFER_SIZE as usize).fill(0); + (*zero_buffer).Unmap(0, &range); + }; + Ok(super::Device { raw, present_queue, @@ -54,6 +101,7 @@ impl super::Device { raw, native::DescriptorHeapType::Sampler, )), + zero_buffer, library: Arc::clone(library), }) } @@ -424,6 +472,7 @@ impl crate::Device for super::Device { self.dsv_pool.into_inner().destroy(); self.srv_uav_pool.into_inner().destroy(); self.sampler_pool.into_inner().destroy(); + self.zero_buffer.destroy(); //self.descriptor_updater.lock().destroy(); @@ -701,6 +750,7 @@ impl crate::Device for super::Device { Ok(super::CommandEncoder { allocator, device: 
self.raw, + zero_buffer: self.zero_buffer, list: None, free_lists: Vec::new(), temp: super::Temp::default(), diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 99764062a1..1db9aebdb3 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -93,6 +93,8 @@ impl HResult for (T, i32) { } } +const ZERO_BUFFER_SIZE: wgt::BufferAddress = 256 << 10; + pub struct Instance { factory: native::Factory4, library: Arc, @@ -166,6 +168,8 @@ pub struct Device { dsv_pool: Mutex, srv_uav_pool: Mutex, sampler_pool: Mutex, + // aux resources + zero_buffer: native::Resource, // library library: Arc, } @@ -194,6 +198,7 @@ impl Temp { pub struct CommandEncoder { allocator: native::CommandAllocator, device: native::Device, + zero_buffer: native::Resource, list: Option, free_lists: Vec, temp: Temp, From b818157f67852a9e5579e9d18708481a07da1244 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Thu, 8 Jul 2021 22:47:11 -0400 Subject: [PATCH 19/33] hal/dx12: draws and dispatches --- wgpu-hal/src/dx12/command.rs | 65 +++++++++++++++++++++++++++++++++--- wgpu-hal/src/dx12/device.rs | 36 ++++++++++++++++++-- wgpu-hal/src/dx12/mod.rs | 31 +++++++++++++++-- 3 files changed, 121 insertions(+), 11 deletions(-) diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 64e5c0e57a..bdf3bc22ba 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -1,5 +1,5 @@ use super::{conv, HResult as _, Resource}; -use std::{mem, ops::Range}; +use std::{mem, ops::Range, ptr}; use winapi::um::d3d12; fn make_box(origin: &wgt::Origin3d, size: &crate::CopyExtent) -> d3d12::D3D12_BOX { @@ -179,7 +179,7 @@ impl crate::CommandEncoder for super::CommandEncoder { list.CopyBufferRegion( buffer.resource.as_mut_ptr(), offset, - self.zero_buffer.as_mut_ptr(), + self.shared.zero_buffer.as_mut_ptr(), 0, size, ); @@ -277,7 +277,9 @@ impl crate::CommandEncoder for super::CommandEncoder { .rows_per_image .map_or(r.size.height, |count| count.get()), Depth: r.size.depth, - RowPitch: r.buffer_layout.bytes_per_row.map_or(0, |count| count.get()), + RowPitch: r.buffer_layout.bytes_per_row.map_or(0, |count| { + count.get().max(d3d12::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT) + }), }, }; *dst_location.u.SubresourceIndex_mut() = dst.calc_subresource_for_copy(&r.texture_base); @@ -402,6 +404,9 @@ impl crate::CommandEncoder for super::CommandEncoder { start_instance: u32, instance_count: u32, ) { + self.list + .unwrap() + .draw(vertex_count, instance_count, start_vertex, start_instance); } unsafe fn draw_indexed( &mut self, @@ -411,6 +416,13 @@ impl crate::CommandEncoder for super::CommandEncoder { start_instance: u32, instance_count: u32, ) { + self.list.unwrap().draw_indexed( + index_count, + instance_count, + start_index, + base_vertex, + start_instance, + ); } unsafe fn draw_indirect( &mut self, @@ -418,6 +430,14 @@ impl crate::CommandEncoder for super::CommandEncoder { offset: wgt::BufferAddress, draw_count: u32, ) { + self.list.unwrap().ExecuteIndirect( + self.shared.cmd_signatures.draw.as_mut_ptr(), + draw_count, + buffer.resource.as_mut_ptr(), + offset, + ptr::null_mut(), + 0, + ); } unsafe fn draw_indexed_indirect( &mut self, @@ -425,6 +445,14 @@ impl crate::CommandEncoder for super::CommandEncoder { offset: wgt::BufferAddress, draw_count: u32, ) { + self.list.unwrap().ExecuteIndirect( + self.shared.cmd_signatures.draw_indexed.as_mut_ptr(), + draw_count, + buffer.resource.as_mut_ptr(), + offset, + ptr::null_mut(), + 0, + ); } unsafe fn draw_indirect_count( &mut self, @@ -434,6 
+462,14 @@ impl crate::CommandEncoder for super::CommandEncoder { count_offset: wgt::BufferAddress, max_count: u32, ) { + self.list.unwrap().ExecuteIndirect( + self.shared.cmd_signatures.draw.as_mut_ptr(), + max_count, + buffer.resource.as_mut_ptr(), + offset, + count_buffer.resource.as_mut_ptr(), + count_offset, + ); } unsafe fn draw_indexed_indirect_count( &mut self, @@ -443,6 +479,14 @@ impl crate::CommandEncoder for super::CommandEncoder { count_offset: wgt::BufferAddress, max_count: u32, ) { + self.list.unwrap().ExecuteIndirect( + self.shared.cmd_signatures.draw_indexed.as_mut_ptr(), + max_count, + buffer.resource.as_mut_ptr(), + offset, + count_buffer.resource.as_mut_ptr(), + count_offset, + ); } // compute @@ -452,6 +496,17 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn set_compute_pipeline(&mut self, pipeline: &Resource) {} - unsafe fn dispatch(&mut self, count: [u32; 3]) {} - unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) {} + unsafe fn dispatch(&mut self, count: [u32; 3]) { + self.list.unwrap().dispatch(count); + } + unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) { + self.list.unwrap().ExecuteIndirect( + self.shared.cmd_signatures.dispatch.as_mut_ptr(), + 1, + buffer.resource.as_mut_ptr(), + offset, + ptr::null_mut(), + 0, + ); + } } diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index cebf587262..a466ac820c 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -77,6 +77,36 @@ impl super::Device { (*zero_buffer).Unmap(0, &range); }; + let shared = super::DeviceShared { + zero_buffer, + cmd_signatures: super::CommandSignatures { + draw: raw + .create_command_signature( + native::RootSignature::null(), + &[native::IndirectArgument::draw()], + mem::size_of::() as u32, + 0, + ) + .into_device_result("Command (draw) signature creation")?, + draw_indexed: raw + .create_command_signature( + native::RootSignature::null(), + &[native::IndirectArgument::draw_indexed()], + mem::size_of::() as u32, + 0, + ) + .into_device_result("Command (draw_indexed) signature creation")?, + dispatch: raw + .create_command_signature( + native::RootSignature::null(), + &[native::IndirectArgument::dispatch()], + mem::size_of::() as u32, + 0, + ) + .into_device_result("Command (dispatch) signature creation")?, + }, + }; + Ok(super::Device { raw, present_queue, @@ -85,6 +115,7 @@ impl super::Device { event: native::Event::create(false, false), }, private_caps, + shared: Arc::new(shared), rtv_pool: Mutex::new(descriptor::CpuPool::new( raw, native::DescriptorHeapType::Rtv, @@ -101,7 +132,6 @@ impl super::Device { raw, native::DescriptorHeapType::Sampler, )), - zero_buffer, library: Arc::clone(library), }) } @@ -472,7 +502,7 @@ impl crate::Device for super::Device { self.dsv_pool.into_inner().destroy(); self.srv_uav_pool.into_inner().destroy(); self.sampler_pool.into_inner().destroy(); - self.zero_buffer.destroy(); + self.shared.destroy(); //self.descriptor_updater.lock().destroy(); @@ -750,7 +780,7 @@ impl crate::Device for super::Device { Ok(super::CommandEncoder { allocator, device: self.raw, - zero_buffer: self.zero_buffer, + shared: Arc::clone(&self.shared), list: None, free_lists: Vec::new(), temp: super::Temp::default(), diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 1db9aebdb3..71eb7299ea 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -158,18 +158,43 @@ struct Idler { event: native::Event, } +struct 
CommandSignatures { + draw: native::CommandSignature, + draw_indexed: native::CommandSignature, + dispatch: native::CommandSignature, +} + +impl CommandSignatures { + unsafe fn destroy(&self) { + self.draw.destroy(); + self.draw_indexed.destroy(); + self.dispatch.destroy(); + } +} + +struct DeviceShared { + zero_buffer: native::Resource, + cmd_signatures: CommandSignatures, +} + +impl DeviceShared { + unsafe fn destroy(&self) { + self.zero_buffer.destroy(); + self.cmd_signatures.destroy(); + } +} + pub struct Device { raw: native::Device, present_queue: native::CommandQueue, idler: Idler, private_caps: PrivateCapabilities, + shared: Arc, // CPU only pools rtv_pool: Mutex, dsv_pool: Mutex, srv_uav_pool: Mutex, sampler_pool: Mutex, - // aux resources - zero_buffer: native::Resource, // library library: Arc, } @@ -198,7 +223,7 @@ impl Temp { pub struct CommandEncoder { allocator: native::CommandAllocator, device: native::Device, - zero_buffer: native::Resource, + shared: Arc, list: Option, free_lists: Vec, temp: Temp, From 27b808504879ae41abc8fccaa169892661b235aa Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Thu, 8 Jul 2021 23:11:20 -0400 Subject: [PATCH 20/33] hal/dx12: queries --- wgpu-hal/src/dx12/adapter.rs | 4 +++- wgpu-hal/src/dx12/command.rs | 32 ++++++++++++++++++++++++++++---- wgpu-hal/src/dx12/device.rs | 19 ++++++++++++++----- wgpu-hal/src/dx12/mod.rs | 2 +- wgpu-types/src/lib.rs | 4 ++-- 5 files changed, 48 insertions(+), 13 deletions(-) diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 17dc1a3910..8b17767747 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -145,7 +145,9 @@ impl super::Adapter { | wgt::Features::MULTI_DRAW_INDIRECT_COUNT | wgt::Features::ADDRESS_MODE_CLAMP_TO_BORDER | wgt::Features::NON_FILL_POLYGON_MODE - |wgt::Features::VERTEX_WRITABLE_STORAGE; + | wgt::Features::VERTEX_WRITABLE_STORAGE + | wgt::Features::TIMESTAMP_QUERY + | wgt::Features::PIPELINE_STATISTICS_QUERY; features.set( wgt::Features::CONSERVATIVE_RASTERIZATION, diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index bdf3bc22ba..628ed8a9c6 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -338,10 +338,26 @@ impl crate::CommandEncoder for super::CommandEncoder { } } - unsafe fn begin_query(&mut self, set: &super::QuerySet, index: u32) {} - unsafe fn end_query(&mut self, set: &super::QuerySet, index: u32) {} - unsafe fn write_timestamp(&mut self, set: &super::QuerySet, index: u32) {} - unsafe fn reset_queries(&mut self, set: &super::QuerySet, range: Range) {} + unsafe fn begin_query(&mut self, set: &super::QuerySet, index: u32) { + self.list + .unwrap() + .BeginQuery(set.raw.as_mut_ptr(), set.raw_ty, index); + } + unsafe fn end_query(&mut self, set: &super::QuerySet, index: u32) { + self.list + .unwrap() + .EndQuery(set.raw.as_mut_ptr(), set.raw_ty, index); + } + unsafe fn write_timestamp(&mut self, set: &super::QuerySet, index: u32) { + self.list.unwrap().EndQuery( + set.raw.as_mut_ptr(), + d3d12::D3D12_QUERY_TYPE_TIMESTAMP, + index, + ); + } + unsafe fn reset_queries(&mut self, _set: &super::QuerySet, _range: Range) { + // nothing to do here + } unsafe fn copy_query_results( &mut self, set: &super::QuerySet, @@ -350,6 +366,14 @@ impl crate::CommandEncoder for super::CommandEncoder { offset: wgt::BufferAddress, stride: wgt::BufferSize, ) { + self.list.unwrap().ResolveQueryData( + set.raw.as_mut_ptr(), + set.raw_ty, + range.start, + range.end - range.start, + 
buffer.resource.as_mut_ptr(),
+ offset,
+ );
}
// render
diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs
index a466ac820c..59e0322052 100644
--- a/wgpu-hal/src/dx12/device.rs
+++ b/wgpu-hal/src/dx12/device.rs
@@ -1059,10 +1059,19 @@ impl crate::Device for super::Device {
&self,
desc: &wgt::QuerySetDescriptor,
) -> Result {
- let heap_ty = match desc.ty {
- wgt::QueryType::Occlusion => native::QueryHeapType::Occlusion,
- wgt::QueryType::PipelineStatistics(_) => native::QueryHeapType::PipelineStatistics,
- wgt::QueryType::Timestamp => native::QueryHeapType::Timestamp,
+ let (heap_ty, raw_ty) = match desc.ty {
+ wgt::QueryType::Occlusion => (
+ native::QueryHeapType::Occlusion,
+ d3d12::D3D12_QUERY_TYPE_BINARY_OCCLUSION,
+ ),
+ wgt::QueryType::PipelineStatistics(_) => (
+ native::QueryHeapType::PipelineStatistics,
+ d3d12::D3D12_QUERY_TYPE_PIPELINE_STATISTICS,
+ ),
+ wgt::QueryType::Timestamp => (
+ native::QueryHeapType::Timestamp,
+ d3d12::D3D12_QUERY_TYPE_TIMESTAMP,
+ ),
};
let raw = self
@@ -1070,7 +1079,7 @@ impl crate::Device for super::Device {
.create_query_heap(heap_ty, desc.count, 0)
.into_device_result("Query heap creation")?;
- Ok(super::QuerySet { raw, ty: desc.ty })
+ Ok(super::QuerySet { raw, raw_ty })
}
unsafe fn destroy_query_set(&self, set: super::QuerySet) {
set.raw.destroy();
diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index 71eb7299ea..b8d320beea 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -302,7 +302,7 @@ unsafe impl Sync for Sampler {}
#[derive(Debug)]
pub struct QuerySet {
raw: native::QueryHeap,
- ty: wgt::QueryType,
+ raw_ty: d3d12::D3D12_QUERY_TYPE,
}
unsafe impl Send for QuerySet {}
diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs
index 3b17b82be6..bda8bc7e9b 100644
--- a/wgpu-types/src/lib.rs
+++ b/wgpu-types/src/lib.rs
@@ -204,7 +204,7 @@ bitflags::bitflags! {
///
/// Supported Platforms:
/// - Vulkan (works)
- /// - DX12 (future)
+ /// - DX12 (works)
///
/// This is a web and native feature.
const TIMESTAMP_QUERY = 0x0000_0000_0000_0004;
@@ -219,7 +219,7 @@ bitflags::bitflags! {
///
/// Supported Platforms:
/// - Vulkan (works)
- /// - DX12 (future)
+ /// - DX12 (works)
///
/// This is a web and native feature.
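The documentation change just above marks TIMESTAMP_QUERY as working on DX12, which raises the usual follow-up of turning resolved tick values into time. The sketch below shows only that arithmetic; the tick rate would come from the command queue (the D3D12 API exposes it as ID3D12CommandQueue::GetTimestampFrequency) and is passed in here as a plain number so the example stands on its own.

    // Convert a pair of resolved GPU timestamps into a duration in nanoseconds.
    fn timestamp_delta_nanos(begin_ticks: u64, end_ticks: u64, frequency_hz: u64) -> f64 {
        let delta = end_ticks.saturating_sub(begin_ticks);
        delta as f64 * 1_000_000_000.0 / frequency_hz as f64
    }

    fn main() {
        // 2_000 ticks apart at a 1 MHz timestamp clock is 2 milliseconds.
        assert_eq!(timestamp_delta_nanos(1_000, 3_000, 1_000_000), 2_000_000.0);
    }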
const PIPELINE_STATISTICS_QUERY = 0x0000_0000_0000_0008; From 1f91c5f297bdc179bf85fdd0f58a15b07ece501b Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Thu, 8 Jul 2021 23:21:02 -0400 Subject: [PATCH 21/33] hal/dx12: debug markers --- wgpu-hal/src/dx12/command.rs | 27 ++++++++++++++++++++++++--- wgpu-hal/src/dx12/device.rs | 10 ++++++++++ wgpu-hal/src/dx12/mod.rs | 2 ++ 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 628ed8a9c6..dc7001b92e 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -13,6 +13,15 @@ fn make_box(origin: &wgt::Origin3d, size: &crate::CopyExtent) -> d3d12::D3D12_BO } } +impl super::Temp { + fn prepare_marker(&mut self, marker: &str) -> (&[u16], u32) { + self.marker.clear(); + self.marker.extend(marker.encode_utf16()); + self.marker.push(0); + (&self.marker, self.marker.len() as u32 * 2) + } +} + impl crate::CommandEncoder for super::CommandEncoder { unsafe fn begin_encoding(&mut self, label: crate::Label) -> Result<(), crate::DeviceError> { let list = match self.free_lists.pop() { @@ -398,9 +407,21 @@ impl crate::CommandEncoder for super::CommandEncoder { ) { } - unsafe fn insert_debug_marker(&mut self, label: &str) {} - unsafe fn begin_debug_marker(&mut self, group_label: &str) {} - unsafe fn end_debug_marker(&mut self) {} + unsafe fn insert_debug_marker(&mut self, label: &str) { + let (wide_label, size) = self.temp.prepare_marker(label); + self.list + .unwrap() + .SetMarker(0, wide_label.as_ptr() as *const _, size); + } + unsafe fn begin_debug_marker(&mut self, group_label: &str) { + let (wide_label, size) = self.temp.prepare_marker(group_label); + self.list + .unwrap() + .BeginEvent(0, wide_label.as_ptr() as *const _, size); + } + unsafe fn end_debug_marker(&mut self) { + self.list.unwrap().EndEvent() + } unsafe fn set_render_pipeline(&mut self, pipeline: &Resource) {} diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 59e0322052..efcc088a13 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -1013,6 +1013,11 @@ impl crate::Device for super::Device { .into_device_result("Root signature creation")?; blob.destroy(); + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + raw.SetName(cwstr.as_ptr()); + } + Ok(super::PipelineLayout { raw, parameter_offsets, @@ -1079,6 +1084,11 @@ impl crate::Device for super::Device { .create_query_heap(heap_ty, desc.count, 0) .into_device_result("Query heap creation")?; + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + raw.SetName(cwstr.as_ptr()); + } + Ok(super::QuerySet { raw, raw_ty }) } unsafe fn destroy_query_set(&self, set: super::QuerySet) { diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index b8d320beea..244d070d5d 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -211,11 +211,13 @@ unsafe impl Sync for Queue {} #[derive(Default)] struct Temp { + marker: Vec, barriers: Vec, } impl Temp { fn clear(&mut self) { + self.marker.clear(); self.barriers.clear(); } } From 81f31f5555e2540dc92f99ebfd506fd937c2bd6a Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Fri, 9 Jul 2021 01:22:46 -0400 Subject: [PATCH 22/33] hal: change blend color argument --- wgpu-core/src/command/render.rs | 8 ++++- wgpu-hal/src/dx12/command.rs | 59 ++++++++++++++++++++++++++++----- wgpu-hal/src/dx12/device.rs | 1 + wgpu-hal/src/dx12/mod.rs | 1 + wgpu-hal/src/empty.rs | 2 +- wgpu-hal/src/gles/command.rs | 10 
++---- wgpu-hal/src/lib.rs | 2 +- wgpu-hal/src/metal/command.rs | 9 ++--- wgpu-hal/src/vulkan/command.rs | 12 ++----- 9 files changed, 68 insertions(+), 36 deletions(-) diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index 883ae6ceee..a95ea4cdd5 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -1206,8 +1206,14 @@ impl Global { } RenderCommand::SetBlendConstant(ref color) => { state.blend_constant = OptionalState::Set; + let array = [ + color.r as f32, + color.g as f32, + color.b as f32, + color.a as f32, + ]; unsafe { - raw.set_blend_constants(color); + raw.set_blend_constants(&array); } } RenderCommand::SetStencilReference(value) => { diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index dc7001b92e..7beffa8a01 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -46,6 +46,7 @@ impl crate::CommandEncoder for super::CommandEncoder { } self.list = Some(list); self.temp.clear(); + self.has_pass_label = false; Ok(()) } unsafe fn discard_encoding(&mut self) { @@ -387,8 +388,18 @@ impl crate::CommandEncoder for super::CommandEncoder { // render - unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor) {} - unsafe fn end_render_pass(&mut self) {} + unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor) { + if let Some(label) = desc.label { + self.begin_debug_marker(label); + self.has_pass_label = true; + } + } + unsafe fn end_render_pass(&mut self) { + if self.has_pass_label { + self.end_debug_marker(); + self.has_pass_label = false; + } + } unsafe fn set_bind_group( &mut self, @@ -437,10 +448,32 @@ impl crate::CommandEncoder for super::CommandEncoder { binding: crate::BufferBinding<'a, super::Api>, ) { } - unsafe fn set_viewport(&mut self, rect: &crate::Rect, depth_range: Range) {} - unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect) {} - unsafe fn set_stencil_reference(&mut self, value: u32) {} - unsafe fn set_blend_constants(&mut self, color: &wgt::Color) {} + unsafe fn set_viewport(&mut self, rect: &crate::Rect, depth_range: Range) { + let raw_vp = d3d12::D3D12_VIEWPORT { + TopLeftX: rect.x, + TopLeftY: rect.y, + Width: rect.w, + Height: rect.h, + MinDepth: depth_range.start, + MaxDepth: depth_range.end, + }; + self.list.unwrap().RSSetViewports(1, &raw_vp); + } + unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect) { + let raw_rect = d3d12::D3D12_RECT { + left: rect.x as i32, + top: rect.y as i32, + right: (rect.x + rect.w) as i32, + bottom: (rect.y + rect.h) as i32, + }; + self.list.unwrap().RSSetScissorRects(1, &raw_rect); + } + unsafe fn set_stencil_reference(&mut self, value: u32) { + self.list.unwrap().set_stencil_reference(value); + } + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { + self.list.unwrap().set_blend_factor(*color); + } unsafe fn draw( &mut self, @@ -536,8 +569,18 @@ impl crate::CommandEncoder for super::CommandEncoder { // compute - unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) {} - unsafe fn end_compute_pass(&mut self) {} + unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { + if let Some(label) = desc.label { + self.begin_debug_marker(label); + self.has_pass_label = true; + } + } + unsafe fn end_compute_pass(&mut self) { + if self.has_pass_label { + self.end_debug_marker(); + self.has_pass_label = false; + } + } unsafe fn set_compute_pipeline(&mut self, pipeline: &Resource) {} diff --git a/wgpu-hal/src/dx12/device.rs 
b/wgpu-hal/src/dx12/device.rs index efcc088a13..2b2cad28e3 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -783,6 +783,7 @@ impl crate::Device for super::Device { shared: Arc::clone(&self.shared), list: None, free_lists: Vec::new(), + has_pass_label: false, temp: super::Temp::default(), }) } diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 244d070d5d..30e26561d7 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -228,6 +228,7 @@ pub struct CommandEncoder { shared: Arc, list: Option, free_lists: Vec, + has_pass_label: bool, temp: Temp, } diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index 3c6a51eaec..c2710e4532 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -322,7 +322,7 @@ impl crate::CommandEncoder for Encoder { unsafe fn set_viewport(&mut self, rect: &crate::Rect, depth_range: Range) {} unsafe fn set_scissor_rect(&mut self, rect: &crate::Rect) {} unsafe fn set_stencil_reference(&mut self, value: u32) {} - unsafe fn set_blend_constants(&mut self, color: &wgt::Color) {} + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) {} unsafe fn draw( &mut self, diff --git a/wgpu-hal/src/gles/command.rs b/wgpu-hal/src/gles/command.rs index 5fb632f330..b5e8ff8355 100644 --- a/wgpu-hal/src/gles/command.rs +++ b/wgpu-hal/src/gles/command.rs @@ -767,14 +767,8 @@ impl crate::CommandEncoder for super::CommandEncoder { self.state.stencil.back.reference = value; self.rebind_stencil_func(); } - unsafe fn set_blend_constants(&mut self, color: &wgt::Color) { - let color = [ - color.r as f32, - color.g as f32, - color.b as f32, - color.a as f32, - ]; - self.cmd_buffer.commands.push(C::SetBlendConstant(color)); + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { + self.cmd_buffer.commands.push(C::SetBlendConstant(*color)); } unsafe fn draw( diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 97ae478821..4c8f434944 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -437,7 +437,7 @@ pub trait CommandEncoder: Send + Sync { unsafe fn set_viewport(&mut self, rect: &Rect, depth_range: Range); unsafe fn set_scissor_rect(&mut self, rect: &Rect); unsafe fn set_stencil_reference(&mut self, value: u32); - unsafe fn set_blend_constants(&mut self, color: &wgt::Color); + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]); unsafe fn draw( &mut self, diff --git a/wgpu-hal/src/metal/command.rs b/wgpu-hal/src/metal/command.rs index e395f9320c..5fb648e151 100644 --- a/wgpu-hal/src/metal/command.rs +++ b/wgpu-hal/src/metal/command.rs @@ -673,14 +673,9 @@ impl crate::CommandEncoder for super::CommandEncoder { let encoder = self.state.render.as_ref().unwrap(); encoder.set_stencil_front_back_reference_value(value, value); } - unsafe fn set_blend_constants(&mut self, color: &wgt::Color) { + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { let encoder = self.state.render.as_ref().unwrap(); - encoder.set_blend_color( - color.r as f32, - color.g as f32, - color.b as f32, - color.a as f32, - ); + encoder.set_blend_color(color[0], color[1], color[2], color[3]); } unsafe fn draw( diff --git a/wgpu-hal/src/vulkan/command.rs b/wgpu-hal/src/vulkan/command.rs index fded155bd2..d87a7361e1 100644 --- a/wgpu-hal/src/vulkan/command.rs +++ b/wgpu-hal/src/vulkan/command.rs @@ -568,16 +568,8 @@ impl crate::CommandEncoder for super::CommandEncoder { .raw .cmd_set_stencil_reference(self.active, vk::StencilFaceFlags::all(), value); } - unsafe fn set_blend_constants(&mut self, color: &wgt::Color) 
{ - let vk_constants = [ - color.r as f32, - color.g as f32, - color.b as f32, - color.a as f32, - ]; - self.device - .raw - .cmd_set_blend_constants(self.active, &vk_constants); + unsafe fn set_blend_constants(&mut self, color: &[f32; 4]) { + self.device.raw.cmd_set_blend_constants(self.active, color); } unsafe fn draw( From 12bc3eb3fc2986152e3b8949f875ef4914dcaa18 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Fri, 9 Jul 2021 01:48:52 -0400 Subject: [PATCH 23/33] hal/dx12: set render targets --- wgpu-hal/src/dx12/command.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 7beffa8a01..672a29802b 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -393,6 +393,30 @@ impl crate::CommandEncoder for super::CommandEncoder { self.begin_debug_marker(label); self.has_pass_label = true; } + + self.temp.barriers.clear(); + + let mut color_views = [native::CpuDescriptor { ptr: 0 }; crate::MAX_COLOR_TARGETS]; + for (cv, cat) in color_views.iter_mut().zip(desc.color_attachments.iter()) { + *cv = cat.target.view.handle_rtv.unwrap().raw; + } + let ds_view = match desc.depth_stencil_attachment { + None => ptr::null(), + Some(ref ds) => { + if ds.target.usage == crate::TextureUses::DEPTH_STENCIL_WRITE { + &ds.target.view.handle_dsv_rw.as_ref().unwrap().raw + } else { + &ds.target.view.handle_dsv_ro.as_ref().unwrap().raw + } + } + }; + + self.list.unwrap().OMSetRenderTargets( + desc.color_attachments.len() as u32, + color_views.as_ptr(), + 0, + ds_view, + ); } unsafe fn end_render_pass(&mut self) { if self.has_pass_label { From 00859b9e0cb907d4ed2a57612cfe161e01b2baed Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Fri, 9 Jul 2021 13:17:28 -0400 Subject: [PATCH 24/33] Stop using render pass boundary usages for resource transitions --- wgpu-core/src/command/render.rs | 134 ++++++++++++++---------------- wgpu-core/src/track/buffer.rs | 45 ---------- wgpu-core/src/track/mod.rs | 33 +------- wgpu-core/src/track/texture.rs | 34 -------- wgpu-hal/examples/halmark/main.rs | 19 ++++- wgpu-hal/src/dx12/mod.rs | 2 +- wgpu-hal/src/lib.rs | 9 +- wgpu-hal/src/vulkan/conv.rs | 6 +- wgpu-hal/src/vulkan/device.rs | 18 ++-- wgpu-hal/src/vulkan/mod.rs | 10 +-- 10 files changed, 98 insertions(+), 212 deletions(-) diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index a95ea4cdd5..7cafb9956f 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -15,6 +15,7 @@ use crate::{ memory_init_tracker::{MemoryInitKind, MemoryInitTrackerAction}, pipeline::PipelineFlags, resource::{Texture, TextureView, TextureViewSource}, + swap_chain::SwapChain, track::{StatefulTrackerSubset, TextureSelector, UsageConflict}, validation::{ check_buffer_usage, check_texture_usage, MissingBufferUsageError, MissingTextureUsageError, @@ -491,11 +492,11 @@ where struct RenderAttachment<'a> { texture_id: &'a Stored, selector: &'a TextureSelector, - previous_use: Option, - new_use: hal::TextureUses, + usage: hal::TextureUses, } -type AttachmentDataVec = ArrayVec; +const MAX_TOTAL_ATTACHMENTS: usize = hal::MAX_COLOR_TARGETS + hal::MAX_COLOR_TARGETS + 1; +type AttachmentDataVec = ArrayVec; struct RenderPassInfo<'a, A: hal::Api> { context: RenderPassContext, @@ -514,6 +515,7 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { depth_stencil_attachment: Option<&RenderPassDepthStencilAttachment>, cmd_buf: &mut CommandBuffer, view_guard: &'a Storage, id::TextureViewId>, + 
swap_chain_guard: &'a Storage, id::SwapChainId>, ) -> Result { profiling::scope!("start", "RenderPassInfo"); @@ -527,7 +529,7 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { let mut attachment_type_name = ""; let mut extent = None; let mut sample_count = 0; - let mut used_swap_chain = None::>; + let mut used_swap_chain = None::<(Stored, hal::TextureUses)>; let mut add_view = |view: &TextureView, type_name| { if let Some(ex) = extent { @@ -577,12 +579,7 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { } }; - // Using render pass for transition. - let previous_use = cmd_buf - .trackers - .textures - .query(source_id.value, view.selector.clone()); - let new_use = if at.is_read_only(ds_aspects)? { + let usage = if at.is_read_only(ds_aspects)? { is_ds_read_only = true; hal::TextureUses::DEPTH_STENCIL_READ | hal::TextureUses::SAMPLED } else { @@ -591,16 +588,13 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { render_attachments.push(RenderAttachment { texture_id: source_id, selector: &view.selector, - previous_use, - new_use, + usage, }); - let old_use = previous_use.unwrap_or(new_use); depth_stencil = Some(hal::DepthStencilAttachment { target: hal::Attachment { view: &view.raw, - usage: new_use, - boundary_usage: old_use..new_use, + usage, }, depth_ops: at.depth.hal_ops(), stencil_ops: at.stencil.hal_ops(), @@ -626,33 +620,22 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { )); } - let boundary_usage = match color_view.source { + match color_view.source { TextureViewSource::Native(ref source_id) => { - let previous_use = cmd_buf - .trackers - .textures - .query(source_id.value, color_view.selector.clone()); - let new_use = hal::TextureUses::COLOR_TARGET; render_attachments.push(RenderAttachment { texture_id: source_id, selector: &color_view.selector, - previous_use, - new_use, + usage: hal::TextureUses::COLOR_TARGET, }); - - let old_use = previous_use.unwrap_or(new_use); - old_use..new_use } TextureViewSource::SwapChain(ref source_id) => { - assert!(used_swap_chain.is_none()); - used_swap_chain = Some(source_id.clone()); - - let end = hal::TextureUses::empty(); - let start = match at.channel.load_op { + //HACK: guess the start usage based on the load op + let start_usage = match at.channel.load_op { + LoadOp::Load => hal::TextureUses::empty(), LoadOp::Clear => hal::TextureUses::UNINITIALIZED, - LoadOp::Load => end, }; - start..end + assert!(used_swap_chain.is_none()); + used_swap_chain = Some((source_id.clone(), start_usage)); } }; @@ -676,34 +659,25 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { return Err(RenderPassErrorInner::InvalidResolveTargetSampleCount); } - let boundary_usage = match resolve_view.source { + match resolve_view.source { TextureViewSource::Native(ref source_id) => { - let previous_use = cmd_buf - .trackers - .textures - .query(source_id.value, resolve_view.selector.clone()); - let new_use = hal::TextureUses::COLOR_TARGET; render_attachments.push(RenderAttachment { texture_id: source_id, selector: &resolve_view.selector, - previous_use, - new_use, + usage: hal::TextureUses::COLOR_TARGET, }); - - let old_use = previous_use.unwrap_or(new_use); - old_use..new_use } TextureViewSource::SwapChain(ref source_id) => { + //HACK: guess the start usage + let start_usage = hal::TextureUses::UNINITIALIZED; assert!(used_swap_chain.is_none()); - used_swap_chain = Some(source_id.clone()); - hal::TextureUses::UNINITIALIZED..hal::TextureUses::empty() + used_swap_chain = Some((source_id.clone(), start_usage)); } }; hal_resolve_target = Some(hal::Attachment { view: &resolve_view.raw, usage: 
hal::TextureUses::COLOR_TARGET, - boundary_usage, }); } @@ -711,7 +685,6 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { target: hal::Attachment { view: &color_view.raw, usage: hal::TextureUses::COLOR_TARGET, - boundary_usage, }, resolve_target: hal_resolve_target, ops: at.channel.hal_ops(), @@ -723,6 +696,21 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { return Err(RenderPassErrorInner::InvalidSampleCount(sample_count)); } + if let Some((ref sc_id, start_usage)) = used_swap_chain { + let &(_, ref suf_texture) = swap_chain_guard[sc_id.value] + .acquired_texture + .as_ref() + .unwrap(); + let barrier = hal::TextureBarrier { + texture: std::borrow::Borrow::borrow(suf_texture), + usage: start_usage..hal::TextureUses::COLOR_TARGET, + range: wgt::ImageSubresourceRange::default(), + }; + unsafe { + cmd_buf.encoder.raw.transition_textures(iter::once(barrier)); + } + } + let view_data = AttachmentData { colors: color_attachments .iter() @@ -756,7 +744,7 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { context, trackers: StatefulTrackerSubset::new(A::VARIANT), render_attachments, - used_swap_chain, + used_swap_chain: used_swap_chain.map(|(sc_id, _)| sc_id), is_ds_read_only, extent, _phantom: PhantomData, @@ -767,9 +755,28 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { mut self, raw: &mut A::CommandEncoder, texture_guard: &Storage, id::TextureId>, + swap_chain_guard: &Storage, id::SwapChainId>, ) -> Result<(StatefulTrackerSubset, Option>), RenderPassErrorInner> { profiling::scope!("finish", "RenderPassInfo"); + unsafe { + raw.end_render_pass(); + } + + if let Some(ref sc_id) = self.used_swap_chain { + let &(_, ref suf_texture) = swap_chain_guard[sc_id.value] + .acquired_texture + .as_ref() + .unwrap(); + let barrier = hal::TextureBarrier { + texture: std::borrow::Borrow::borrow(suf_texture), + usage: hal::TextureUses::COLOR_TARGET..hal::TextureUses::empty(), + range: wgt::ImageSubresourceRange::default(), + }; + unsafe { + raw.transition_textures(iter::once(barrier)); + } + } for ra in self.render_attachments { let texture = &texture_guard[ra.texture_id.value]; @@ -782,29 +789,11 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { ra.texture_id.value, &ra.texture_id.ref_count, ra.selector.clone(), - ra.new_use, + ra.usage, ) .map_err(UsageConflict::from)?; - - if let Some(usage) = ra.previous_use { - // Make the attachment tracks to be aware of the internal - // transition done by the render pass, by registering the - // previous usage as the initial state. 
- self.trackers - .textures - .prepend( - ra.texture_id.value, - &ra.texture_id.ref_count, - ra.selector.clone(), - usage, - ) - .unwrap(); - } } - unsafe { - raw.end_render_pass(); - } Ok((self.trackers, self.used_swap_chain)) } } @@ -842,7 +831,7 @@ impl Global { let (device_guard, mut token) = hub.devices.read(&mut token); let (pass_raw, trackers, query_reset_state) = { - // read-only lock guard + let (swap_chain_guard, mut token) = hub.swap_chains.read(&mut token); let (mut cmb_guard, mut token) = hub.command_buffers.write(&mut token); let cmd_buf = @@ -886,6 +875,7 @@ impl Global { depth_stencil_attachment, cmd_buf, &*view_guard, + &*swap_chain_guard, ) .map_pass_err(scope)?; @@ -1744,8 +1734,10 @@ impl Global { } log::trace!("Merging {:?} with the render pass", encoder_id); - let (trackers, used_swapchain) = - info.finish(raw, &*texture_guard).map_pass_err(scope)?; + let (trackers, used_swapchain) = info + .finish(raw, &*texture_guard, &*swap_chain_guard) + .map_pass_err(scope)?; + let raw_cmd_buf = unsafe { raw.end_encoding() .map_err(|_| RenderPassErrorInner::OutOfMemory) diff --git a/wgpu-core/src/track/buffer.rs b/wgpu-core/src/track/buffer.rs index 8fb2da3699..35cceb253a 100644 --- a/wgpu-core/src/track/buffer.rs +++ b/wgpu-core/src/track/buffer.rs @@ -75,25 +75,6 @@ impl ResourceState for BufferState { Ok(()) } - fn prepend( - &mut self, - id: Valid, - _selector: Self::Selector, - usage: Self::Usage, - ) -> Result<(), PendingTransition> { - match self.first { - Some(old) if old != usage => Err(PendingTransition { - id, - selector: (), - usage: old..usage, - }), - _ => { - self.first = Some(usage); - Ok(()) - } - } - } - fn merge( &mut self, id: Valid, @@ -205,30 +186,4 @@ mod test { } ); } - - #[test] - fn prepend() { - let mut bs = Unit { - first: None, - last: BufferUses::VERTEX, - }; - let id = Id::dummy(); - bs.prepend(id, (), BufferUses::INDEX).unwrap(); - bs.prepend(id, (), BufferUses::INDEX).unwrap(); - assert_eq!( - bs.prepend(id, (), BufferUses::STORAGE_LOAD), - Err(PendingTransition { - id, - selector: (), - usage: BufferUses::INDEX..BufferUses::STORAGE_LOAD, - }) - ); - assert_eq!( - bs, - Unit { - first: Some(BufferUses::INDEX), - last: BufferUses::VERTEX, - } - ); - } } diff --git a/wgpu-core/src/track/mod.rs b/wgpu-core/src/track/mod.rs index 5d6af42d8a..ce5e70a85c 100644 --- a/wgpu-core/src/track/mod.rs +++ b/wgpu-core/src/track/mod.rs @@ -76,14 +76,6 @@ pub(crate) trait ResourceState: Clone + Default { output: Option<&mut Vec>>, ) -> Result<(), PendingTransition>; - /// Sets up the first usage of the selected sub-resources. - fn prepend( - &mut self, - id: Valid, - selector: Self::Selector, - usage: Self::Usage, - ) -> Result<(), PendingTransition>; - /// Merge the state of this resource tracked by a different instance /// with the current one. /// @@ -309,6 +301,7 @@ impl ResourceTracker { /// /// Returns `Some(Usage)` only if this usage is consistent /// across the given selector. + #[allow(unused)] // TODO: figure out if this needs to be removed pub fn query(&self, id: Valid, selector: S::Selector) -> Option { let (index, epoch, backend) = id.0.unzip(); debug_assert_eq!(backend, self.backend); @@ -397,21 +390,6 @@ impl ResourceTracker { self.temp.drain(..) } - /// Turn the tracking from the "expand" mode into the "replace" one, - /// installing the selected usage as the "first". - /// This is a special operation only used by the render pass attachments. 
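The prepend path being deleted here existed so a render pass could retroactively install the first usage of an attachment; with explicit barriers now emitted at the swap-chain boundary, what remains is the ordinary rule of remembering the last known usage and recording an old..new transition whenever the next usage differs. The sketch below is a deliberately simplified illustration of that rule (names are illustrative, and it ignores the read-read usage combining the real tracker performs).

    use std::ops::Range;

    // Illustrative usage states, not the wgpu-hal TextureUses flags.
    #[derive(Clone, Copy, PartialEq, Eq, Debug)]
    enum Use {
        Uninitialized,
        Sampled,
        ColorTarget,
    }

    struct Tracked {
        last: Use,
    }

    impl Tracked {
        // Returns the `old..new` transition to record before the resource is
        // used as `next`, or None when the usage is unchanged.
        fn set_usage(&mut self, next: Use) -> Option<Range<Use>> {
            if self.last == next {
                None
            } else {
                let old = std::mem::replace(&mut self.last, next);
                Some(old..next)
            }
        }
    }

    fn main() {
        let mut tex = Tracked { last: Use::Uninitialized };
        assert_eq!(tex.set_usage(Use::ColorTarget), Some(Use::Uninitialized..Use::ColorTarget));
        assert_eq!(tex.set_usage(Use::ColorTarget), None);
        assert_eq!(tex.set_usage(Use::Sampled), Some(Use::ColorTarget..Use::Sampled));
    }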
- pub(crate) fn prepend( - &mut self, - id: Valid, - ref_count: &RefCount, - selector: S::Selector, - usage: S::Usage, - ) -> Result<(), PendingTransition> { - Self::get_or_insert(self.backend, &mut self.map, id, ref_count) - .state - .prepend(id, selector, usage) - } - /// Merge another tracker into `self` by extending the current states /// without any transitions. pub(crate) fn merge_extend(&mut self, other: &Self) -> Result<(), PendingTransition> { @@ -528,15 +506,6 @@ impl ResourceState for PhantomData { Ok(()) } - fn prepend( - &mut self, - _id: Valid, - _selector: Self::Selector, - _usage: Self::Usage, - ) -> Result<(), PendingTransition> { - Ok(()) - } - fn merge( &mut self, _id: Valid, diff --git a/wgpu-core/src/track/texture.rs b/wgpu-core/src/track/texture.rs index 84e3ae7499..d3fe33528f 100644 --- a/wgpu-core/src/track/texture.rs +++ b/wgpu-core/src/track/texture.rs @@ -136,40 +136,6 @@ impl ResourceState for TextureState { Ok(()) } - fn prepend( - &mut self, - id: Valid, - selector: Self::Selector, - usage: Self::Usage, - ) -> Result<(), PendingTransition> { - assert!(self.mips.len() >= selector.levels.end as usize); - for (mip_id, mip) in self.mips[selector.levels.start as usize..selector.levels.end as usize] - .iter_mut() - .enumerate() - { - let level = selector.levels.start + mip_id as u32; - let layers = mip.isolate(&selector.layers, Unit::new(usage)); - for &mut (ref range, ref mut unit) in layers { - match unit.first { - Some(old) if old != usage => { - return Err(PendingTransition { - id, - selector: TextureSelector { - levels: level..level + 1, - layers: range.clone(), - }, - usage: old..usage, - }); - } - _ => { - unit.first = Some(usage); - } - } - } - } - Ok(()) - } - fn merge( &mut self, id: Valid, diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index fb6c102028..9e762bb3a8 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -593,11 +593,18 @@ impl Example { let ctx = &mut self.contexts[self.context_index]; + let surface_tex = unsafe { self.surface.acquire_texture(!0).unwrap().unwrap().texture }; + + let target_barrier0 = hal::TextureBarrier { + texture: surface_tex.borrow(), + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::UNINITIALIZED..hal::TextureUses::COLOR_TARGET, + }; unsafe { ctx.encoder.begin_encoding(Some("frame")).unwrap(); + ctx.encoder.transition_textures(iter::once(target_barrier0)); } - let surface_tex = unsafe { self.surface.acquire_texture(!0).unwrap().unwrap().texture }; let surface_view_desc = hal::TextureViewDescriptor { label: None, format: self.surface_format, @@ -622,7 +629,6 @@ impl Example { target: hal::Attachment { view: &surface_tex_view, usage: hal::TextureUses::COLOR_TARGET, - boundary_usage: hal::TextureUses::UNINITIALIZED..hal::TextureUses::empty(), }, resolve_target: None, ops: hal::AttachmentOps::STORE, @@ -655,8 +661,17 @@ impl Example { ctx.frames_recorded += 1; let do_fence = ctx.frames_recorded > COMMAND_BUFFER_PER_CONTEXT; + let target_barrier1 = hal::TextureBarrier { + texture: surface_tex.borrow(), + range: wgt::ImageSubresourceRange::default(), + usage: hal::TextureUses::COLOR_TARGET..hal::TextureUses::empty(), + }; unsafe { ctx.encoder.end_render_pass(); + ctx.encoder.transition_textures(iter::once(target_barrier1)); + } + + unsafe { let cmd_buf = ctx.encoder.end_encoding().unwrap(); let fence_param = if do_fence { Some((&mut ctx.fence, ctx.fence_value)) diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 
30e26561d7..6be21520e9 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -349,7 +349,7 @@ pub struct PipelineLayout { total_slots: u32, // Storing for each associated bind group, which tables we created // in the root signature. This is required for binding descriptor sets. - elements: arrayvec::ArrayVec<[RootElement; crate::MAX_BIND_GROUPS]>, + elements: arrayvec::ArrayVec, } unsafe impl Send for PipelineLayout {} diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 4c8f434944..4e42368746 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -1044,13 +1044,9 @@ pub struct BufferTextureCopy { #[derive(Debug)] pub struct Attachment<'a, A: Api> { pub view: &'a A::TextureView, - /// Contains either a single mutating usage as a target, or a valid combination - /// of read-only usages. + /// Contains either a single mutating usage as a target, + /// or a valid combination of read-only usages. pub usage: TextureUses, - /// Defines the boundary usages for the attachment. - /// It is expected to begin a render pass with `boundary_usage.start` usage, - /// and will end it with `boundary_usage.end` usage. - pub boundary_usage: Range, } // Rust gets confused about the impl requirements for `A` @@ -1059,7 +1055,6 @@ impl Clone for Attachment<'_, A> { Self { view: self.view, usage: self.usage, - boundary_usage: self.boundary_usage.clone(), } } } diff --git a/wgpu-hal/src/vulkan/conv.rs b/wgpu-hal/src/vulkan/conv.rs index ffa38a4c18..aa057fc678 100644 --- a/wgpu-hal/src/vulkan/conv.rs +++ b/wgpu-hal/src/vulkan/conv.rs @@ -119,9 +119,7 @@ impl crate::Attachment<'_, super::Api> { let aspects = self.view.aspects(); super::AttachmentKey { format: caps.map_texture_format(self.view.attachment.view_format), - layout_pre: derive_image_layout(self.boundary_usage.start, aspects), - layout_in: derive_image_layout(self.usage, aspects), - layout_post: derive_image_layout(self.boundary_usage.end, aspects), + layout: derive_image_layout(self.usage, aspects), ops, } } @@ -250,7 +248,7 @@ pub fn map_texture_usage_to_barrier( access |= vk::AccessFlags::SHADER_WRITE; } - if usage == crate::TextureUses::UNINITIALIZED { + if usage == crate::TextureUses::UNINITIALIZED || usage.is_empty() { ( vk::PipelineStageFlags::TOP_OF_PIPE, vk::AccessFlags::empty(), diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 64156cf218..152c717293 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -74,7 +74,7 @@ impl super::DeviceShared { for cat in e.key().colors.iter() { color_refs.push(vk::AttachmentReference { attachment: vk_attachments.len() as u32, - layout: cat.base.layout_in, + layout: cat.base.layout, }); vk_attachments.push({ let (load_op, store_op) = conv::map_attachment_ops(cat.base.ops); @@ -83,14 +83,14 @@ impl super::DeviceShared { .samples(samples) .load_op(load_op) .store_op(store_op) - .initial_layout(cat.base.layout_pre) - .final_layout(cat.base.layout_post) + .initial_layout(cat.base.layout) + .final_layout(cat.base.layout) .build() }); let at_ref = if let Some(ref rat) = cat.resolve { let at_ref = vk::AttachmentReference { attachment: vk_attachments.len() as u32, - layout: rat.layout_in, + layout: rat.layout, }; let (load_op, store_op) = conv::map_attachment_ops(rat.ops); let vk_attachment = vk::AttachmentDescription::builder() @@ -98,8 +98,8 @@ impl super::DeviceShared { .samples(vk::SampleCountFlags::TYPE_1) .load_op(load_op) .store_op(store_op) - .initial_layout(rat.layout_pre) - .final_layout(rat.layout_post) + 
.initial_layout(rat.layout) + .final_layout(rat.layout) .build(); vk_attachments.push(vk_attachment); at_ref @@ -115,7 +115,7 @@ impl super::DeviceShared { if let Some(ref ds) = e.key().depth_stencil { ds_ref = Some(vk::AttachmentReference { attachment: vk_attachments.len() as u32, - layout: ds.base.layout_in, + layout: ds.base.layout, }); let (load_op, store_op) = conv::map_attachment_ops(ds.base.ops); let (stencil_load_op, stencil_store_op) = @@ -127,8 +127,8 @@ impl super::DeviceShared { .store_op(store_op) .stencil_load_op(stencil_load_op) .stencil_store_op(stencil_store_op) - .initial_layout(ds.base.layout_pre) - .final_layout(ds.base.layout_post) + .initial_layout(ds.base.layout) + .final_layout(ds.base.layout) .build(); vk_attachments.push(vk_attachment); } diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index 387d079fd9..e29c37f83b 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -162,20 +162,16 @@ struct PrivateCapabilities { #[derive(Clone, Debug, Eq, Hash, PartialEq)] struct AttachmentKey { format: vk::Format, - layout_pre: vk::ImageLayout, - layout_in: vk::ImageLayout, - layout_post: vk::ImageLayout, + layout: vk::ImageLayout, ops: crate::AttachmentOps, } impl AttachmentKey { /// Returns an attachment key for a compatible attachment. - fn compatible(format: vk::Format, layout_in: vk::ImageLayout) -> Self { + fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self { Self { format, - layout_pre: vk::ImageLayout::GENERAL, - layout_in, - layout_post: vk::ImageLayout::GENERAL, + layout, ops: crate::AttachmentOps::all(), } } From ba1bff58ed47ea670b61cbd1357ee133f1ec8ce7 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Fri, 9 Jul 2021 14:12:42 -0400 Subject: [PATCH 25/33] hal/dx12: attachment clears and resolvess --- wgpu-hal/src/dx12/command.rs | 109 +++++++++++++++++++++++++++++++---- wgpu-hal/src/dx12/device.rs | 8 ++- wgpu-hal/src/dx12/mod.rs | 28 ++++++++- 3 files changed, 131 insertions(+), 14 deletions(-) diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 672a29802b..b5b09427d9 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -46,7 +46,7 @@ impl crate::CommandEncoder for super::CommandEncoder { } self.list = Some(list); self.temp.clear(); - self.has_pass_label = false; + self.pass.clear(); Ok(()) } unsafe fn discard_encoding(&mut self) { @@ -391,14 +391,14 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor) { if let Some(label) = desc.label { self.begin_debug_marker(label); - self.has_pass_label = true; + self.pass.has_label = true; } self.temp.barriers.clear(); let mut color_views = [native::CpuDescriptor { ptr: 0 }; crate::MAX_COLOR_TARGETS]; - for (cv, cat) in color_views.iter_mut().zip(desc.color_attachments.iter()) { - *cv = cat.target.view.handle_rtv.unwrap().raw; + for (rtv, cat) in color_views.iter_mut().zip(desc.color_attachments.iter()) { + *rtv = cat.target.view.handle_rtv.unwrap().raw; } let ds_view = match desc.depth_stencil_attachment { None => ptr::null(), @@ -411,18 +411,107 @@ impl crate::CommandEncoder for super::CommandEncoder { } }; - self.list.unwrap().OMSetRenderTargets( + let list = self.list.unwrap(); + list.OMSetRenderTargets( desc.color_attachments.len() as u32, color_views.as_ptr(), 0, ds_view, ); + + self.pass.resolves.clear(); + for (rtv, cat) in color_views.iter().zip(desc.color_attachments.iter()) { + if 
!cat.ops.contains(crate::AttachmentOps::LOAD) { + let value = [ + cat.clear_value.r as f32, + cat.clear_value.g as f32, + cat.clear_value.b as f32, + cat.clear_value.a as f32, + ]; + list.clear_render_target_view(*rtv, value, &[]); + } + if let Some(ref target) = cat.resolve_target { + self.pass.resolves.push(super::PassResolve { + src: cat.target.view.target_base, + dst: target.view.target_base, + format: target.view.raw_format, + }); + } + } + if let Some(ref ds) = desc.depth_stencil_attachment { + let mut flags = native::ClearFlags::empty(); + if !ds.depth_ops.contains(crate::AttachmentOps::LOAD) { + flags |= native::ClearFlags::DEPTH; + } + if !ds.stencil_ops.contains(crate::AttachmentOps::LOAD) { + flags |= native::ClearFlags::STENCIL; + } + + if !ds_view.is_null() { + list.clear_depth_stencil_view( + *ds_view, + flags, + ds.clear_value.0, + ds.clear_value.1 as u8, + &[], + ); + } + } } unsafe fn end_render_pass(&mut self) { - if self.has_pass_label { + if !self.pass.resolves.is_empty() { + let list = self.list.unwrap(); + self.temp.barriers.clear(); + + // All the targets are expected to be in `COLOR_TARGET` state, + // but D3D12 has special source/destination states for the resolves. + for resolve in self.pass.resolves.iter() { + let mut barrier = d3d12::D3D12_RESOURCE_BARRIER { + Type: d3d12::D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + Flags: d3d12::D3D12_RESOURCE_BARRIER_FLAG_NONE, + u: mem::zeroed(), + }; + //Note: this assumes `D3D12_RESOURCE_STATE_RENDER_TARGET`. + // If it's not the case, we can include the `TextureUses` in `PassResove`. + *barrier.u.Transition_mut() = d3d12::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: resolve.src.0.as_mut_ptr(), + Subresource: resolve.src.1, + StateBefore: d3d12::D3D12_RESOURCE_STATE_RENDER_TARGET, + StateAfter: d3d12::D3D12_RESOURCE_STATE_RESOLVE_DEST, + }; + self.temp.barriers.push(barrier); + *barrier.u.Transition_mut() = d3d12::D3D12_RESOURCE_TRANSITION_BARRIER { + pResource: resolve.dst.0.as_mut_ptr(), + Subresource: resolve.dst.1, + StateBefore: d3d12::D3D12_RESOURCE_STATE_RENDER_TARGET, + StateAfter: d3d12::D3D12_RESOURCE_STATE_RESOLVE_DEST, + }; + self.temp.barriers.push(barrier); + } + list.ResourceBarrier(self.temp.barriers.len() as u32, self.temp.barriers.as_ptr()); + + for resolve in self.pass.resolves.iter() { + list.ResolveSubresource( + resolve.dst.0.as_mut_ptr(), + resolve.dst.1, + resolve.src.0.as_mut_ptr(), + resolve.src.1, + resolve.format, + ); + } + + // Flip all the barriers to reverse, back into `COLOR_TARGET`. 
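// (This replays `temp.barriers` as-is instead of re-recording them: every
// barrier pushed above is a TRANSITION barrier, so swapping `StateBefore`
// and `StateAfter` on its `Transition` union member is enough to express the
// reverse transition and return the attachments to the color-target state.)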
+ for barrier in self.temp.barriers.iter_mut() { + let transition = barrier.u.Transition_mut(); + mem::swap(&mut transition.StateBefore, &mut transition.StateAfter); + } + list.ResourceBarrier(self.temp.barriers.len() as u32, self.temp.barriers.as_ptr()); + } + + if self.pass.has_label { self.end_debug_marker(); - self.has_pass_label = false; } + self.pass.clear(); } unsafe fn set_bind_group( @@ -596,14 +685,14 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { if let Some(label) = desc.label { self.begin_debug_marker(label); - self.has_pass_label = true; + self.pass.has_label = true; } } unsafe fn end_compute_pass(&mut self) { - if self.has_pass_label { + if self.pass.has_label { self.end_debug_marker(); - self.has_pass_label = false; } + self.pass.clear(); } unsafe fn set_compute_pipeline(&mut self, pipeline: &Resource) {} diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 2b2cad28e3..ea4bc42e57 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -670,6 +670,12 @@ impl crate::Device for super::Device { desc: &crate::TextureViewDescriptor, ) -> Result { Ok(super::TextureView { + //Note: this mapping also happens in all of the `view_texture_as_*` + raw_format: conv::map_texture_format(desc.format), + target_base: ( + texture.resource, + texture.calc_subresource(desc.range.base_mip_level, desc.range.base_array_layer, 0), + ), handle_srv: if desc .usage .intersects(crate::TextureUses::SAMPLED | crate::TextureUses::STORAGE_LOAD) @@ -783,7 +789,7 @@ impl crate::Device for super::Device { shared: Arc::clone(&self.shared), list: None, free_lists: Vec::new(), - has_pass_label: false, + pass: super::PassState::default(), temp: super::Temp::default(), }) } diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 6be21520e9..f48d045ad7 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -14,10 +14,11 @@ mod descriptor; mod device; mod instance; +use arrayvec::ArrayVec; use parking_lot::Mutex; use std::{borrow::Cow, ptr, sync::Arc}; use winapi::{ - shared::{dxgi, dxgi1_2, dxgi1_4, dxgitype, windef, winerror}, + shared::{dxgi, dxgi1_2, dxgi1_4, dxgiformat, dxgitype, windef, winerror}, um::{d3d12, synchapi, winbase, winnt}, Interface as _, }; @@ -222,13 +223,32 @@ impl Temp { } } +struct PassResolve { + src: (native::Resource, u32), + dst: (native::Resource, u32), + format: dxgiformat::DXGI_FORMAT, +} + +#[derive(Default)] +struct PassState { + has_label: bool, + resolves: ArrayVec<[PassResolve; crate::MAX_COLOR_TARGETS]>, +} + +impl PassState { + fn clear(&mut self) { + self.has_label = false; + self.resolves.clear(); + } +} + pub struct CommandEncoder { allocator: native::CommandAllocator, device: native::Device, shared: Arc, list: Option, free_lists: Vec, - has_pass_label: bool, + pass: PassState, temp: Temp, } @@ -284,6 +304,8 @@ impl Texture { #[derive(Debug)] pub struct TextureView { + raw_format: dxgiformat::DXGI_FORMAT, + target_base: (native::Resource, u32), handle_srv: Option, handle_uav: Option, handle_rtv: Option, @@ -349,7 +371,7 @@ pub struct PipelineLayout { total_slots: u32, // Storing for each associated bind group, which tables we created // in the root signature. This is required for binding descriptor sets. 
- elements: arrayvec::ArrayVec, + elements: ArrayVec, } unsafe impl Send for PipelineLayout {} From c0432761d9dfc077a80749ca7bbab5768d7d25a7 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Fri, 9 Jul 2021 15:34:37 -0400 Subject: [PATCH 26/33] hal/dx12: use range-alloc for descriptor heaps --- Cargo.lock | 7 +++++ wgpu-hal/Cargo.toml | 3 +- wgpu-hal/src/dx12/command.rs | 52 +++++++++++++++++++-------------- wgpu-hal/src/dx12/descriptor.rs | 37 +++++++++++++++++++++-- wgpu-hal/src/dx12/device.rs | 14 +++++++++ wgpu-hal/src/dx12/mod.rs | 4 +++ 6 files changed, 91 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c62c2e5bf4..61df75bd57 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1469,6 +1469,12 @@ dependencies = [ "rand_core", ] +[[package]] +name = "range-alloc" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e935c45e09cc6dcf00d2f0b2d630a58f4095320223d47fc68918722f0538b6" + [[package]] name = "raw-window-handle" version = "0.3.3" @@ -2011,6 +2017,7 @@ dependencies = [ "naga", "objc", "parking_lot", + "range-alloc", "raw-window-handle", "renderdoc-sys", "thiserror", diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml index b19101ea06..1084ecbea0 100644 --- a/wgpu-hal/Cargo.toml +++ b/wgpu-hal/Cargo.toml @@ -16,7 +16,7 @@ default = [] metal = ["naga/msl-out", "block", "foreign-types"] vulkan = ["naga/spv-out", "ash", "gpu-alloc", "gpu-descriptor", "libloading", "inplace_it", "renderdoc-sys"] gles = ["naga/glsl-out", "glow", "egl", "libloading"] -dx12 = ["naga/hlsl-out", "native", "bit-set", "winapi/d3d12", "winapi/d3d12shader", "winapi/d3d12sdklayers", "winapi/dxgi1_6"] +dx12 = ["naga/hlsl-out", "native", "bit-set", "range-alloc", "winapi/d3d12", "winapi/d3d12shader", "winapi/d3d12sdklayers", "winapi/dxgi1_6"] [dependencies] bitflags = "1.0" @@ -43,6 +43,7 @@ glow = { git = "https://github.com/grovesNL/glow", rev = "0864897a28bbdd43f89f4f # backend: Dx12 bit-set = { version = "0.5", optional = true } native = { package = "d3d12", version = "0.4", features = ["libloading"], optional = true } +range-alloc = { version = "0.1", optional = true } [target.'cfg(not(target_arch = "wasm32"))'.dependencies] egl = { package = "khronos-egl", version = "4.1", features = ["dynamic"], optional = true } diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index b5b09427d9..7c181b20c8 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -22,6 +22,27 @@ impl super::Temp { } } +impl super::CommandEncoder { + unsafe fn begin_pass(&mut self, label: crate::Label) { + let list = self.list.unwrap(); + if let Some(label) = label { + let (wide_label, size) = self.temp.prepare_marker(label); + list.BeginEvent(0, wide_label.as_ptr() as *const _, size); + self.pass.has_label = true; + } + list.set_descriptor_heaps(&[self.shared.heap_views.raw, self.shared.heap_samplers.raw]); + } + + unsafe fn end_pass(&mut self) { + let list = self.list.unwrap(); + list.set_descriptor_heaps(&[native::DescriptorHeap::null(); 2]); + if self.pass.has_label { + list.EndEvent(); + } + self.pass.clear(); + } +} + impl crate::CommandEncoder for super::CommandEncoder { unsafe fn begin_encoding(&mut self, label: crate::Label) -> Result<(), crate::DeviceError> { let list = match self.free_lists.pop() { @@ -44,6 +65,7 @@ impl crate::CommandEncoder for super::CommandEncoder { let cwstr = conv::map_label(label); list.SetName(cwstr.as_ptr()); } + self.list = Some(list); self.temp.clear(); self.pass.clear(); @@ -389,12 
+411,7 @@ impl crate::CommandEncoder for super::CommandEncoder { // render unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor) { - if let Some(label) = desc.label { - self.begin_debug_marker(label); - self.pass.has_label = true; - } - - self.temp.barriers.clear(); + self.begin_pass(desc.label); let mut color_views = [native::CpuDescriptor { ptr: 0 }; crate::MAX_COLOR_TARGETS]; for (rtv, cat) in color_views.iter_mut().zip(desc.color_attachments.iter()) { @@ -508,10 +525,7 @@ impl crate::CommandEncoder for super::CommandEncoder { list.ResourceBarrier(self.temp.barriers.len() as u32, self.temp.barriers.as_ptr()); } - if self.pass.has_label { - self.end_debug_marker(); - } - self.pass.clear(); + self.end_pass(); } unsafe fn set_bind_group( @@ -524,10 +538,10 @@ impl crate::CommandEncoder for super::CommandEncoder { } unsafe fn set_push_constants( &mut self, - layout: &super::PipelineLayout, - stages: wgt::ShaderStages, - offset: u32, - data: &[u32], + _layout: &super::PipelineLayout, + _stages: wgt::ShaderStages, + _offset: u32, + _data: &[u32], ) { } @@ -683,16 +697,10 @@ impl crate::CommandEncoder for super::CommandEncoder { // compute unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { - if let Some(label) = desc.label { - self.begin_debug_marker(label); - self.pass.has_label = true; - } + self.begin_pass(desc.label); } unsafe fn end_compute_pass(&mut self) { - if self.pass.has_label { - self.end_debug_marker(); - } - self.pass.clear(); + self.end_pass(); } unsafe fn set_compute_pipeline(&mut self, pipeline: &Resource) {} diff --git a/wgpu-hal/src/dx12/descriptor.rs b/wgpu-hal/src/dx12/descriptor.rs index da65b8df54..a1db54aa54 100644 --- a/wgpu-hal/src/dx12/descriptor.rs +++ b/wgpu-hal/src/dx12/descriptor.rs @@ -1,4 +1,7 @@ +use super::HResult as _; use bit_set::BitSet; +use parking_lot::Mutex; +use range_alloc::RangeAllocator; use std::fmt; const HEAP_SIZE_FIXED: usize = 64; @@ -13,14 +16,42 @@ pub(super) struct DualHandle { type DescriptorIndex = u64; -struct LinearHeap { - raw: native::DescriptorHeap, +pub(super) struct GeneralHeap { + pub raw: native::DescriptorHeap, handle_size: u64, total_handles: u64, start: DualHandle, + ranges: Mutex>, } -impl LinearHeap { +impl GeneralHeap { + pub(super) fn new( + device: native::Device, + raw_type: native::DescriptorHeapType, + total_handles: u64, + ) -> Result { + let raw = device + .create_descriptor_heap( + total_handles as u32, + raw_type, + native::DescriptorHeapFlags::SHADER_VISIBLE, + 0, + ) + .into_device_result("Descriptor heap creation")?; + + Ok(Self { + raw, + handle_size: device.get_descriptor_increment_size(raw_type) as u64, + total_handles, + start: DualHandle { + cpu: raw.start_cpu_descriptor(), + gpu: raw.start_gpu_descriptor(), + size: 0, + }, + ranges: Mutex::new(RangeAllocator::new(0..total_handles)), + }) + } + pub(super) fn at(&self, index: DescriptorIndex, size: u64) -> DualHandle { assert!(index < self.total_handles); DualHandle { diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index ea4bc42e57..456394abde 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -77,6 +77,10 @@ impl super::Device { (*zero_buffer).Unmap(0, &range); }; + // maximum number of CBV/SRV/UAV descriptors in heap for Tier 1 + let capacity_views = 1_000_000; + let capacity_samplers = 2_048; + let shared = super::DeviceShared { zero_buffer, cmd_signatures: super::CommandSignatures { @@ -105,6 +109,16 @@ impl super::Device { ) .into_device_result("Command 
(dispatch) signature creation")?, }, + heap_views: descriptor::GeneralHeap::new( + raw, + native::DescriptorHeapType::CbvSrvUav, + capacity_samplers, + )?, + heap_samplers: descriptor::GeneralHeap::new( + raw, + native::DescriptorHeapType::Sampler, + capacity_samplers, + )?, }; Ok(super::Device { diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index f48d045ad7..dd3276c00b 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -176,12 +176,16 @@ impl CommandSignatures { struct DeviceShared { zero_buffer: native::Resource, cmd_signatures: CommandSignatures, + heap_views: descriptor::GeneralHeap, + heap_samplers: descriptor::GeneralHeap, } impl DeviceShared { unsafe fn destroy(&self) { self.zero_buffer.destroy(); self.cmd_signatures.destroy(); + self.heap_views.raw.destroy(); + self.heap_samplers.raw.destroy(); } } From 7d29a3b70e553647e0b2b5d843dd7cc19089f11a Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Sat, 10 Jul 2021 01:42:31 -0400 Subject: [PATCH 27/33] hal/dx12: binding resources and index/vertex buffers --- wgpu-hal/src/dx12/command.rs | 100 +++++++++++++++++++++++++++++++++-- wgpu-hal/src/dx12/conv.rs | 7 +++ wgpu-hal/src/dx12/device.rs | 66 ++++++++++++----------- wgpu-hal/src/dx12/mod.rs | 76 +++++++++++++++++++++----- 4 files changed, 201 insertions(+), 48 deletions(-) diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 7c181b20c8..a8f31ca0e6 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -23,8 +23,9 @@ impl super::Temp { } impl super::CommandEncoder { - unsafe fn begin_pass(&mut self, label: crate::Label) { + unsafe fn begin_pass(&mut self, kind: super::PassKind, label: crate::Label) { let list = self.list.unwrap(); + self.pass.kind = kind; if let Some(label) = label { let (wide_label, size) = self.temp.prepare_marker(label); list.BeginEvent(0, wide_label.as_ptr() as *const _, size); @@ -41,6 +42,19 @@ impl super::CommandEncoder { } self.pass.clear(); } + + unsafe fn prepare_draw(&mut self) { + let list = self.list.unwrap(); + while self.pass.dirty_vertex_buffers != 0 { + let index = self.pass.dirty_vertex_buffers.trailing_zeros(); + self.pass.dirty_vertex_buffers ^= 1 << index; + list.IASetVertexBuffers( + index, + 1, + self.pass.vertex_buffers.as_ptr().offset(index as isize), + ); + } + } } impl crate::CommandEncoder for super::CommandEncoder { @@ -411,7 +425,7 @@ impl crate::CommandEncoder for super::CommandEncoder { // render unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor) { - self.begin_pass(desc.label); + self.begin_pass(super::PassKind::Render, desc.label); let mut color_views = [native::CpuDescriptor { ptr: 0 }; crate::MAX_COLOR_TARGETS]; for (rtv, cat) in color_views.iter_mut().zip(desc.color_attachments.iter()) { @@ -535,6 +549,70 @@ impl crate::CommandEncoder for super::CommandEncoder { group: &super::BindGroup, dynamic_offsets: &[wgt::DynamicOffset], ) { + use super::PassKind as Pk; + + let list = self.list.unwrap(); + let info = &layout.bind_group_infos[index as usize]; + let mut root_index = info.base_root_index; + + // Bind CBV/SRC/UAV descriptor tables + if info.tables.contains(super::TableTypes::SRV_CBV_UAV) { + match self.pass.kind { + Pk::Render => list.set_graphics_root_descriptor_table(root_index, group.gpu_views), + Pk::Compute => list.set_compute_root_descriptor_table(root_index, group.gpu_views), + Pk::Transfer => (), + } + root_index += 1; + } + + // Bind Sampler descriptor tables. 
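// (Same ordering rule as for the view table above: `root_index` started at the
// group's `base_root_index` and advances one slot per table the pipeline layout
// actually created, so bindings are consumed in exactly the order
// `create_pipeline_layout` pushed its root parameters: views table, sampler
// table, then the root descriptors for dynamic buffers.)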
+ if info.tables.contains(super::TableTypes::SAMPLERS) { + match self.pass.kind { + Pk::Render => { + list.set_graphics_root_descriptor_table(root_index, group.gpu_samplers) + } + Pk::Compute => { + list.set_compute_root_descriptor_table(root_index, group.gpu_samplers) + } + Pk::Transfer => (), + } + root_index += 1; + } + + // Bind root descriptors + for ((kind, &gpu_base), &offset) in info + .dynamic_buffers + .iter() + .zip(group.dynamic_buffers.iter()) + .zip(dynamic_offsets) + { + let gpu_address = gpu_base + offset as wgt::BufferAddress; + match self.pass.kind { + Pk::Render => match *kind { + super::BufferViewKind::Constant => { + list.set_graphics_root_constant_buffer_view(root_index, gpu_address) + } + super::BufferViewKind::ShaderResource => { + list.set_graphics_root_shader_resource_view(root_index, gpu_address) + } + super::BufferViewKind::UnorderedAccess => { + list.set_graphics_root_unordered_access_view(root_index, gpu_address) + } + }, + Pk::Compute => match *kind { + super::BufferViewKind::Constant => { + list.set_compute_root_constant_buffer_view(root_index, gpu_address) + } + super::BufferViewKind::ShaderResource => { + list.set_compute_root_shader_resource_view(root_index, gpu_address) + } + super::BufferViewKind::UnorderedAccess => { + list.set_compute_root_unordered_access_view(root_index, gpu_address) + } + }, + Pk::Transfer => (), + } + } } unsafe fn set_push_constants( &mut self, @@ -568,13 +646,23 @@ impl crate::CommandEncoder for super::CommandEncoder { binding: crate::BufferBinding<'a, super::Api>, format: wgt::IndexFormat, ) { + self.list.unwrap().set_index_buffer( + binding.resolve_address(), + binding.resolve_size() as u32, + conv::map_index_format(format), + ); } unsafe fn set_vertex_buffer<'a>( &mut self, index: u32, binding: crate::BufferBinding<'a, super::Api>, ) { + let vb = &mut self.pass.vertex_buffers[index as usize]; + vb.BufferLocation = binding.resolve_address(); + vb.SizeInBytes = binding.resolve_size() as u32; + self.pass.dirty_vertex_buffers |= 1 << index; } + unsafe fn set_viewport(&mut self, rect: &crate::Rect, depth_range: Range) { let raw_vp = d3d12::D3D12_VIEWPORT { TopLeftX: rect.x, @@ -609,6 +697,7 @@ impl crate::CommandEncoder for super::CommandEncoder { start_instance: u32, instance_count: u32, ) { + self.prepare_draw(); self.list .unwrap() .draw(vertex_count, instance_count, start_vertex, start_instance); @@ -621,6 +710,7 @@ impl crate::CommandEncoder for super::CommandEncoder { start_instance: u32, instance_count: u32, ) { + self.prepare_draw(); self.list.unwrap().draw_indexed( index_count, instance_count, @@ -635,6 +725,7 @@ impl crate::CommandEncoder for super::CommandEncoder { offset: wgt::BufferAddress, draw_count: u32, ) { + self.prepare_draw(); self.list.unwrap().ExecuteIndirect( self.shared.cmd_signatures.draw.as_mut_ptr(), draw_count, @@ -650,6 +741,7 @@ impl crate::CommandEncoder for super::CommandEncoder { offset: wgt::BufferAddress, draw_count: u32, ) { + self.prepare_draw(); self.list.unwrap().ExecuteIndirect( self.shared.cmd_signatures.draw_indexed.as_mut_ptr(), draw_count, @@ -667,6 +759,7 @@ impl crate::CommandEncoder for super::CommandEncoder { count_offset: wgt::BufferAddress, max_count: u32, ) { + self.prepare_draw(); self.list.unwrap().ExecuteIndirect( self.shared.cmd_signatures.draw.as_mut_ptr(), max_count, @@ -684,6 +777,7 @@ impl crate::CommandEncoder for super::CommandEncoder { count_offset: wgt::BufferAddress, max_count: u32, ) { + self.prepare_draw(); self.list.unwrap().ExecuteIndirect( 
self.shared.cmd_signatures.draw_indexed.as_mut_ptr(), max_count, @@ -697,7 +791,7 @@ impl crate::CommandEncoder for super::CommandEncoder { // compute unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { - self.begin_pass(desc.label); + self.begin_pass(super::PassKind::Compute, desc.label); } unsafe fn end_compute_pass(&mut self) { self.end_pass(); diff --git a/wgpu-hal/src/dx12/conv.rs b/wgpu-hal/src/dx12/conv.rs index 01aa76b5a2..b7dee5aa7a 100644 --- a/wgpu-hal/src/dx12/conv.rs +++ b/wgpu-hal/src/dx12/conv.rs @@ -110,6 +110,13 @@ pub fn map_texture_format_nosrgb(format: wgt::TextureFormat) -> dxgiformat::DXGI } } +pub fn map_index_format(format: wgt::IndexFormat) -> dxgiformat::DXGI_FORMAT { + match format { + wgt::IndexFormat::Uint16 => dxgiformat::DXGI_FORMAT_R16_UINT, + wgt::IndexFormat::Uint32 => dxgiformat::DXGI_FORMAT_R32_UINT, + } +} + pub fn map_acomposite_alpha_mode(mode: crate::CompositeAlphaMode) -> dxgi1_2::DXGI_ALPHA_MODE { use crate::CompositeAlphaMode as Cam; match mode { diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 456394abde..666fc687b0 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -588,7 +588,10 @@ impl crate::Device for super::Device { ); hr.into_device_result("Buffer creation")?; - Ok(super::Buffer { resource }) + Ok(super::Buffer { + resource, + size: desc.size, + }) } unsafe fn destroy_buffer(&self, buffer: super::Buffer) { buffer.resource.destroy(); @@ -803,7 +806,7 @@ impl crate::Device for super::Device { shared: Arc::clone(&self.shared), list: None, free_lists: Vec::new(), - pass: super::PassState::default(), + pass: super::PassState::new(), temp: super::Temp::default(), }) } @@ -864,7 +867,6 @@ impl crate::Device for super::Device { let total_parameters = root_constants.len() + desc.bind_group_layouts.len() * 2; // Guarantees that no re-allocation is done, and our pointers are valid let mut parameters = Vec::with_capacity(total_parameters); - let mut parameter_offsets = Vec::with_capacity(total_parameters); let root_space_offset = if !root_constants.is_empty() { 1 } else { 0 }; // Collect the whole number of bindings we will create upfront. 
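For orientation between these hunks: per bind group the root parameters are pushed in a fixed order, and each descriptor table costs one DWORD of the 64-DWORD root-signature budget while a root descriptor costs two (the "root view costs 2 words" note in the next hunk). The helper below only illustrates that ordering rule; the function and its parameter names are invented for this sketch and are not part of the patch.

/// Illustrative only: root-parameter index of the n-th dynamic-buffer root
/// descriptor of a bind group, given which descriptor tables were created for it.
fn dynamic_buffer_root_index(
    base_root_index: u32,
    has_view_table: bool,
    has_sampler_table: bool,
    nth_dynamic_buffer: u32,
) -> u32 {
    base_root_index + has_view_table as u32 + has_sampler_table as u32 + nth_dynamic_buffer
}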
@@ -885,12 +887,15 @@ impl crate::Device for super::Device { .sum(); let mut ranges = Vec::with_capacity(total_non_dynamic_entries); - let mut root_elements = - arrayvec::ArrayVec::<[super::RootElement; crate::MAX_BIND_GROUPS]>::default(); + let mut bind_group_infos = + arrayvec::ArrayVec::<[super::BindGroupInfo; crate::MAX_BIND_GROUPS]>::default(); for (index, bgl) in desc.bind_group_layouts.iter().enumerate() { let space = root_space_offset + index as u32; - let mut types = super::TableTypes::empty(); - let root_table_offset = root_offset as usize; + let mut info = super::BindGroupInfo { + tables: super::TableTypes::empty(), + base_root_index: parameters.len() as u32, + dynamic_buffers: Vec::new(), + }; let mut visibility_view_static = wgt::ShaderStages::empty(); let mut visibility_view_dynamic = wgt::ShaderStages::empty(); @@ -928,12 +933,11 @@ impl crate::Device for super::Device { )); } if ranges.len() > range_base { - parameter_offsets.push(root_offset); parameters.push(native::RootParameter::descriptor_table( conv::map_visibility(visibility_view_static), &ranges[range_base..], )); - types |= super::TableTypes::SRV_CBV_UAV; + info.tables |= super::TableTypes::SRV_CBV_UAV; root_offset += 1; } @@ -955,12 +959,11 @@ impl crate::Device for super::Device { )); } if ranges.len() > range_base { - parameter_offsets.push(root_offset); parameters.push(native::RootParameter::descriptor_table( conv::map_visibility(visibility_sampler), &ranges[range_base..], )); - types |= super::TableTypes::SAMPLERS; + info.tables |= super::TableTypes::SAMPLERS; root_offset += 1; } @@ -979,31 +982,30 @@ impl crate::Device for super::Device { register: entry.binding, space, }; - let param = match buffer_ty { - wgt::BufferBindingType::Uniform => { - native::RootParameter::cbv_descriptor(dynamic_buffers_visibility, binding) - } - wgt::BufferBindingType::Storage { read_only: true } => { - native::RootParameter::srv_descriptor(dynamic_buffers_visibility, binding) - } - wgt::BufferBindingType::Storage { read_only: false } => { - native::RootParameter::uav_descriptor(dynamic_buffers_visibility, binding) - } + let (kind, param) = match buffer_ty { + wgt::BufferBindingType::Uniform => ( + super::BufferViewKind::Constant, + native::RootParameter::cbv_descriptor(dynamic_buffers_visibility, binding), + ), + wgt::BufferBindingType::Storage { read_only: true } => ( + super::BufferViewKind::ShaderResource, + native::RootParameter::srv_descriptor(dynamic_buffers_visibility, binding), + ), + wgt::BufferBindingType::Storage { read_only: false } => ( + super::BufferViewKind::UnorderedAccess, + native::RootParameter::uav_descriptor(dynamic_buffers_visibility, binding), + ), }; - parameter_offsets.push(root_offset); + info.dynamic_buffers.push(kind); parameters.push(param); root_offset += 2; // root view costs 2 words } - root_elements.push(super::RootElement { - types, - offset: root_table_offset, - }); + bind_group_infos.push(info); } // Ensure that we didn't reallocate! 
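// (The descriptor-table parameters above store raw pointers into `ranges`, so a
// reallocation of that Vec before the root signature is serialized would leave
// them dangling; the capacity computed upfront and the check below guard
// against exactly that.)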
debug_assert_eq!(ranges.len(), total_non_dynamic_entries); - assert_eq!(parameters.len(), parameter_offsets.len()); let (blob, error) = self .library @@ -1041,9 +1043,7 @@ impl crate::Device for super::Device { Ok(super::PipelineLayout { raw, - parameter_offsets, - total_slots: root_offset, - elements: root_elements, + bind_group_infos, }) } unsafe fn destroy_pipeline_layout(&self, pipeline_layout: super::PipelineLayout) { @@ -1054,7 +1054,11 @@ impl crate::Device for super::Device { &self, desc: &crate::BindGroupDescriptor, ) -> Result { - Ok(super::BindGroup {}) + Ok(super::BindGroup { + gpu_views: unimplemented!(), + gpu_samplers: unimplemented!(), + dynamic_buffers: Vec::new(), + }) } unsafe fn destroy_bind_group(&self, group: super::BindGroup) {} diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index dd3276c00b..d53793aed8 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -16,7 +16,7 @@ mod instance; use arrayvec::ArrayVec; use parking_lot::Mutex; -use std::{borrow::Cow, ptr, sync::Arc}; +use std::{borrow::Cow, fmt, mem, ptr, sync::Arc}; use winapi::{ shared::{dxgi, dxgi1_2, dxgi1_4, dxgiformat, dxgitype, windef, winerror}, um::{d3d12, synchapi, winbase, winnt}, @@ -233,16 +233,36 @@ struct PassResolve { format: dxgiformat::DXGI_FORMAT, } -#[derive(Default)] +enum PassKind { + Render, + Compute, + Transfer, +} + struct PassState { has_label: bool, resolves: ArrayVec<[PassResolve; crate::MAX_COLOR_TARGETS]>, + vertex_buffers: [d3d12::D3D12_VERTEX_BUFFER_VIEW; crate::MAX_VERTEX_BUFFERS], + dirty_vertex_buffers: usize, + kind: PassKind, } impl PassState { + fn new() -> Self { + PassState { + has_label: false, + resolves: ArrayVec::new(), + vertex_buffers: [unsafe { mem::zeroed() }; crate::MAX_VERTEX_BUFFERS], + dirty_vertex_buffers: 0, + kind: PassKind::Transfer, + } + } + fn clear(&mut self) { self.has_label = false; self.resolves.clear(); + self.dirty_vertex_buffers = 0; + self.kind = PassKind::Transfer; } } @@ -269,11 +289,25 @@ unsafe impl Sync for CommandBuffer {} #[derive(Debug)] pub struct Buffer { resource: native::Resource, + size: wgt::BufferAddress, } unsafe impl Send for Buffer {} unsafe impl Sync for Buffer {} +impl crate::BufferBinding<'_, Api> { + fn resolve_size(&self) -> wgt::BufferAddress { + match self.size { + Some(size) => size.get(), + None => self.buffer.size - self.offset, + } + } + + fn resolve_address(&self) -> wgt::BufferAddress { + self.buffer.resource.gpu_virtual_address() + self.offset + } +} + #[derive(Debug)] pub struct Texture { resource: native::Resource, @@ -350,8 +384,27 @@ pub struct BindGroupLayout { entries: Vec, } -#[derive(Debug)] -pub struct BindGroup {} +enum BufferViewKind { + Constant, + ShaderResource, + UnorderedAccess, +} + +pub struct BindGroup { + gpu_views: d3d12::D3D12_GPU_DESCRIPTOR_HANDLE, + gpu_samplers: d3d12::D3D12_GPU_DESCRIPTOR_HANDLE, + dynamic_buffers: Vec, +} + +impl fmt::Debug for BindGroup { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("BindGroup") + .field("gpu_views", &self.gpu_views.ptr) + .field("gpu_samplers", &self.gpu_samplers.ptr) + .field("dynamic_buffers", &self.dynamic_buffers) + .finish() + } +} bitflags::bitflags! { struct TableTypes: u8 { @@ -360,22 +413,17 @@ bitflags::bitflags! 
{ } } -type RootSignatureOffset = usize; - -pub struct RootElement { - types: TableTypes, - offset: RootSignatureOffset, +struct BindGroupInfo { + base_root_index: u32, + tables: TableTypes, + dynamic_buffers: Vec, } pub struct PipelineLayout { raw: native::RootSignature, - /// A root offset per parameter. - parameter_offsets: Vec, - /// Total number of root slots occupied by the layout. - total_slots: u32, // Storing for each associated bind group, which tables we created // in the root signature. This is required for binding descriptor sets. - elements: ArrayVec, + bind_group_infos: ArrayVec, } unsafe impl Send for PipelineLayout {} From 040a602b697dca127893ce3df1de3233046303d6 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Sun, 11 Jul 2021 11:27:39 -0400 Subject: [PATCH 28/33] hal/dx12: bind group creation --- wgpu-hal/src/dx12/command.rs | 14 +-- wgpu-hal/src/dx12/descriptor.rs | 125 +++++++++++++++++++-- wgpu-hal/src/dx12/device.rs | 193 +++++++++++++++++++++++++++++++- wgpu-hal/src/dx12/mod.rs | 22 ++-- 4 files changed, 317 insertions(+), 37 deletions(-) diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index a8f31ca0e6..bec9a743b6 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -557,9 +557,10 @@ impl crate::CommandEncoder for super::CommandEncoder { // Bind CBV/SRC/UAV descriptor tables if info.tables.contains(super::TableTypes::SRV_CBV_UAV) { + let descriptor = group.handle_views.unwrap().gpu; match self.pass.kind { - Pk::Render => list.set_graphics_root_descriptor_table(root_index, group.gpu_views), - Pk::Compute => list.set_compute_root_descriptor_table(root_index, group.gpu_views), + Pk::Render => list.set_graphics_root_descriptor_table(root_index, descriptor), + Pk::Compute => list.set_compute_root_descriptor_table(root_index, descriptor), Pk::Transfer => (), } root_index += 1; @@ -567,13 +568,10 @@ impl crate::CommandEncoder for super::CommandEncoder { // Bind Sampler descriptor tables. if info.tables.contains(super::TableTypes::SAMPLERS) { + let descriptor = group.handle_samplers.unwrap().gpu; match self.pass.kind { - Pk::Render => { - list.set_graphics_root_descriptor_table(root_index, group.gpu_samplers) - } - Pk::Compute => { - list.set_compute_root_descriptor_table(root_index, group.gpu_samplers) - } + Pk::Render => list.set_graphics_root_descriptor_table(root_index, descriptor), + Pk::Compute => list.set_compute_root_descriptor_table(root_index, descriptor), Pk::Transfer => (), } root_index += 1; diff --git a/wgpu-hal/src/dx12/descriptor.rs b/wgpu-hal/src/dx12/descriptor.rs index a1db54aa54..da8ce6786b 100644 --- a/wgpu-hal/src/dx12/descriptor.rs +++ b/wgpu-hal/src/dx12/descriptor.rs @@ -9,15 +9,26 @@ const HEAP_SIZE_FIXED: usize = 64; #[derive(Copy, Clone)] pub(super) struct DualHandle { cpu: native::CpuDescriptor, - gpu: native::GpuDescriptor, + pub gpu: native::GpuDescriptor, /// How large the block allocated to this handle is. 
- size: u64, + count: u64, +} + +impl fmt::Debug for DualHandle { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("DualHandle") + .field("cpu", &self.cpu.ptr) + .field("gpu", &self.gpu.ptr) + .field("count", &self.count) + .finish() + } } type DescriptorIndex = u64; pub(super) struct GeneralHeap { pub raw: native::DescriptorHeap, + ty: native::DescriptorHeapType, handle_size: u64, total_handles: u64, start: DualHandle, @@ -27,13 +38,13 @@ pub(super) struct GeneralHeap { impl GeneralHeap { pub(super) fn new( device: native::Device, - raw_type: native::DescriptorHeapType, + ty: native::DescriptorHeapType, total_handles: u64, ) -> Result { let raw = device .create_descriptor_heap( total_handles as u32, - raw_type, + ty, native::DescriptorHeapFlags::SHADER_VISIBLE, 0, ) @@ -41,37 +52,53 @@ impl GeneralHeap { Ok(Self { raw, - handle_size: device.get_descriptor_increment_size(raw_type) as u64, + ty, + handle_size: device.get_descriptor_increment_size(ty) as u64, total_handles, start: DualHandle { cpu: raw.start_cpu_descriptor(), gpu: raw.start_gpu_descriptor(), - size: 0, + count: 0, }, ranges: Mutex::new(RangeAllocator::new(0..total_handles)), }) } - pub(super) fn at(&self, index: DescriptorIndex, size: u64) -> DualHandle { + pub(super) fn at(&self, index: DescriptorIndex, count: u64) -> DualHandle { assert!(index < self.total_handles); DualHandle { cpu: self.cpu_descriptor_at(index), gpu: self.gpu_descriptor_at(index), - size, + count, } } - pub(super) fn cpu_descriptor_at(&self, index: u64) -> native::CpuDescriptor { + fn cpu_descriptor_at(&self, index: u64) -> native::CpuDescriptor { native::CpuDescriptor { ptr: self.start.cpu.ptr + (self.handle_size * index) as usize, } } - pub(super) fn gpu_descriptor_at(&self, index: u64) -> native::GpuDescriptor { + fn gpu_descriptor_at(&self, index: u64) -> native::GpuDescriptor { native::GpuDescriptor { ptr: self.start.gpu.ptr + self.handle_size * index, } } + + pub(super) fn allocate_slice(&self, count: u64) -> Result { + let range = self.ranges.lock().allocate_range(count).map_err(|err| { + log::error!("Unable to allocate descriptors: {:?}", err); + crate::DeviceError::OutOfMemory + })?; + Ok(range.start) + } + + /// Free handles previously given out by this `DescriptorHeapSlice`. + /// Do not use this with handles not given out by this `DescriptorHeapSlice`. + pub(crate) fn free_slice(&self, handle: DualHandle) { + let start = (handle.gpu.ptr - self.start.gpu.ptr) / self.handle_size; + self.ranges.lock().free_range(start..start + handle.count); + } } /// Fixed-size free-list allocator for CPU descriptors. 
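A note on the allocation scheme above: the shader-visible heap hands out one contiguous descriptor range per bind group through the `range-alloc` crate and takes it back when the group is destroyed. A minimal standalone sketch of that allocator usage, sized with the same Tier-1 CBV/SRV/UAV figure the patch uses; the function name is invented for illustration:

use range_alloc::RangeAllocator;

fn descriptor_range_demo() {
    // One allocator per shader-visible heap, covering every descriptor index in it.
    let mut ranges: RangeAllocator<u64> = RangeAllocator::new(0..1_000_000);

    // A bind group with, say, four views gets one contiguous block whose
    // descriptors are then filled via `ID3D12Device::CopyDescriptors`.
    let block = ranges.allocate_range(4).expect("descriptor heap exhausted");

    // Destroying the bind group returns the block to the free list.
    ranges.free_range(block);
}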
@@ -199,3 +226,81 @@ impl CpuPool { } } } + +pub(super) struct CpuHeapInner { + pub raw: native::DescriptorHeap, + pub stage: Vec, +} + +pub(super) struct CpuHeap { + pub inner: Mutex, + start: native::CpuDescriptor, + handle_size: u32, + total: u32, +} + +unsafe impl Send for CpuHeap {} +unsafe impl Sync for CpuHeap {} + +impl CpuHeap { + pub(super) fn new( + device: native::Device, + ty: native::DescriptorHeapType, + total: u32, + ) -> Result { + let handle_size = device.get_descriptor_increment_size(ty); + let raw = device + .create_descriptor_heap(total, ty, native::DescriptorHeapFlags::empty(), 0) + .into_device_result("CPU descriptor heap creation")?; + + Ok(Self { + inner: Mutex::new(CpuHeapInner { + raw, + stage: Vec::new(), + }), + start: raw.start_cpu_descriptor(), + handle_size, + total, + }) + } + + pub(super) fn at(&self, index: u32) -> native::CpuDescriptor { + native::CpuDescriptor { + ptr: self.start.ptr + (self.handle_size * index) as usize, + } + } + + pub(super) unsafe fn destroy(self) { + self.inner.into_inner().raw.destroy(); + } +} + +impl fmt::Debug for CpuHeap { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CpuHeap") + .field("start", &self.start.ptr) + .field("handle_size", &self.handle_size) + .field("total", &self.total) + .finish() + } +} + +pub(super) unsafe fn upload( + device: native::Device, + src: &CpuHeapInner, + dst: &GeneralHeap, + dummy_copy_counts: &[u32], +) -> Result { + let count = src.stage.len() as u32; + let index = dst.allocate_slice(count as u64)?; + device.CopyDescriptors( + 1, + &dst.cpu_descriptor_at(index), + &count, + count, + src.stage.as_ptr(), + dummy_copy_counts.as_ptr(), + dst.ty as u32, + ); + Ok(dst.at(index, count as u64)) +} diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 666fc687b0..ef43e0f84e 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -825,13 +825,56 @@ impl crate::Device for super::Device { &self, desc: &crate::BindGroupLayoutDescriptor, ) -> Result { + let (mut num_buffer_views, mut num_samplers, mut num_texture_views) = (0, 0, 0); + for entry in desc.entries.iter() { + match entry.ty { + wgt::BindingType::Buffer { + has_dynamic_offset: true, + .. + } => {} + wgt::BindingType::Buffer { .. } => num_buffer_views += 1, + wgt::BindingType::Texture { .. } | wgt::BindingType::StorageTexture { .. } => { + num_texture_views += 1 + } + wgt::BindingType::Sampler { .. 
} => num_samplers += 1, + } + } + + let num_views = num_buffer_views + num_texture_views; Ok(super::BindGroupLayout { entries: desc.entries.to_vec(), + cpu_heap_views: if num_views != 0 { + let heap = descriptor::CpuHeap::new( + self.raw, + native::DescriptorHeapType::CbvSrvUav, + num_views, + )?; + Some(heap) + } else { + None + }, + cpu_heap_samplers: if num_samplers != 0 { + let heap = descriptor::CpuHeap::new( + self.raw, + native::DescriptorHeapType::Sampler, + num_samplers, + )?; + Some(heap) + } else { + None + }, + copy_counts: vec![1; num_views.max(num_samplers) as usize], }) } - unsafe fn destroy_bind_group_layout(&self, _bg_layout: super::BindGroupLayout) { - // just drop + unsafe fn destroy_bind_group_layout(&self, bg_layout: super::BindGroupLayout) { + if let Some(cpu_heap) = bg_layout.cpu_heap_views { + cpu_heap.destroy(); + } + if let Some(cpu_heap) = bg_layout.cpu_heap_samplers { + cpu_heap.destroy(); + } } + unsafe fn create_pipeline_layout( &self, desc: &crate::PipelineLayoutDescriptor, @@ -1054,13 +1097,151 @@ impl crate::Device for super::Device { &self, desc: &crate::BindGroupDescriptor, ) -> Result { + let mut cpu_views = desc + .layout + .cpu_heap_views + .as_ref() + .map(|cpu_heap| cpu_heap.inner.lock()); + if let Some(ref mut inner) = cpu_views { + inner.stage.clear(); + } + let mut cpu_samplers = desc + .layout + .cpu_heap_samplers + .as_ref() + .map(|cpu_heap| cpu_heap.inner.lock()); + if let Some(ref mut inner) = cpu_samplers { + inner.stage.clear(); + } + let mut dynamic_buffers = Vec::new(); + + for (layout, entry) in desc.layout.entries.iter().zip(desc.entries.iter()) { + match layout.ty { + wgt::BindingType::Buffer { + has_dynamic_offset, + ty, + .. + } => { + let data = &desc.buffers[entry.resource_index as usize]; + let gpu_address = data.resolve_address(); + let size = data.resolve_size() as u32; + let inner = cpu_views.as_mut().unwrap(); + let cpu_index = inner.stage.len() as u32; + let handle = desc.layout.cpu_heap_views.as_ref().unwrap().at(cpu_index); + match ty { + _ if has_dynamic_offset => { + dynamic_buffers.push(gpu_address); + } + wgt::BufferBindingType::Uniform => { + let mask = d3d12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1; + let raw_desc = d3d12::D3D12_CONSTANT_BUFFER_VIEW_DESC { + BufferLocation: gpu_address, + SizeInBytes: size, + }; + self.raw.CreateConstantBufferView(&raw_desc, handle); + } + wgt::BufferBindingType::Storage { read_only: true } => { + let mut raw_desc = d3d12::D3D12_SHADER_RESOURCE_VIEW_DESC { + Format: dxgiformat::DXGI_FORMAT_R32_TYPELESS, + Shader4ComponentMapping: D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, + ViewDimension: d3d12::D3D12_SRV_DIMENSION_BUFFER, + u: mem::zeroed(), + }; + *raw_desc.u.Buffer_mut() = d3d12::D3D12_BUFFER_SRV { + FirstElement: data.offset, + NumElements: size / 4, + StructureByteStride: 0, + Flags: d3d12::D3D12_BUFFER_SRV_FLAG_RAW, + }; + self.raw.CreateShaderResourceView( + data.buffer.resource.as_mut_ptr(), + &raw_desc, + handle, + ); + } + wgt::BufferBindingType::Storage { read_only: false } => { + let mut raw_desc = d3d12::D3D12_UNORDERED_ACCESS_VIEW_DESC { + Format: dxgiformat::DXGI_FORMAT_R32_TYPELESS, + ViewDimension: d3d12::D3D12_UAV_DIMENSION_BUFFER, + u: mem::zeroed(), + }; + *raw_desc.u.Buffer_mut() = d3d12::D3D12_BUFFER_UAV { + FirstElement: data.offset, + NumElements: size / 4, + StructureByteStride: 0, + CounterOffsetInBytes: 0, + Flags: d3d12::D3D12_BUFFER_UAV_FLAG_RAW, + }; + self.raw.CreateUnorderedAccessView( + data.buffer.resource.as_mut_ptr(), + ptr::null_mut(), + 
&raw_desc, + handle, + ); + } + } + inner.stage.push(handle); + } + wgt::BindingType::Texture { .. } + | wgt::BindingType::StorageTexture { + access: wgt::StorageTextureAccess::ReadOnly, + .. + } => { + let data = &desc.textures[entry.resource_index as usize]; + let handle = data.view.handle_srv.unwrap(); + cpu_views.as_mut().unwrap().stage.push(handle.raw); + } + wgt::BindingType::StorageTexture { .. } => { + let data = &desc.textures[entry.resource_index as usize]; + let handle = data.view.handle_uav.unwrap(); + cpu_views.as_mut().unwrap().stage.push(handle.raw); + } + wgt::BindingType::Sampler { .. } => { + let data = &desc.samplers[entry.resource_index as usize]; + cpu_samplers.as_mut().unwrap().stage.push(data.handle.raw); + } + } + } + + let handle_views = match cpu_views { + Some(inner) => { + let dual = descriptor::upload( + self.raw, + &*inner, + &self.shared.heap_views, + &desc.layout.copy_counts, + )?; + Some(dual) + } + None => None, + }; + let handle_samplers = match cpu_samplers { + Some(inner) => { + let dual = descriptor::upload( + self.raw, + &*inner, + &self.shared.heap_samplers, + &desc.layout.copy_counts, + )?; + Some(dual) + } + None => None, + }; + Ok(super::BindGroup { - gpu_views: unimplemented!(), - gpu_samplers: unimplemented!(), - dynamic_buffers: Vec::new(), + handle_views, + handle_samplers, + dynamic_buffers, }) } - unsafe fn destroy_bind_group(&self, group: super::BindGroup) {} + unsafe fn destroy_bind_group(&self, group: super::BindGroup) { + if let Some(dual) = group.handle_views { + let _ = self.shared.heap_views.free_slice(dual); + } + if let Some(dual) = group.handle_samplers { + let _ = self.shared.heap_samplers.free_slice(dual); + } + } unsafe fn create_shader_module( &self, diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index d53793aed8..e08a6f1563 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -16,7 +16,7 @@ mod instance; use arrayvec::ArrayVec; use parking_lot::Mutex; -use std::{borrow::Cow, fmt, mem, ptr, sync::Arc}; +use std::{borrow::Cow, mem, ptr, sync::Arc}; use winapi::{ shared::{dxgi, dxgi1_2, dxgi1_4, dxgiformat, dxgitype, windef, winerror}, um::{d3d12, synchapi, winbase, winnt}, @@ -147,6 +147,8 @@ pub struct Adapter { device: native::Device, library: Arc, private_caps: PrivateCapabilities, + //Note: this isn't used right now, but we'll need it later. + #[allow(unused)] workarounds: Workarounds, } @@ -382,6 +384,9 @@ unsafe impl Sync for Fence {} pub struct BindGroupLayout { /// Sorted list of entries. entries: Vec, + cpu_heap_views: Option, + cpu_heap_samplers: Option, + copy_counts: Vec, // all 1's } enum BufferViewKind { @@ -390,22 +395,13 @@ enum BufferViewKind { UnorderedAccess, } +#[derive(Debug)] pub struct BindGroup { - gpu_views: d3d12::D3D12_GPU_DESCRIPTOR_HANDLE, - gpu_samplers: d3d12::D3D12_GPU_DESCRIPTOR_HANDLE, + handle_views: Option, + handle_samplers: Option, dynamic_buffers: Vec, } -impl fmt::Debug for BindGroup { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("BindGroup") - .field("gpu_views", &self.gpu_views.ptr) - .field("gpu_samplers", &self.gpu_samplers.ptr) - .field("dynamic_buffers", &self.dynamic_buffers) - .finish() - } -} - bitflags::bitflags! 
{ struct TableTypes: u8 { const SRV_CBV_UAV = 0x1; From 0a609e74c00592dfd868fca33d5c428c2ef618bc Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Sun, 11 Jul 2021 23:45:49 -0400 Subject: [PATCH 29/33] hal/dx12: compute pipelines, update naga --- Cargo.lock | 2 +- wgpu-core/Cargo.toml | 2 +- wgpu-hal/Cargo.toml | 4 +- wgpu-hal/src/dx12/adapter.rs | 12 ++- wgpu-hal/src/dx12/command.rs | 6 +- wgpu-hal/src/dx12/device.rs | 137 ++++++++++++++++++++++++++++++---- wgpu-hal/src/dx12/instance.rs | 3 +- wgpu-hal/src/dx12/mod.rs | 36 +++++++-- wgpu-hal/src/metal/device.rs | 2 +- wgpu/Cargo.toml | 6 +- 10 files changed, 175 insertions(+), 35 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 61df75bd57..c1a33fb97b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1051,7 +1051,7 @@ dependencies = [ [[package]] name = "naga" version = "0.5.0" -source = "git+https://github.com/gfx-rs/naga?rev=0b9af95793e319817e74a30601cbcd4bad9bb3e6#0b9af95793e319817e74a30601cbcd4bad9bb3e6" +source = "git+https://github.com/gfx-rs/naga?rev=458db0b#458db0b5228854dc417283f4b9742e03f25bc492" dependencies = [ "bit-set", "bitflags", diff --git a/wgpu-core/Cargo.toml b/wgpu-core/Cargo.toml index af10fbb528..93583303d5 100644 --- a/wgpu-core/Cargo.toml +++ b/wgpu-core/Cargo.toml @@ -36,7 +36,7 @@ thiserror = "1" [dependencies.naga] git = "https://github.com/gfx-rs/naga" -rev = "0b9af95793e319817e74a30601cbcd4bad9bb3e6" +rev = "458db0b" features = ["wgsl-in"] [dependencies.wgt] diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml index 1084ecbea0..dd968b33fe 100644 --- a/wgpu-hal/Cargo.toml +++ b/wgpu-hal/Cargo.toml @@ -59,11 +59,11 @@ core-graphics-types = "0.1" [dependencies.naga] git = "https://github.com/gfx-rs/naga" -rev = "0b9af95793e319817e74a30601cbcd4bad9bb3e6" +rev = "458db0b" [dev-dependencies.naga] git = "https://github.com/gfx-rs/naga" -rev = "0b9af95793e319817e74a30601cbcd4bad9bb3e6" +rev = "458db0b" features = ["wgsl-in"] [dev-dependencies] diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 8b17767747..175d1524e6 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -18,6 +18,7 @@ impl super::Adapter { pub(super) fn expose( adapter: native::WeakPtr, library: &Arc, + instance_flags: crate::InstanceFlags, ) -> Option> { // Create the device so that we can get the capabilities. 
let device = match library.create_device(adapter, native::FeatureLevel::L11_0) { @@ -106,6 +107,7 @@ impl super::Adapter { } else { super::MemoryArchitecture::NonUnified }, + shader_debug_info: instance_flags.contains(crate::InstanceFlags::DEBUG), }; // Theoretically vram limited, but in practice 2^20 is the limit @@ -226,7 +228,7 @@ impl super::Adapter { impl crate::Adapter for super::Adapter { unsafe fn open( &self, - _features: wgt::Features, + features: wgt::Features, ) -> Result, crate::DeviceError> { let queue = self .device @@ -238,7 +240,13 @@ impl crate::Adapter for super::Adapter { ) .into_device_result("Queue creation")?; - let device = super::Device::new(self.device, queue, self.private_caps, &self.library)?; + let device = super::Device::new( + self.device, + queue, + features, + self.private_caps, + &self.library, + )?; Ok(crate::OpenDevice { device, queue: super::Queue { raw: queue }, diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index bec9a743b6..e93989981e 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -1,4 +1,4 @@ -use super::{conv, HResult as _, Resource}; +use super::{conv, HResult as _}; use std::{mem, ops::Range, ptr}; use winapi::um::d3d12; @@ -637,7 +637,7 @@ impl crate::CommandEncoder for super::CommandEncoder { self.list.unwrap().EndEvent() } - unsafe fn set_render_pipeline(&mut self, pipeline: &Resource) {} + unsafe fn set_render_pipeline(&mut self, pipeline: &super::RenderPipeline) {} unsafe fn set_index_buffer<'a>( &mut self, @@ -795,7 +795,7 @@ impl crate::CommandEncoder for super::CommandEncoder { self.end_pass(); } - unsafe fn set_compute_pipeline(&mut self, pipeline: &Resource) {} + unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) {} unsafe fn dispatch(&mut self, count: [u32; 3]) { self.list.unwrap().dispatch(count); diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index ef43e0f84e..cd636fdf8a 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -1,21 +1,19 @@ use super::{conv, descriptor, HResult as _}; use parking_lot::Mutex; -use std::{mem, ptr, slice, sync::Arc}; +use std::{ffi, mem, ptr, slice, sync::Arc}; use winapi::{ shared::{dxgiformat, dxgitype, winerror}, - um::{d3d12, d3d12sdklayers, synchapi, winbase}, + um::{d3d12, d3d12sdklayers, d3dcompiler, synchapi, winbase}, Interface, }; -//TODO: remove this -use super::Resource; - const D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING: u32 = 0x1688; impl super::Device { pub(super) fn new( raw: native::Device, present_queue: native::CommandQueue, + features: wgt::Features, private_caps: super::PrivateCapabilities, library: &Arc, ) -> Result { @@ -82,6 +80,7 @@ impl super::Device { let capacity_samplers = 2_048; let shared = super::DeviceShared { + features, zero_buffer, cmd_signatures: super::CommandSignatures { draw: raw @@ -508,6 +507,79 @@ impl super::Device { .CreateDepthStencilView(texture.resource.as_mut_ptr(), &raw_desc, handle.raw); handle } + + fn load_shader( + &self, + stage: &crate::ProgrammableStage, + layout: &super::PipelineLayout, + naga_stage: naga::ShaderStage, + ) -> Result { + use naga::back::hlsl; + + let stage_bit = crate::util::map_naga_stage(naga_stage); + let module = &stage.module.naga.module; + //TODO: reuse the writer + let mut source = String::new(); + let mut writer = hlsl::Writer::new(&mut source, &layout.naga_options); + let reflection_info = writer + .write(module, &stage.module.naga.info) + .map_err(|e| crate::PipelineError::Linkage(stage_bit, 
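// The target profile built next pairs naga's HLSL stage prefix ("vs"/"ps"/"cs")
// with the shader model from the layout's naga options (e.g. "cs_5_1" under
// `ShaderModel::V5_1`), and it is NUL-terminated by hand because `D3DCompile`
// takes C strings for both the entry point and the target.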
format!("HLSL: {:?}", e)))?; + + let full_stage = format!( + "{}_{}\0", + naga_stage.to_hlsl_str(), + layout.naga_options.shader_model.to_str() + ); + let raw_ep = ffi::CString::new(stage.entry_point).unwrap(); + + let mut shader_data = native::Blob::null(); + let mut error = native::Blob::null(); + let mut compile_flags = d3dcompiler::D3DCOMPILE_ENABLE_STRICTNESS; + if self.private_caps.shader_debug_info { + compile_flags |= d3dcompiler::D3DCOMPILE_DEBUG; + } + if self + .shared + .features + .contains(wgt::Features::UNSIZED_BINDING_ARRAY) + { + compile_flags |= d3dcompiler::D3DCOMPILE_ENABLE_UNBOUNDED_DESCRIPTOR_TABLES; + } + + let hr = unsafe { + d3dcompiler::D3DCompile( + source.as_ptr() as *const _, + source.len(), + ptr::null(), + ptr::null(), + ptr::null_mut(), + raw_ep.as_ptr(), + full_stage.as_ptr() as *const i8, + compile_flags, + 0, + shader_data.mut_void() as *mut *mut _, + error.mut_void() as *mut *mut _, + ) + }; + + match hr.into_result() { + Ok(()) => Ok(shader_data), + Err(e) => { + let message = unsafe { + let slice = slice::from_raw_parts( + error.GetBufferPointer() as *const u8, + error.GetBufferSize(), + ); + String::from_utf8_lossy(slice) + }; + let full_msg = format!("D3DCompile error ({}): {}", e, message); + unsafe { + error.destroy(); + } + Err(crate::PipelineError::Linkage(stage_bit, full_msg)) + } + } + } } impl crate::Device for super::Device { @@ -1087,6 +1159,9 @@ impl crate::Device for super::Device { Ok(super::PipelineLayout { raw, bind_group_infos, + naga_options: naga::back::hlsl::Options { + shader_model: naga::back::hlsl::ShaderModel::V5_1, + }, }) } unsafe fn destroy_pipeline_layout(&self, pipeline_layout: super::PipelineLayout) { @@ -1247,24 +1322,58 @@ impl crate::Device for super::Device { &self, desc: &crate::ShaderModuleDescriptor, shader: crate::ShaderInput, - ) -> Result { - Ok(Resource) + ) -> Result { + match shader { + crate::ShaderInput::Naga(naga) => Ok(super::ShaderModule { naga }), + crate::ShaderInput::SpirV(_) => { + panic!("SPIRV_SHADER_PASSTHROUGH is not enabled for this backend") + } + } } - unsafe fn destroy_shader_module(&self, module: Resource) {} + unsafe fn destroy_shader_module(&self, _module: super::ShaderModule) { + // just drop + } + unsafe fn create_render_pipeline( &self, desc: &crate::RenderPipelineDescriptor, - ) -> Result { - Ok(Resource) + ) -> Result { + unimplemented!() + } + unsafe fn destroy_render_pipeline(&self, pipeline: super::RenderPipeline) { + pipeline.raw.destroy(); } - unsafe fn destroy_render_pipeline(&self, pipeline: Resource) {} + unsafe fn create_compute_pipeline( &self, desc: &crate::ComputePipelineDescriptor, - ) -> Result { - Ok(Resource) + ) -> Result { + let cs = self.load_shader(&desc.stage, desc.layout, naga::ShaderStage::Compute)?; + + let pair = self.raw.create_compute_pipeline_state( + desc.layout.raw, + native::Shader::from_blob(cs), + 0, + native::CachedPSO::null(), + native::PipelineStateFlags::empty(), + ); + + cs.destroy(); + + let raw = pair.into_result().map_err(|err| { + crate::PipelineError::Linkage(wgt::ShaderStages::COMPUTE, err.into_owned()) + })?; + + if let Some(name) = desc.label { + let cwstr = conv::map_label(name); + raw.SetName(cwstr.as_ptr()); + } + + Ok(super::ComputePipeline { raw }) + } + unsafe fn destroy_compute_pipeline(&self, pipeline: super::ComputePipeline) { + pipeline.raw.destroy(); } - unsafe fn destroy_compute_pipeline(&self, pipeline: Resource) {} unsafe fn create_query_set( &self, diff --git a/wgpu-hal/src/dx12/instance.rs b/wgpu-hal/src/dx12/instance.rs 
index c493444fd8..001033ca31 100644 --- a/wgpu-hal/src/dx12/instance.rs +++ b/wgpu-hal/src/dx12/instance.rs @@ -133,6 +133,7 @@ impl crate::Instance for super::Instance { factory, library: Arc::new(lib_main), lib_dxgi, + flags: desc.flags, }) } @@ -218,7 +219,7 @@ impl crate::Instance for super::Instance { } }; - adapters.extend(super::Adapter::expose(raw, &self.library)); + adapters.extend(super::Adapter::expose(raw, &self.library, self.flags)); } adapters } diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index e08a6f1563..23af6212f1 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -16,7 +16,7 @@ mod instance; use arrayvec::ArrayVec; use parking_lot::Mutex; -use std::{borrow::Cow, mem, ptr, sync::Arc}; +use std::{borrow::Cow, mem, num::NonZeroU32, ptr, sync::Arc}; use winapi::{ shared::{dxgi, dxgi1_2, dxgi1_4, dxgiformat, dxgitype, windef, winerror}, um::{d3d12, synchapi, winbase, winnt}, @@ -25,9 +25,6 @@ use winapi::{ #[derive(Clone)] pub struct Api; -//TODO: remove these temporaries -#[derive(Debug)] -pub struct Resource; impl crate::Api for Api { type Instance = Instance; @@ -50,9 +47,9 @@ impl crate::Api for Api { type BindGroupLayout = BindGroupLayout; type BindGroup = BindGroup; type PipelineLayout = PipelineLayout; - type ShaderModule = Resource; - type RenderPipeline = Resource; - type ComputePipeline = Resource; + type ShaderModule = ShaderModule; + type RenderPipeline = RenderPipeline; + type ComputePipeline = ComputePipeline; } trait HResult { @@ -100,6 +97,7 @@ pub struct Instance { factory: native::Factory4, library: Arc, lib_dxgi: native::DxgiLib, + flags: crate::InstanceFlags, } unsafe impl Send for Instance {} @@ -133,6 +131,7 @@ enum MemoryArchitecture { struct PrivateCapabilities { heterogeneous_resource_heaps: bool, memory_architecture: MemoryArchitecture, + shader_debug_info: bool, } #[derive(Default)] @@ -176,6 +175,7 @@ impl CommandSignatures { } struct DeviceShared { + features: wgt::Features, zero_buffer: native::Resource, cmd_signatures: CommandSignatures, heap_views: descriptor::GeneralHeap, @@ -420,11 +420,33 @@ pub struct PipelineLayout { // Storing for each associated bind group, which tables we created // in the root signature. This is required for binding descriptor sets. 
bind_group_infos: ArrayVec, + naga_options: naga::back::hlsl::Options, } unsafe impl Send for PipelineLayout {} unsafe impl Sync for PipelineLayout {} +#[derive(Debug)] +pub struct ShaderModule { + naga: crate::NagaShader, +} + +pub struct RenderPipeline { + raw: native::PipelineState, + topology: d3d12::D3D12_PRIMITIVE_TOPOLOGY, + vertex_strides: [Option; crate::MAX_VERTEX_BUFFERS], +} + +unsafe impl Send for RenderPipeline {} +unsafe impl Sync for RenderPipeline {} + +pub struct ComputePipeline { + raw: native::PipelineState, +} + +unsafe impl Send for ComputePipeline {} +unsafe impl Sync for ComputePipeline {} + impl SwapChain { unsafe fn release_resources(self) -> native::WeakPtr { for resource in self.resources { diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index 23caea67e6..358de98e5f 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -676,7 +676,7 @@ impl crate::Device for super::Device { match shader { crate::ShaderInput::Naga(naga) => Ok(super::ShaderModule { naga }), crate::ShaderInput::SpirV(_) => { - unreachable!("SPIRV_SHADER_PASSTHROUGH is not enabled for this backend") + panic!("SPIRV_SHADER_PASSTHROUGH is not enabled for this backend") } } } diff --git a/wgpu/Cargo.toml b/wgpu/Cargo.toml index e521508b49..b2252bccd6 100644 --- a/wgpu/Cargo.toml +++ b/wgpu/Cargo.toml @@ -73,19 +73,19 @@ env_logger = "0.8" [dependencies.naga] git = "https://github.com/gfx-rs/naga" -rev = "0b9af95793e319817e74a30601cbcd4bad9bb3e6" +rev = "458db0b" optional = true # used to test all the example shaders [dev-dependencies.naga] git = "https://github.com/gfx-rs/naga" -rev = "0b9af95793e319817e74a30601cbcd4bad9bb3e6" +rev = "458db0b" features = ["wgsl-in"] # used to generate SPIR-V for the Web target [target.'cfg(target_arch = "wasm32")'.dependencies.naga] git = "https://github.com/gfx-rs/naga" -rev = "0b9af95793e319817e74a30601cbcd4bad9bb3e6" +rev = "458db0b" features = ["wgsl-in", "spv-out"] [[example]] From 8163dc7b471a2f4c1db74ca12514a58bacde1a44 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Tue, 13 Jul 2021 00:29:29 -0400 Subject: [PATCH 30/33] hal/dx12: pipeline creation --- README.md | 2 +- wgpu-hal/src/dx12/adapter.rs | 1 + wgpu-hal/src/dx12/command.rs | 34 ++++- wgpu-hal/src/dx12/conv.rs | 207 +++++++++++++++++++++++++++++- wgpu-hal/src/dx12/device.rs | 240 ++++++++++++++++++++++++++++++++--- wgpu-hal/src/dx12/mod.rs | 6 +- wgpu-types/src/lib.rs | 2 +- 7 files changed, 465 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 8edc2907c7..0bb725b5c4 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ If you are looking for the native implementation or bindings to the API in other API | Windows 7/10 | Linux & Android | macOS & iOS | ----- | ------------------ | ------------------ | ------------------ | DX11 | :construction: | | | - DX12 | :construction: | | | + DX12 | :ok: | | | Vulkan | :white_check_mark: | :white_check_mark: | | Metal | | | :white_check_mark: | GLes3 | | :ok: | | diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 175d1524e6..15df11842d 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -108,6 +108,7 @@ impl super::Adapter { super::MemoryArchitecture::NonUnified }, shader_debug_info: instance_flags.contains(crate::InstanceFlags::DEBUG), + heap_create_not_zeroed: false, //TODO: winapi support for Options7 }; // Theoretically vram limited, but in practice 2^20 is the limit diff --git a/wgpu-hal/src/dx12/command.rs 
b/wgpu-hal/src/dx12/command.rs index e93989981e..1bcf7e38bf 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -637,7 +637,30 @@ impl crate::CommandEncoder for super::CommandEncoder { self.list.unwrap().EndEvent() } - unsafe fn set_render_pipeline(&mut self, pipeline: &super::RenderPipeline) {} + unsafe fn set_render_pipeline(&mut self, pipeline: &super::RenderPipeline) { + let list = self.list.unwrap(); + + list.set_graphics_root_signature(pipeline.signature); + list.set_pipeline_state(pipeline.raw); + list.IASetPrimitiveTopology(pipeline.topology); + + //TODO: root signature changes require full layout rebind! + + for (index, (vb, &stride)) in self + .pass + .vertex_buffers + .iter_mut() + .zip(pipeline.vertex_strides.iter()) + .enumerate() + { + if let Some(stride) = stride { + if vb.StrideInBytes != stride.get() { + vb.StrideInBytes = stride.get(); + self.pass.dirty_vertex_buffers |= 1 << index; + } + } + } + } unsafe fn set_index_buffer<'a>( &mut self, @@ -795,7 +818,14 @@ impl crate::CommandEncoder for super::CommandEncoder { self.end_pass(); } - unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) {} + unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) { + let list = self.list.unwrap(); + + list.set_compute_root_signature(pipeline.signature); + list.set_pipeline_state(pipeline.raw); + + //TODO: root signature changes require full layout rebind! + } unsafe fn dispatch(&mut self, count: [u32; 3]) { self.list.unwrap().dispatch(count); diff --git a/wgpu-hal/src/dx12/conv.rs b/wgpu-hal/src/dx12/conv.rs index b7dee5aa7a..7ea1c5885b 100644 --- a/wgpu-hal/src/dx12/conv.rs +++ b/wgpu-hal/src/dx12/conv.rs @@ -1,7 +1,7 @@ use std::iter; use winapi::{ shared::{dxgi1_2, dxgiformat}, - um::d3d12, + um::{d3d12, d3dcommon}, }; pub(super) fn map_texture_format(format: wgt::TextureFormat) -> dxgiformat::DXGI_FORMAT { @@ -117,6 +117,45 @@ pub fn map_index_format(format: wgt::IndexFormat) -> dxgiformat::DXGI_FORMAT { } } +pub fn map_vertex_format(format: wgt::VertexFormat) -> dxgiformat::DXGI_FORMAT { + use wgt::VertexFormat as Vf; + use winapi::shared::dxgiformat::*; + + match format { + Vf::Unorm8x2 => DXGI_FORMAT_R8G8_UNORM, + Vf::Snorm8x2 => DXGI_FORMAT_R8G8_SNORM, + Vf::Uint8x2 => DXGI_FORMAT_R8G8_UINT, + Vf::Sint8x2 => DXGI_FORMAT_R8G8_SINT, + Vf::Unorm8x4 => DXGI_FORMAT_R8G8B8A8_UNORM, + Vf::Snorm8x4 => DXGI_FORMAT_R8G8B8A8_SNORM, + Vf::Uint8x4 => DXGI_FORMAT_R8G8B8A8_UINT, + Vf::Sint8x4 => DXGI_FORMAT_R8G8B8A8_SINT, + Vf::Unorm16x2 => DXGI_FORMAT_R16G16_UNORM, + Vf::Snorm16x2 => DXGI_FORMAT_R16G16_SNORM, + Vf::Uint16x2 => DXGI_FORMAT_R16G16_UINT, + Vf::Sint16x2 => DXGI_FORMAT_R16G16_SINT, + Vf::Float16x2 => DXGI_FORMAT_R16G16_FLOAT, + Vf::Unorm16x4 => DXGI_FORMAT_R16G16B16A16_UNORM, + Vf::Snorm16x4 => DXGI_FORMAT_R16G16B16A16_SNORM, + Vf::Uint16x4 => DXGI_FORMAT_R16G16B16A16_UINT, + Vf::Sint16x4 => DXGI_FORMAT_R16G16B16A16_SINT, + Vf::Float16x4 => DXGI_FORMAT_R16G16B16A16_FLOAT, + Vf::Uint32 => DXGI_FORMAT_R32_UINT, + Vf::Sint32 => DXGI_FORMAT_R32_SINT, + Vf::Float32 => DXGI_FORMAT_R32_FLOAT, + Vf::Uint32x2 => DXGI_FORMAT_R32G32_UINT, + Vf::Sint32x2 => DXGI_FORMAT_R32G32_SINT, + Vf::Float32x2 => DXGI_FORMAT_R32G32_FLOAT, + Vf::Uint32x3 => DXGI_FORMAT_R32G32B32_UINT, + Vf::Sint32x3 => DXGI_FORMAT_R32G32B32_SINT, + Vf::Float32x3 => DXGI_FORMAT_R32G32B32_FLOAT, + Vf::Uint32x4 => DXGI_FORMAT_R32G32B32A32_UINT, + Vf::Sint32x4 => DXGI_FORMAT_R32G32B32A32_SINT, + Vf::Float32x4 => DXGI_FORMAT_R32G32B32A32_FLOAT, + 
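        // The 64-bit formats below have no DXGI counterpart (DXGI_FORMAT defines
        // no 64-bit float vertex formats), so they are left as unimplemented!()
        // rather than silently emulated.
        // For the formats above, an attribute such as wgt::VertexFormat::Float32x3
        // at shader location 2 ends up described to D3D12 as an input element with
        // SemanticName "LOC", SemanticIndex 2, and Format DXGI_FORMAT_R32G32B32_FLOAT
        // (illustrative values), matching the "LOC<n>" semantic convention of
        // Naga's HLSL backend.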
Vf::Float64 | Vf::Float64x2 | Vf::Float64x3 | Vf::Float64x4 => unimplemented!(), + } +} + pub fn map_acomposite_alpha_mode(mode: crate::CompositeAlphaMode) -> dxgi1_2::DXGI_ALPHA_MODE { use crate::CompositeAlphaMode as Cam; match mode { @@ -308,3 +347,169 @@ pub fn map_texture_usage_to_state(usage: crate::TextureUses) -> d3d12::D3D12_RES } state } + +pub fn map_topology( + topology: wgt::PrimitiveTopology, +) -> ( + d3d12::D3D12_PRIMITIVE_TOPOLOGY_TYPE, + d3d12::D3D12_PRIMITIVE_TOPOLOGY, +) { + match topology { + wgt::PrimitiveTopology::PointList => ( + d3d12::D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_POINTLIST, + ), + wgt::PrimitiveTopology::LineList => ( + d3d12::D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_LINELIST, + ), + wgt::PrimitiveTopology::LineStrip => ( + d3d12::D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_LINESTRIP, + ), + wgt::PrimitiveTopology::TriangleList => ( + d3d12::D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST, + ), + wgt::PrimitiveTopology::TriangleStrip => ( + d3d12::D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE, + d3dcommon::D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, + ), + } +} + +pub fn map_polygon_mode(mode: wgt::PolygonMode) -> d3d12::D3D12_FILL_MODE { + match mode { + wgt::PolygonMode::Point => { + log::error!("Point rasterization is not supported"); + d3d12::D3D12_FILL_MODE_WIREFRAME + } + wgt::PolygonMode::Line => d3d12::D3D12_FILL_MODE_WIREFRAME, + wgt::PolygonMode::Fill => d3d12::D3D12_FILL_MODE_SOLID, + } +} + +fn map_blend_factor(factor: wgt::BlendFactor, is_alpha: bool) -> d3d12::D3D12_BLEND { + use wgt::BlendFactor as Bf; + match factor { + Bf::Zero => d3d12::D3D12_BLEND_ZERO, + Bf::One => d3d12::D3D12_BLEND_ONE, + Bf::Src if is_alpha => d3d12::D3D12_BLEND_SRC_ALPHA, + Bf::Src => d3d12::D3D12_BLEND_SRC_COLOR, + Bf::OneMinusSrc if is_alpha => d3d12::D3D12_BLEND_INV_SRC_ALPHA, + Bf::OneMinusSrc => d3d12::D3D12_BLEND_INV_SRC_COLOR, + Bf::Dst if is_alpha => d3d12::D3D12_BLEND_DEST_ALPHA, + Bf::Dst => d3d12::D3D12_BLEND_DEST_COLOR, + Bf::OneMinusDst if is_alpha => d3d12::D3D12_BLEND_INV_DEST_ALPHA, + Bf::OneMinusDst => d3d12::D3D12_BLEND_INV_DEST_COLOR, + Bf::SrcAlpha => d3d12::D3D12_BLEND_SRC_ALPHA, + Bf::OneMinusSrcAlpha => d3d12::D3D12_BLEND_INV_SRC_ALPHA, + Bf::DstAlpha => d3d12::D3D12_BLEND_DEST_ALPHA, + Bf::OneMinusDstAlpha => d3d12::D3D12_BLEND_INV_DEST_ALPHA, + Bf::Constant => d3d12::D3D12_BLEND_BLEND_FACTOR, + Bf::OneMinusConstant => d3d12::D3D12_BLEND_INV_BLEND_FACTOR, + Bf::SrcAlphaSaturated => d3d12::D3D12_BLEND_SRC_ALPHA_SAT, + //Bf::Src1Color if is_alpha => d3d12::D3D12_BLEND_SRC1_ALPHA, + //Bf::Src1Color => d3d12::D3D12_BLEND_SRC1_COLOR, + //Bf::OneMinusSrc1Color if is_alpha => d3d12::D3D12_BLEND_INV_SRC1_ALPHA, + //Bf::OneMinusSrc1Color => d3d12::D3D12_BLEND_INV_SRC1_COLOR, + //Bf::Src1Alpha => d3d12::D3D12_BLEND_SRC1_ALPHA, + //Bf::OneMinusSrc1Alpha => d3d12::D3D12_BLEND_INV_SRC1_ALPHA, + } +} + +fn map_blend_component( + component: &wgt::BlendComponent, + is_alpha: bool, +) -> ( + d3d12::D3D12_BLEND_OP, + d3d12::D3D12_BLEND, + d3d12::D3D12_BLEND, +) { + let raw_op = match component.operation { + wgt::BlendOperation::Add => d3d12::D3D12_BLEND_OP_ADD, + wgt::BlendOperation::Subtract => d3d12::D3D12_BLEND_OP_SUBTRACT, + wgt::BlendOperation::ReverseSubtract => d3d12::D3D12_BLEND_OP_REV_SUBTRACT, + wgt::BlendOperation::Min => d3d12::D3D12_BLEND_OP_MIN, + wgt::BlendOperation::Max => d3d12::D3D12_BLEND_OP_MAX, + }; + let raw_src = 
map_blend_factor(component.src_factor, is_alpha); + let raw_dst = map_blend_factor(component.dst_factor, is_alpha); + (raw_op, raw_src, raw_dst) +} + +pub fn map_render_targets( + color_targets: &[wgt::ColorTargetState], +) -> [d3d12::D3D12_RENDER_TARGET_BLEND_DESC; d3d12::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize] +{ + let dummy_target = d3d12::D3D12_RENDER_TARGET_BLEND_DESC { + BlendEnable: 0, + LogicOpEnable: 0, + SrcBlend: d3d12::D3D12_BLEND_ZERO, + DestBlend: d3d12::D3D12_BLEND_ZERO, + BlendOp: d3d12::D3D12_BLEND_OP_ADD, + SrcBlendAlpha: d3d12::D3D12_BLEND_ZERO, + DestBlendAlpha: d3d12::D3D12_BLEND_ZERO, + BlendOpAlpha: d3d12::D3D12_BLEND_OP_ADD, + LogicOp: d3d12::D3D12_LOGIC_OP_CLEAR, + RenderTargetWriteMask: 0, + }; + let mut raw_targets = [dummy_target; d3d12::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize]; + + for (raw, ct) in raw_targets.iter_mut().zip(color_targets.iter()) { + raw.RenderTargetWriteMask = ct.write_mask.bits() as u8; + if let Some(ref blend) = ct.blend { + let (color_op, color_src, color_dst) = map_blend_component(&blend.color, false); + let (alpha_op, alpha_src, alpha_dst) = map_blend_component(&blend.alpha, true); + raw.BlendEnable = 1; + raw.BlendOp = color_op; + raw.SrcBlend = color_src; + raw.DestBlend = color_dst; + raw.BlendOpAlpha = alpha_op; + raw.SrcBlendAlpha = alpha_src; + raw.DestBlendAlpha = alpha_dst; + } + } + + raw_targets +} + +fn map_stencil_op(op: wgt::StencilOperation) -> d3d12::D3D12_STENCIL_OP { + use wgt::StencilOperation as So; + match op { + So::Keep => d3d12::D3D12_STENCIL_OP_KEEP, + So::Zero => d3d12::D3D12_STENCIL_OP_ZERO, + So::Replace => d3d12::D3D12_STENCIL_OP_REPLACE, + So::IncrementClamp => d3d12::D3D12_STENCIL_OP_INCR_SAT, + So::IncrementWrap => d3d12::D3D12_STENCIL_OP_INCR, + So::DecrementClamp => d3d12::D3D12_STENCIL_OP_DECR_SAT, + So::DecrementWrap => d3d12::D3D12_STENCIL_OP_DECR, + So::Invert => d3d12::D3D12_STENCIL_OP_INVERT, + } +} + +fn map_stencil_face(face: &wgt::StencilFaceState) -> d3d12::D3D12_DEPTH_STENCILOP_DESC { + d3d12::D3D12_DEPTH_STENCILOP_DESC { + StencilFailOp: map_stencil_op(face.fail_op), + StencilDepthFailOp: map_stencil_op(face.depth_fail_op), + StencilPassOp: map_stencil_op(face.pass_op), + StencilFunc: map_comparison(face.compare), + } +} + +pub fn map_depth_stencil(ds: &wgt::DepthStencilState) -> d3d12::D3D12_DEPTH_STENCIL_DESC { + d3d12::D3D12_DEPTH_STENCIL_DESC { + DepthEnable: if ds.is_depth_enabled() { 1 } else { 0 }, + DepthWriteMask: if ds.depth_write_enabled { + d3d12::D3D12_DEPTH_WRITE_MASK_ALL + } else { + d3d12::D3D12_DEPTH_WRITE_MASK_ZERO + }, + DepthFunc: map_comparison(ds.depth_compare), + StencilEnable: if ds.stencil.is_enabled() { 1 } else { 0 }, + StencilReadMask: ds.stencil.read_mask as u8, + StencilWriteMask: ds.stencil.write_mask as u8, + FrontFace: map_stencil_face(&ds.stencil.front), + BackFace: map_stencil_face(&ds.stencil.back), + } +} diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index cd636fdf8a..e1517fb441 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -1,6 +1,6 @@ use super::{conv, descriptor, HResult as _}; use parking_lot::Mutex; -use std::{ffi, mem, ptr, slice, sync::Arc}; +use std::{ffi, mem, num::NonZeroU32, ptr, slice, sync::Arc}; use winapi::{ shared::{dxgiformat, dxgitype, winerror}, um::{d3d12, d3d12sdklayers, d3dcompiler, synchapi, winbase}, @@ -8,6 +8,8 @@ use winapi::{ }; const D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING: u32 = 0x1688; +//TODO: find the exact value +const 
D3D12_HEAP_FLAG_CREATE_NOT_ZEROED: u32 = d3d12::D3D12_HEAP_FLAG_NONE; impl super::Device { pub(super) fn new( @@ -48,13 +50,16 @@ impl super::Device { let heap_properties = d3d12::D3D12_HEAP_PROPERTIES { Type: d3d12::D3D12_HEAP_TYPE_CUSTOM, - CPUPageProperty: d3d12::D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE, - MemoryPoolPreference: d3d12::D3D12_MEMORY_POOL_L0, + CPUPageProperty: d3d12::D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE, + MemoryPoolPreference: match private_caps.memory_architecture { + super::MemoryArchitecture::Unified { .. } => d3d12::D3D12_MEMORY_POOL_L0, + super::MemoryArchitecture::NonUnified => d3d12::D3D12_MEMORY_POOL_L1, + }, CreationNodeMask: 0, VisibleNodeMask: 0, }; - let hr = raw.CreateCommittedResource( + raw.CreateCommittedResource( &heap_properties, d3d12::D3D12_HEAP_FLAG_NONE, &raw_desc, @@ -62,17 +67,11 @@ impl super::Device { ptr::null(), &d3d12::ID3D12Resource::uuidof(), zero_buffer.mut_void(), - ); - - hr.into_device_result("Zero buffer creation")?; + ) + .into_device_result("Zero buffer creation")?; - let range = d3d12::D3D12_RANGE { Begin: 0, End: 0 }; - let mut ptr = std::ptr::null_mut(); - (*zero_buffer) - .Map(0, &range, &mut ptr) - .into_device_result("Map zero buffer")?; - slice::from_raw_parts_mut(ptr as *mut u8, super::ZERO_BUFFER_SIZE as usize).fill(0); - (*zero_buffer).Unmap(0, &range); + //Note: without `D3D12_HEAP_FLAG_CREATE_NOT_ZEROED` + // this resource is zeroed by default. }; // maximum number of CBV/SRV/UAV descriptors in heap for Tier 1 @@ -129,6 +128,10 @@ impl super::Device { }, private_caps, shared: Arc::new(shared), + //Note: these names have to match Naga's convention + vertex_attribute_names: (0..d3d12::D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT) + .map(|i| ffi::CString::new(format!("LOC{}", i)).unwrap()) + .collect(), rtv_pool: Mutex::new(descriptor::CpuPool::new( raw, native::DescriptorHeapType::Rtv, @@ -651,7 +654,11 @@ impl crate::Device for super::Device { let hr = self.raw.CreateCommittedResource( &heap_properties, - d3d12::D3D12_HEAP_FLAG_NONE, + if self.private_caps.heap_create_not_zeroed { + D3D12_HEAP_FLAG_CREATE_NOT_ZEROED + } else { + d3d12::D3D12_HEAP_FLAG_NONE + }, &raw_desc, d3d12::D3D12_RESOURCE_STATE_COMMON, ptr::null(), @@ -726,7 +733,11 @@ impl crate::Device for super::Device { let hr = self.raw.CreateCommittedResource( &heap_properties, - d3d12::D3D12_HEAP_FLAG_NONE, + if self.private_caps.heap_create_not_zeroed { + D3D12_HEAP_FLAG_CREATE_NOT_ZEROED + } else { + d3d12::D3D12_HEAP_FLAG_NONE + }, &raw_desc, d3d12::D3D12_RESOURCE_STATE_COMMON, ptr::null(), @@ -975,6 +986,16 @@ impl crate::Device for super::Device { // Root Descriptors 1 // ... + //TODO: reverse the order, according to this advice in + // https://microsoft.github.io/DirectX-Specs/d3d/ResourceBinding.html#binding-model + //> Furthermore, applications should generally sort the layout + //> of the root arguments in decreasing order of change frequency. + //> This way if some implementations need to switch to a different + //> memory storage scheme to version parts of a heavily populated + //> root arguments, the data that is changing at the highest frequency + //> (near the start of the root arguments) is most likely to run + //> as efficiently as possible. 
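        // To make the current layout concrete, an illustrative case (hypothetical,
        // not from a test): with no root constants, a bind group 0 containing views
        // and samplers, and a bind group 1 containing a single dynamic uniform
        // buffer, the parameters are appended in this order:
        //     param 0: descriptor table, group 0 CBV/SRV/UAV ranges
        //     param 1: descriptor table, group 0 sampler ranges
        //     param 2: root descriptor, group 1 dynamic uniform buffer
        // i.e. descriptor tables first within each group, then that group's
        // dynamic buffers, with groups processed in declaration order.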
+ let mut root_offset = 0u32; let root_constants: &[()] = &[]; @@ -1338,7 +1359,181 @@ impl crate::Device for super::Device { &self, desc: &crate::RenderPipelineDescriptor, ) -> Result { - unimplemented!() + let (topology_class, topology) = conv::map_topology(desc.primitive.topology); + let mut shader_stages = wgt::ShaderStages::VERTEX; + + let blob_vs = + self.load_shader(&desc.vertex_stage, desc.layout, naga::ShaderStage::Vertex)?; + let blob_fs = match desc.fragment_stage { + Some(ref stage) => { + shader_stages |= wgt::ShaderStages::FRAGMENT; + self.load_shader(stage, desc.layout, naga::ShaderStage::Fragment)? + } + None => native::Blob::null(), + }; + + let mut vertex_strides = [None; crate::MAX_VERTEX_BUFFERS]; + let mut input_element_descs = Vec::new(); + for (i, (stride, vbuf)) in vertex_strides + .iter_mut() + .zip(desc.vertex_buffers) + .enumerate() + { + *stride = NonZeroU32::new(vbuf.array_stride as u32); + let (slot_class, step_rate) = match vbuf.step_mode { + wgt::InputStepMode::Vertex => { + (d3d12::D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0) + } + wgt::InputStepMode::Instance => { + (d3d12::D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA, 1) + } + }; + for attribute in vbuf.attributes { + let name = &self.vertex_attribute_names[attribute.shader_location as usize]; + input_element_descs.push(d3d12::D3D12_INPUT_ELEMENT_DESC { + SemanticName: name.as_ptr(), + SemanticIndex: attribute.shader_location, + Format: conv::map_vertex_format(attribute.format), + InputSlot: i as u32, + AlignedByteOffset: attribute.offset as u32, + InputSlotClass: slot_class, + InstanceDataStepRate: step_rate, + }); + } + } + + let mut rtv_formats = [dxgiformat::DXGI_FORMAT_UNKNOWN; + d3d12::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize]; + for (rtv_format, ct) in rtv_formats.iter_mut().zip(desc.color_targets) { + *rtv_format = conv::map_texture_format(ct.format); + } + + let bias = desc + .depth_stencil + .as_ref() + .map(|ds| ds.bias.clone()) + .unwrap_or_default(); + + let raw_rasterizer = d3d12::D3D12_RASTERIZER_DESC { + FillMode: conv::map_polygon_mode(desc.primitive.polygon_mode), + CullMode: match desc.primitive.cull_mode { + None => d3d12::D3D12_CULL_MODE_NONE, + Some(wgt::Face::Front) => d3d12::D3D12_CULL_MODE_FRONT, + Some(wgt::Face::Back) => d3d12::D3D12_CULL_MODE_BACK, + }, + FrontCounterClockwise: match desc.primitive.front_face { + wgt::FrontFace::Cw => 0, + wgt::FrontFace::Ccw => 1, + }, + DepthBias: bias.constant, + DepthBiasClamp: bias.clamp, + SlopeScaledDepthBias: bias.slope_scale, + DepthClipEnable: if desc.primitive.clamp_depth { 0 } else { 1 }, + MultisampleEnable: if desc.multisample.count > 1 { 1 } else { 0 }, + ForcedSampleCount: 0, + AntialiasedLineEnable: 0, + ConservativeRaster: if desc.primitive.conservative { + d3d12::D3D12_CONSERVATIVE_RASTERIZATION_MODE_ON + } else { + d3d12::D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF + }, + }; + + let raw_desc = d3d12::D3D12_GRAPHICS_PIPELINE_STATE_DESC { + pRootSignature: desc.layout.raw.as_mut_ptr(), + VS: *native::Shader::from_blob(blob_vs), + PS: if blob_fs.is_null() { + *native::Shader::null() + } else { + *native::Shader::from_blob(blob_fs) + }, + GS: *native::Shader::null(), + DS: *native::Shader::null(), + HS: *native::Shader::null(), + StreamOutput: d3d12::D3D12_STREAM_OUTPUT_DESC { + pSODeclaration: ptr::null(), + NumEntries: 0, + pBufferStrides: ptr::null(), + NumStrides: 0, + RasterizedStream: 0, + }, + BlendState: d3d12::D3D12_BLEND_DESC { + AlphaToCoverageEnable: if desc.multisample.alpha_to_coverage_enabled { + 1 + } else 
{ + 0 + }, + IndependentBlendEnable: 1, + RenderTarget: conv::map_render_targets(desc.color_targets), + }, + SampleMask: desc.multisample.mask as u32, + RasterizerState: raw_rasterizer, + DepthStencilState: match desc.depth_stencil { + Some(ref ds) => conv::map_depth_stencil(ds), + None => mem::zeroed(), + }, + InputLayout: d3d12::D3D12_INPUT_LAYOUT_DESC { + pInputElementDescs: if input_element_descs.is_empty() { + ptr::null() + } else { + input_element_descs.as_ptr() + }, + NumElements: input_element_descs.len() as u32, + }, + IBStripCutValue: match desc.primitive.strip_index_format { + Some(wgt::IndexFormat::Uint16) => d3d12::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF, + Some(wgt::IndexFormat::Uint32) => { + d3d12::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF + } + None => d3d12::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED, + }, + PrimitiveTopologyType: topology_class, + NumRenderTargets: desc.color_targets.len() as u32, + RTVFormats: rtv_formats, + DSVFormat: desc + .depth_stencil + .as_ref() + .map_or(dxgiformat::DXGI_FORMAT_UNKNOWN, |ds| { + conv::map_texture_format(ds.format) + }), + SampleDesc: dxgitype::DXGI_SAMPLE_DESC { + Count: desc.multisample.count, + Quality: 0, + }, + NodeMask: 0, + CachedPSO: d3d12::D3D12_CACHED_PIPELINE_STATE { + pCachedBlob: ptr::null(), + CachedBlobSizeInBytes: 0, + }, + Flags: d3d12::D3D12_PIPELINE_STATE_FLAG_NONE, + }; + + let mut raw = native::PipelineState::null(); + let hr = self.raw.CreateGraphicsPipelineState( + &raw_desc, + &d3d12::ID3D12PipelineState::uuidof(), + raw.mut_void(), + ); + + blob_vs.destroy(); + if !blob_fs.is_null() { + blob_fs.destroy(); + } + + hr.into_result() + .map_err(|err| crate::PipelineError::Linkage(shader_stages, err.into_owned()))?; + + if let Some(name) = desc.label { + let cwstr = conv::map_label(name); + raw.SetName(cwstr.as_ptr()); + } + + Ok(super::RenderPipeline { + raw, + signature: desc.layout.raw, + topology, + vertex_strides, + }) } unsafe fn destroy_render_pipeline(&self, pipeline: super::RenderPipeline) { pipeline.raw.destroy(); @@ -1348,17 +1543,17 @@ impl crate::Device for super::Device { &self, desc: &crate::ComputePipelineDescriptor, ) -> Result { - let cs = self.load_shader(&desc.stage, desc.layout, naga::ShaderStage::Compute)?; + let blob_cs = self.load_shader(&desc.stage, desc.layout, naga::ShaderStage::Compute)?; let pair = self.raw.create_compute_pipeline_state( desc.layout.raw, - native::Shader::from_blob(cs), + native::Shader::from_blob(blob_cs), 0, native::CachedPSO::null(), native::PipelineStateFlags::empty(), ); - cs.destroy(); + blob_cs.destroy(); let raw = pair.into_result().map_err(|err| { crate::PipelineError::Linkage(wgt::ShaderStages::COMPUTE, err.into_owned()) @@ -1369,7 +1564,10 @@ impl crate::Device for super::Device { raw.SetName(cwstr.as_ptr()); } - Ok(super::ComputePipeline { raw }) + Ok(super::ComputePipeline { + raw, + signature: desc.layout.raw, + }) } unsafe fn destroy_compute_pipeline(&self, pipeline: super::ComputePipeline) { pipeline.raw.destroy(); diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 23af6212f1..4c2e43ab94 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -16,7 +16,7 @@ mod instance; use arrayvec::ArrayVec; use parking_lot::Mutex; -use std::{borrow::Cow, mem, num::NonZeroU32, ptr, sync::Arc}; +use std::{borrow::Cow, ffi, mem, num::NonZeroU32, ptr, sync::Arc}; use winapi::{ shared::{dxgi, dxgi1_2, dxgi1_4, dxgiformat, dxgitype, windef, winerror}, um::{d3d12, synchapi, winbase, winnt}, @@ -132,6 +132,7 @@ struct 
PrivateCapabilities { heterogeneous_resource_heaps: bool, memory_architecture: MemoryArchitecture, shader_debug_info: bool, + heap_create_not_zeroed: bool, } #[derive(Default)] @@ -197,6 +198,7 @@ pub struct Device { idler: Idler, private_caps: PrivateCapabilities, shared: Arc, + vertex_attribute_names: Vec, // CPU only pools rtv_pool: Mutex, dsv_pool: Mutex, @@ -433,6 +435,7 @@ pub struct ShaderModule { pub struct RenderPipeline { raw: native::PipelineState, + signature: native::RootSignature, topology: d3d12::D3D12_PRIMITIVE_TOPOLOGY, vertex_strides: [Option; crate::MAX_VERTEX_BUFFERS], } @@ -442,6 +445,7 @@ unsafe impl Sync for RenderPipeline {} pub struct ComputePipeline { raw: native::PipelineState, + signature: native::RootSignature, } unsafe impl Send for ComputePipeline {} diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index bda8bc7e9b..bb92953203 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -110,7 +110,7 @@ bitflags::bitflags! { /// Vulkan + Metal + DX12 + Browser WebGPU const PRIMARY = Self::VULKAN.bits | Self::METAL.bits - | Self::DX12.bits + //| Self::DX12.bits // enable when Naga is polished | Self::BROWSER_WEBGPU.bits; /// All the apis that wgpu offers second tier of support for. These may /// be unsupported/still experimental. From 450e5dc08b41e0377dcf6556e0ead67a29d62418 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Tue, 13 Jul 2021 00:59:40 -0400 Subject: [PATCH 31/33] hal/dx12: queue operations --- wgpu-hal/src/dx12/adapter.rs | 5 ++++- wgpu-hal/src/dx12/device.rs | 23 +++++++++++++---------- wgpu-hal/src/dx12/instance.rs | 4 ++-- wgpu-hal/src/dx12/mod.rs | 31 +++++++++++++++++++++++++++---- 4 files changed, 46 insertions(+), 17 deletions(-) diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 15df11842d..16d3f90604 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -250,7 +250,10 @@ impl crate::Adapter for super::Adapter { )?; Ok(crate::OpenDevice { device, - queue: super::Queue { raw: queue }, + queue: super::Queue { + raw: queue, + temp_lists: Vec::new(), + }, }) } diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index e1517fb441..88fa17dd7a 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -1,6 +1,6 @@ use super::{conv, descriptor, HResult as _}; use parking_lot::Mutex; -use std::{ffi, mem, num::NonZeroU32, ptr, slice, sync::Arc}; +use std::{ffi, mem, num::NonZeroU32, ptr, slice, sync::Arc, thread}; use winapi::{ shared::{dxgiformat, dxgitype, winerror}, um::{d3d12, d3d12sdklayers, d3dcompiler, synchapi, winbase}, @@ -568,6 +568,7 @@ impl super::Device { match hr.into_result() { Ok(()) => Ok(shader_data), Err(e) => { + log::warn!("Naga generated shader:\n{}", source); let message = unsafe { let slice = slice::from_raw_parts( error.GetBufferPointer() as *const u8, @@ -593,16 +594,18 @@ impl crate::Device for super::Device { self.sampler_pool.into_inner().destroy(); self.shared.destroy(); - //self.descriptor_updater.lock().destroy(); - // Debug tracking alive objects - if let Ok(debug_device) = self - .raw - .cast::() - .into_result() - { - debug_device.ReportLiveDeviceObjects(d3d12sdklayers::D3D12_RLDO_DETAIL); - debug_device.destroy(); + if !thread::panicking() { + if let Ok(debug_device) = self + .raw + .cast::() + .into_result() + { + debug_device.ReportLiveDeviceObjects( + d3d12sdklayers::D3D12_RLDO_SUMMARY | d3d12sdklayers::D3D12_RLDO_IGNORE_INTERNAL, + ); + debug_device.destroy(); + } } self.raw.destroy(); diff --git 
a/wgpu-hal/src/dx12/instance.rs b/wgpu-hal/src/dx12/instance.rs index 001033ca31..7647ce448c 100644 --- a/wgpu-hal/src/dx12/instance.rs +++ b/wgpu-hal/src/dx12/instance.rs @@ -50,7 +50,7 @@ unsafe extern "system" fn output_debug_string_handler( None => return excpt::EXCEPTION_CONTINUE_SEARCH, }; - log::log!(level, "{}", message,); + log::log!(level, "{}", message.trim_end_matches("\n\0")); if cfg!(debug_assertions) && level == log::Level::Error { std::process::exit(1); @@ -132,7 +132,7 @@ impl crate::Instance for super::Instance { Ok(Self { factory, library: Arc::new(lib_main), - lib_dxgi, + _lib_dxgi: lib_dxgi, flags: desc.flags, }) } diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 4c2e43ab94..28dd1219f3 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -96,7 +96,7 @@ const ZERO_BUFFER_SIZE: wgt::BufferAddress = 256 << 10; pub struct Instance { factory: native::Factory4, library: Arc, - lib_dxgi: native::DxgiLib, + _lib_dxgi: native::DxgiLib, flags: crate::InstanceFlags, } @@ -109,6 +109,7 @@ struct SwapChain { // when the swapchain is destroyed resources: Vec, waitable: winnt::HANDLE, + present_mode: wgt::PresentMode, acquired_count: usize, } @@ -213,6 +214,7 @@ unsafe impl Sync for Device {} pub struct Queue { raw: native::CommandQueue, + temp_lists: Vec, } unsafe impl Send for Queue {} @@ -489,6 +491,7 @@ impl crate::Surface for Surface { let non_srgb_format = conv::map_texture_format_nosrgb(config.format); let swap_chain = match self.swap_chain.take() { + //Note: this path doesn't properly re-initialize all of the things Some(sc) => { // can't have image resources in flight used by GPU let _ = device.wait_idle(); @@ -575,9 +578,7 @@ impl crate::Surface for Surface { resources, waitable, acquired_count: 0, - //format: config.format, - //size: config.extent, - //mode: config.present_mode, + present_mode: config.present_mode, }); Ok(()) @@ -609,6 +610,18 @@ impl crate::Queue for Queue { command_buffers: &[&CommandBuffer], signal_fence: Option<(&mut Fence, crate::FenceValue)>, ) -> Result<(), crate::DeviceError> { + self.temp_lists.clear(); + for cmd_buf in command_buffers { + self.temp_lists.push(cmd_buf.raw.as_list()); + } + + self.raw.execute_command_lists(&self.temp_lists); + + if let Some((fence, value)) = signal_fence { + self.raw + .signal(fence.raw, value) + .into_device_result("Signal fence")?; + } Ok(()) } unsafe fn present( @@ -616,6 +629,16 @@ impl crate::Queue for Queue { surface: &mut Surface, texture: Texture, ) -> Result<(), crate::SurfaceError> { + let sc = surface.swap_chain.as_mut().unwrap(); + sc.acquired_count -= 1; + + let (interval, flags) = match sc.present_mode { + wgt::PresentMode::Immediate => (0, dxgi::DXGI_PRESENT_ALLOW_TEARING), + wgt::PresentMode::Fifo => (1, 0), + wgt::PresentMode::Mailbox => (1, 0), + }; + sc.raw.Present(interval, flags); + Ok(()) } } From 7fb0315700a4b7ad4128a770187900742714fa1d Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Tue, 13 Jul 2021 01:07:15 -0400 Subject: [PATCH 32/33] hal/dx12: build fixes for the new arrayvec --- wgpu-hal/src/dx12/device.rs | 4 ++-- wgpu-hal/src/dx12/mod.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 88fa17dd7a..8082a056f3 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -1027,7 +1027,7 @@ impl crate::Device for super::Device { let mut ranges = Vec::with_capacity(total_non_dynamic_entries); let mut bind_group_infos = - 
arrayvec::ArrayVec::<[super::BindGroupInfo; crate::MAX_BIND_GROUPS]>::default(); + arrayvec::ArrayVec::::default(); for (index, bgl) in desc.bind_group_layouts.iter().enumerate() { let space = root_space_offset + index as u32; let mut info = super::BindGroupInfo { @@ -1414,7 +1414,7 @@ impl crate::Device for super::Device { let bias = desc .depth_stencil .as_ref() - .map(|ds| ds.bias.clone()) + .map(|ds| ds.bias) .unwrap_or_default(); let raw_rasterizer = d3d12::D3D12_RASTERIZER_DESC { diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 28dd1219f3..672acba969 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -247,7 +247,7 @@ enum PassKind { struct PassState { has_label: bool, - resolves: ArrayVec<[PassResolve; crate::MAX_COLOR_TARGETS]>, + resolves: ArrayVec, vertex_buffers: [d3d12::D3D12_VERTEX_BUFFER_VIEW; crate::MAX_VERTEX_BUFFERS], dirty_vertex_buffers: usize, kind: PassKind, @@ -423,7 +423,7 @@ pub struct PipelineLayout { raw: native::RootSignature, // Storing for each associated bind group, which tables we created // in the root signature. This is required for binding descriptor sets. - bind_group_infos: ArrayVec, + bind_group_infos: ArrayVec, naga_options: naga::back::hlsl::Options, } From 0df804e9c2422d86e7cb5a1dbd41ff2acf181257 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Tue, 13 Jul 2021 02:54:37 -0400 Subject: [PATCH 33/33] hal/dx12: bunch of validation fixes --- wgpu-core/src/device/mod.rs | 1 + wgpu-core/src/device/queue.rs | 9 ++++ wgpu-core/src/hub.rs | 14 ++++--- wgpu-hal/src/dx12/adapter.rs | 3 -- wgpu-hal/src/dx12/command.rs | 12 +++--- wgpu-hal/src/dx12/conv.rs | 31 ++++++++++---- wgpu-hal/src/dx12/device.rs | 79 ++++++++++++++++++++--------------- wgpu-hal/src/dx12/instance.rs | 2 +- wgpu-hal/src/dx12/mod.rs | 53 +++++++++++++++++++---- 9 files changed, 139 insertions(+), 65 deletions(-) diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index a315458180..1b9bd3ac34 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -2418,6 +2418,7 @@ impl Device { /// Wait for idle and remove resources that we can, before we die. pub(crate) fn prepare_to_die(&mut self) { + self.pending_writes.deactivate(); let mut life_tracker = self.life_tracker.lock(); let current_index = self.active_submission_index; if let Err(error) = unsafe { self.raw.wait(&self.fence, current_index, CLEANUP_WAIT_MS) } { diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index e302bd8c6a..577a6fd7e7 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -166,6 +166,15 @@ impl PendingWrites { } &mut self.command_encoder } + + pub fn deactivate(&mut self) { + if self.is_active { + unsafe { + self.command_encoder.discard_encoding(); + } + self.is_active = false; + } + } } #[derive(Default)] diff --git a/wgpu-core/src/hub.rs b/wgpu-core/src/hub.rs index cb367e0332..5d632211da 100644 --- a/wgpu-core/src/hub.rs +++ b/wgpu-core/src/hub.rs @@ -636,6 +636,14 @@ impl Hub { } } + // destroy command buffers first, since otherwise DX12 isn't happy + for element in self.command_buffers.data.write().map.drain(..) { + if let Element::Occupied(command_buffer, _) = element { + let device = &devices[command_buffer.device_id.value]; + device.destroy_command_buffer(command_buffer); + } + } + for element in self.samplers.data.write().map.drain(..) 
{ if let Element::Occupied(sampler, _) = element { unsafe { @@ -673,12 +681,6 @@ impl Hub { devices[buffer.device_id.value].destroy_buffer(buffer); } } - for element in self.command_buffers.data.write().map.drain(..) { - if let Element::Occupied(command_buffer, _) = element { - let device = &devices[command_buffer.device_id.value]; - device.destroy_command_buffer(command_buffer); - } - } for element in self.bind_groups.data.write().map.drain(..) { if let Element::Occupied(bind_group, _) = element { let device = &devices[bind_group.device_id.value]; diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 16d3f90604..040e6083f5 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -264,10 +264,7 @@ impl crate::Adapter for super::Adapter { ) -> crate::TextureFormatCapabilities { use crate::TextureFormatCapabilities as Tfc; - let info = format.describe(); - let is_compressed = info.block_dimensions != (1, 1); let raw_format = conv::map_texture_format(format); - let mut data = d3d12::D3D12_FEATURE_DATA_FORMAT_SUPPORT { Format: raw_format, Support1: mem::zeroed(), diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 1bcf7e38bf..9f3ea39e98 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -36,7 +36,7 @@ impl super::CommandEncoder { unsafe fn end_pass(&mut self) { let list = self.list.unwrap(); - list.set_descriptor_heaps(&[native::DescriptorHeap::null(); 2]); + list.set_descriptor_heaps(&[]); if self.pass.has_label { list.EndEvent(); } @@ -344,7 +344,7 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn copy_texture_to_buffer( &mut self, src: &super::Texture, - src_usage: crate::TextureUses, + _src_usage: crate::TextureUses, dst: &super::Buffer, regions: T, ) where @@ -364,7 +364,7 @@ impl crate::CommandEncoder for super::CommandEncoder { let raw_format = conv::map_texture_format(src.format); for r in regions { - let dst_box = make_box(&r.texture_base.origin, &r.size); + let src_box = make_box(&r.texture_base.origin, &r.size); *src_location.u.SubresourceIndex_mut() = src.calc_subresource_for_copy(&r.texture_base); *dst_location.u.PlacedFootprint_mut() = d3d12::D3D12_PLACED_SUBRESOURCE_FOOTPRINT { Offset: r.buffer_layout.offset, @@ -380,7 +380,7 @@ impl crate::CommandEncoder for super::CommandEncoder { }, }; - list.CopyTextureRegion(&src_location, 0, 0, 0, &dst_location, &dst_box); + list.CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &src_box); } } @@ -410,7 +410,7 @@ impl crate::CommandEncoder for super::CommandEncoder { range: Range, buffer: &super::Buffer, offset: wgt::BufferAddress, - stride: wgt::BufferSize, + _stride: wgt::BufferSize, ) { self.list.unwrap().ResolveQueryData( set.raw.as_mut_ptr(), @@ -508,7 +508,7 @@ impl crate::CommandEncoder for super::CommandEncoder { pResource: resolve.src.0.as_mut_ptr(), Subresource: resolve.src.1, StateBefore: d3d12::D3D12_RESOURCE_STATE_RENDER_TARGET, - StateAfter: d3d12::D3D12_RESOURCE_STATE_RESOLVE_DEST, + StateAfter: d3d12::D3D12_RESOURCE_STATE_RESOLVE_SOURCE, }; self.temp.barriers.push(barrier); *barrier.u.Transition_mut() = d3d12::D3D12_RESOURCE_TRANSITION_BARRIER { diff --git a/wgpu-hal/src/dx12/conv.rs b/wgpu-hal/src/dx12/conv.rs index 7ea1c5885b..cfe11a638d 100644 --- a/wgpu-hal/src/dx12/conv.rs +++ b/wgpu-hal/src/dx12/conv.rs @@ -100,9 +100,9 @@ pub(super) fn map_texture_format(format: wgt::TextureFormat) -> dxgiformat::DXGI } } +//Note: DXGI doesn't allow sRGB format on the swapchain, +// but creating RTV of 
swapchain buffers with sRGB works. pub fn map_texture_format_nosrgb(format: wgt::TextureFormat) -> dxgiformat::DXGI_FORMAT { - // NOTE: DXGI doesn't allow sRGB format on the swapchain, but - // creating RTV of swapchain buffers with sRGB works match format { wgt::TextureFormat::Bgra8UnormSrgb => dxgiformat::DXGI_FORMAT_B8G8R8A8_UNORM, wgt::TextureFormat::Rgba8UnormSrgb => dxgiformat::DXGI_FORMAT_R8G8B8A8_UNORM, @@ -110,6 +110,24 @@ pub fn map_texture_format_nosrgb(format: wgt::TextureFormat) -> dxgiformat::DXGI } } +//Note: SRV and UAV can't use the depth formats directly +//TODO: stencil views? +pub fn map_texture_format_nodepth(format: wgt::TextureFormat) -> dxgiformat::DXGI_FORMAT { + match format { + wgt::TextureFormat::Depth32Float => dxgiformat::DXGI_FORMAT_R32_FLOAT, + wgt::TextureFormat::Depth24Plus | wgt::TextureFormat::Depth24PlusStencil8 => { + dxgiformat::DXGI_FORMAT_R24_UNORM_X8_TYPELESS + } + _ => { + assert_eq!( + crate::FormatAspects::from(format), + crate::FormatAspects::COLOR + ); + map_texture_format(format) + } + } +} + pub fn map_index_format(format: wgt::IndexFormat) -> dxgiformat::DXGI_FORMAT { match format { wgt::IndexFormat::Uint16 => dxgiformat::DXGI_FORMAT_R16_UINT, @@ -170,9 +188,6 @@ pub fn map_buffer_usage_to_resource_flags(usage: crate::BufferUses) -> d3d12::D3 if usage.contains(crate::BufferUses::STORAGE_STORE) { flags |= d3d12::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; } - if !usage.intersects(crate::BufferUses::UNIFORM | crate::BufferUses::STORAGE_LOAD) { - flags |= d3d12::D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; - } flags } @@ -196,13 +211,13 @@ pub fn map_texture_usage_to_resource_flags( crate::TextureUses::DEPTH_STENCIL_READ | crate::TextureUses::DEPTH_STENCIL_WRITE, ) { flags |= d3d12::D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + if !usage.intersects(crate::TextureUses::SAMPLED | crate::TextureUses::STORAGE_LOAD) { + flags |= d3d12::D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; + } } if usage.contains(crate::TextureUses::STORAGE_STORE) { flags |= d3d12::D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; } - if !usage.intersects(crate::TextureUses::SAMPLED | crate::TextureUses::STORAGE_LOAD) { - flags |= d3d12::D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; - } flags } diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 8082a056f3..0f5f5a3570 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -7,6 +7,8 @@ use winapi::{ Interface, }; +// this has to match Naga's HLSL backend, and also needs to be null-terminated +const NAGA_LOCATION_SEMANTIC: &[u8] = b"LOC\0"; const D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING: u32 = 0x1688; //TODO: find the exact value const D3D12_HEAP_FLAG_CREATE_NOT_ZEROED: u32 = d3d12::D3D12_HEAP_FLAG_NONE; @@ -45,7 +47,7 @@ impl super::Device { Quality: 0, }, Layout: d3d12::D3D12_TEXTURE_LAYOUT_ROW_MAJOR, - Flags: d3d12::D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE, + Flags: d3d12::D3D12_RESOURCE_FLAG_NONE, }; let heap_properties = d3d12::D3D12_HEAP_PROPERTIES { @@ -110,7 +112,7 @@ impl super::Device { heap_views: descriptor::GeneralHeap::new( raw, native::DescriptorHeapType::CbvSrvUav, - capacity_samplers, + capacity_views, )?, heap_samplers: descriptor::GeneralHeap::new( raw, @@ -128,10 +130,6 @@ impl super::Device { }, private_caps, shared: Arc::new(shared), - //Note: these names have to match Naga's convention - vertex_attribute_names: (0..d3d12::D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT) - .map(|i| ffi::CString::new(format!("LOC{}", i)).unwrap()) - .collect(), rtv_pool: 
Mutex::new(descriptor::CpuPool::new( raw, native::DescriptorHeapType::Rtv, @@ -153,7 +151,12 @@ impl super::Device { } pub(super) unsafe fn wait_idle(&self) -> Result<(), crate::DeviceError> { - let value = self.idler.fence.get_value() + 1; + let cur_value = self.idler.fence.get_value(); + if cur_value == !0 { + return Err(crate::DeviceError::Lost); + } + + let value = cur_value + 1; log::info!("Waiting for idle with value {}", value); self.present_queue.signal(self.idler.fence, value); let hr = self @@ -171,7 +174,7 @@ impl super::Device { desc: &crate::TextureViewDescriptor, ) -> descriptor::Handle { let mut raw_desc = d3d12::D3D12_SHADER_RESOURCE_VIEW_DESC { - Format: conv::map_texture_format(desc.format), + Format: conv::map_texture_format_nodepth(desc.format), ViewDimension: 0, Shader4ComponentMapping: D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING, u: mem::zeroed(), @@ -280,7 +283,7 @@ impl super::Device { desc: &crate::TextureViewDescriptor, ) -> descriptor::Handle { let mut raw_desc = d3d12::D3D12_UNORDERED_ACCESS_VIEW_DESC { - Format: conv::map_texture_format(desc.format), + Format: conv::map_texture_format_nodepth(desc.format), ViewDimension: 0, u: mem::zeroed(), }; @@ -524,7 +527,7 @@ impl super::Device { //TODO: reuse the writer let mut source = String::new(); let mut writer = hlsl::Writer::new(&mut source, &layout.naga_options); - let reflection_info = writer + let _reflection_info = writer .write(module, &stage.module.naga.info) .map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("HLSL: {:?}", e)))?; @@ -593,6 +596,7 @@ impl crate::Device for super::Device { self.srv_uav_pool.into_inner().destroy(); self.sampler_pool.into_inner().destroy(); self.shared.destroy(); + self.idler.destroy(); // Debug tracking alive objects if !thread::panicking() { @@ -616,11 +620,16 @@ impl crate::Device for super::Device { desc: &crate::BufferDescriptor, ) -> Result { let mut resource = native::Resource::null(); + let mut size = desc.size; + if desc.usage.contains(crate::BufferUses::UNIFORM) { + let align_mask = d3d12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT as u64 - 1; + size = ((size - 1) | align_mask) + 1; + } let raw_desc = d3d12::D3D12_RESOURCE_DESC { Dimension: d3d12::D3D12_RESOURCE_DIMENSION_BUFFER, Alignment: 0, - Width: desc.size, + Width: size, Height: 1, DepthOrArraySize: 1, MipLevels: 1, @@ -670,10 +679,12 @@ impl crate::Device for super::Device { ); hr.into_device_result("Buffer creation")?; - Ok(super::Buffer { - resource, - size: desc.size, - }) + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + resource.SetName(cwstr.as_ptr()); + } + + Ok(super::Buffer { resource, size }) } unsafe fn destroy_buffer(&self, buffer: super::Buffer) { buffer.resource.destroy(); @@ -697,8 +708,8 @@ impl crate::Device for super::Device { (*buffer.resource).Unmap(0, &d3d12::D3D12_RANGE { Begin: 0, End: 0 }); Ok(()) } - unsafe fn flush_mapped_ranges(&self, _buffer: &super::Buffer, ranges: I) {} - unsafe fn invalidate_mapped_ranges(&self, _buffer: &super::Buffer, ranges: I) {} + unsafe fn flush_mapped_ranges(&self, _buffer: &super::Buffer, _ranges: I) {} + unsafe fn invalidate_mapped_ranges(&self, _buffer: &super::Buffer, _ranges: I) {} unsafe fn create_texture( &self, @@ -743,17 +754,17 @@ impl crate::Device for super::Device { }, &raw_desc, d3d12::D3D12_RESOURCE_STATE_COMMON, - ptr::null(), + ptr::null(), // clear value &d3d12::ID3D12Resource::uuidof(), resource.mut_void(), ); + hr.into_device_result("Texture creation")?; if let Some(label) = desc.label { let cwstr = 
conv::map_label(label); resource.SetName(cwstr.as_ptr()); } - hr.into_device_result("Texture creation")?; Ok(super::Texture { resource, format: desc.format, @@ -886,6 +897,12 @@ impl crate::Device for super::Device { .raw .create_command_allocator(native::CmdListType::Direct) .into_device_result("Command allocator creation")?; + + if let Some(label) = desc.label { + let cwstr = conv::map_label(label); + allocator.SetName(cwstr.as_ptr()); + } + Ok(super::CommandEncoder { allocator, device: self.raw, @@ -999,7 +1016,6 @@ impl crate::Device for super::Device { //> (near the start of the root arguments) is most likely to run //> as efficiently as possible. - let mut root_offset = 0u32; let root_constants: &[()] = &[]; // Number of elements in the root signature. @@ -1077,7 +1093,6 @@ impl crate::Device for super::Device { &ranges[range_base..], )); info.tables |= super::TableTypes::SRV_CBV_UAV; - root_offset += 1; } // Sampler descriptor tables @@ -1103,7 +1118,6 @@ impl crate::Device for super::Device { &ranges[range_base..], )); info.tables |= super::TableTypes::SAMPLERS; - root_offset += 1; } // Root (dynamic) descriptor tables @@ -1137,7 +1151,6 @@ impl crate::Device for super::Device { }; info.dynamic_buffers.push(kind); parameters.push(param); - root_offset += 2; // root view costs 2 words } bind_group_infos.push(info); @@ -1217,10 +1230,13 @@ impl crate::Device for super::Device { for (layout, entry) in desc.layout.entries.iter().zip(desc.entries.iter()) { match layout.ty { wgt::BindingType::Buffer { - has_dynamic_offset, - ty, + has_dynamic_offset: true, .. } => { + let data = &desc.buffers[entry.resource_index as usize]; + dynamic_buffers.push(data.resolve_address()); + } + wgt::BindingType::Buffer { ty, .. } => { let data = &desc.buffers[entry.resource_index as usize]; let gpu_address = data.resolve_address(); let size = data.resolve_size() as u32; @@ -1228,14 +1244,12 @@ impl crate::Device for super::Device { let cpu_index = inner.stage.len() as u32; let handle = desc.layout.cpu_heap_views.as_ref().unwrap().at(cpu_index); match ty { - _ if has_dynamic_offset => { - dynamic_buffers.push(gpu_address); - } wgt::BufferBindingType::Uniform => { - let mask = d3d12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1; + let size_mask = + d3d12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1; let raw_desc = d3d12::D3D12_CONSTANT_BUFFER_VIEW_DESC { BufferLocation: gpu_address, - SizeInBytes: size, + SizeInBytes: ((size - 1) | size_mask) + 1, }; self.raw.CreateConstantBufferView(&raw_desc, handle); } @@ -1344,7 +1358,7 @@ impl crate::Device for super::Device { unsafe fn create_shader_module( &self, - desc: &crate::ShaderModuleDescriptor, + _desc: &crate::ShaderModuleDescriptor, shader: crate::ShaderInput, ) -> Result { match shader { @@ -1392,9 +1406,8 @@ impl crate::Device for super::Device { } }; for attribute in vbuf.attributes { - let name = &self.vertex_attribute_names[attribute.shader_location as usize]; input_element_descs.push(d3d12::D3D12_INPUT_ELEMENT_DESC { - SemanticName: name.as_ptr(), + SemanticName: NAGA_LOCATION_SEMANTIC.as_ptr() as *const _, SemanticIndex: attribute.shader_location, Format: conv::map_vertex_format(attribute.format), InputSlot: i as u32, diff --git a/wgpu-hal/src/dx12/instance.rs b/wgpu-hal/src/dx12/instance.rs index 7647ce448c..1e40a45a86 100644 --- a/wgpu-hal/src/dx12/instance.rs +++ b/wgpu-hal/src/dx12/instance.rs @@ -41,7 +41,7 @@ unsafe extern "system" fn output_debug_string_handler( Some(msg) => { match MESSAGE_PREFIXES .iter() - .find(|&&(prefix, 
level)| msg.starts_with(prefix)) + .find(|&&(prefix, _)| msg.starts_with(prefix)) { Some(&(prefix, level)) => (&msg[prefix.len() + 2..], level), None => (msg, log::Level::Debug), diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 672acba969..8dea5cc9e4 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -5,8 +5,6 @@ !*/ -#![allow(unused_variables)] - mod adapter; mod command; mod conv; @@ -16,7 +14,7 @@ mod instance; use arrayvec::ArrayVec; use parking_lot::Mutex; -use std::{borrow::Cow, ffi, mem, num::NonZeroU32, ptr, sync::Arc}; +use std::{borrow::Cow, mem, num::NonZeroU32, ptr, sync::Arc}; use winapi::{ shared::{dxgi, dxgi1_2, dxgi1_4, dxgiformat, dxgitype, windef, winerror}, um::{d3d12, synchapi, winbase, winnt}, @@ -109,8 +107,10 @@ struct SwapChain { // when the swapchain is destroyed resources: Vec, waitable: winnt::HANDLE, - present_mode: wgt::PresentMode, acquired_count: usize, + present_mode: wgt::PresentMode, + format: wgt::TextureFormat, + size: wgt::Extent3d, } pub struct Surface { @@ -162,6 +162,12 @@ struct Idler { event: native::Event, } +impl Idler { + unsafe fn destroy(self) { + self.fence.destroy(); + } +} + struct CommandSignatures { draw: native::CommandSignature, draw_indexed: native::CommandSignature, @@ -199,7 +205,6 @@ pub struct Device { idler: Idler, private_caps: PrivateCapabilities, shared: Arc, - vertex_attribute_names: Vec, // CPU only pools rtv_pool: Mutex, dsv_pool: Mutex, @@ -220,6 +225,14 @@ pub struct Queue { unsafe impl Send for Queue {} unsafe impl Sync for Queue {} +impl Drop for Queue { + fn drop(&mut self) { + unsafe { + self.raw.destroy(); + } + } +} + #[derive(Default)] struct Temp { marker: Vec, @@ -579,6 +592,8 @@ impl crate::Surface for Surface { waitable, acquired_count: 0, present_mode: config.present_mode, + format: config.format, + size: config.extent, }); Ok(()) @@ -599,9 +614,31 @@ impl crate::Surface for Surface { &mut self, timeout_ms: u32, ) -> Result>, crate::SurfaceError> { - Ok(None) + let sc = self.swap_chain.as_mut().unwrap(); + + sc.wait(timeout_ms)?; + + let base_index = sc.raw.GetCurrentBackBufferIndex() as usize; + let index = (base_index + sc.acquired_count) % sc.resources.len(); + sc.acquired_count += 1; + + let texture = Texture { + resource: sc.resources[index], + format: sc.format, + dimension: wgt::TextureDimension::D2, + size: sc.size, + mip_level_count: 1, + sample_count: 1, + }; + Ok(Some(crate::AcquiredSurfaceTexture { + texture, + suboptimal: false, + })) + } + unsafe fn discard_texture(&mut self, _texture: Texture) { + let sc = self.swap_chain.as_mut().unwrap(); + sc.acquired_count -= 1; } - unsafe fn discard_texture(&mut self, texture: Texture) {} } impl crate::Queue for Queue { @@ -627,7 +664,7 @@ impl crate::Queue for Queue { unsafe fn present( &mut self, surface: &mut Surface, - texture: Texture, + _texture: Texture, ) -> Result<(), crate::SurfaceError> { let sc = surface.swap_chain.as_mut().unwrap(); sc.acquired_count -= 1;