Skip to content

Commit

Permalink
hal/dx12: improve RowPitch computation (#2409)
Browse files Browse the repository at this point in the history
  • Loading branch information
kvark committed Jan 21, 2022
1 parent 06a4bce commit b0e7bb3
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 62 deletions.
3 changes: 1 addition & 2 deletions wgpu-core/src/command/clear.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ use std::{num::NonZeroU32, ops::Range};
#[cfg(feature = "trace")]
use crate::device::trace::Command as TraceCommand;
use crate::{
align_to,
command::CommandBuffer,
device::Device,
get_lowest_common_denom,
Expand All @@ -14,7 +13,7 @@ use crate::{
track::{ResourceTracker, TextureSelector, TextureState},
};

use hal::CommandEncoder as _;
use hal::{auxil::align_to, CommandEncoder as _};
use thiserror::Error;
use wgt::{BufferAddress, BufferSize, BufferUsages, ImageSubresourceRange, TextureAspect};

Expand Down
3 changes: 1 addition & 2 deletions wgpu-core/src/device/queue.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#[cfg(feature = "trace")]
use crate::device::trace::Action;
use crate::{
align_to,
command::{
extract_texture_selector, validate_linear_texture_data, validate_texture_copy_range,
ClearError, CommandBuffer, CopySide, ImageCopyTexture, TransferError,
Expand Down Expand Up @@ -415,7 +414,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
device.alignments.buffer_copy_pitch.get() as u32,
format_desc.block_size as u32,
);
let stage_bytes_per_row = align_to(
let stage_bytes_per_row = hal::auxil::align_to(
format_desc.block_size as u32 * width_blocks,
bytes_per_row_alignment,
);
Expand Down
8 changes: 0 additions & 8 deletions wgpu-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -252,14 +252,6 @@ pub(crate) fn get_greatest_common_divisor(mut a: u32, mut b: u32) -> u32 {
}
}

#[inline]
pub(crate) fn align_to(value: u32, alignment: u32) -> u32 {
match value % alignment {
0 => value,
other => value - other + alignment,
}
}

#[test]
fn test_lcd() {
assert_eq!(get_lowest_common_denom(2, 2), 2);
Expand Down
19 changes: 5 additions & 14 deletions wgpu-hal/examples/halmark/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ struct Example<A: hal::Api> {
pipeline: A::RenderPipeline,
bunnies: Vec<Locals>,
local_buffer: A::Buffer,
local_alignment: wgt::BufferAddress,
local_alignment: u32,
global_buffer: A::Buffer,
sampler: A::Sampler,
texture: A::Texture,
Expand Down Expand Up @@ -376,22 +376,13 @@ impl<A: hal::Api> Example<A> {
buffer
};

fn align_to(
value: wgt::BufferAddress,
alignment: wgt::BufferAddress,
) -> wgt::BufferAddress {
match value % alignment {
0 => value,
other => value - other + alignment,
}
}
let local_alignment = align_to(
mem::size_of::<Locals>() as _,
capabilities.limits.min_uniform_buffer_offset_alignment as _,
let local_alignment = hal::auxil::align_to(
mem::size_of::<Locals>() as u32,
capabilities.limits.min_uniform_buffer_offset_alignment,
);
let local_buffer_desc = hal::BufferDescriptor {
label: Some("local"),
size: (MAX_BUNNIES as wgt::BufferAddress) * local_alignment,
size: (MAX_BUNNIES as wgt::BufferAddress) * (local_alignment as wgt::BufferAddress),
usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::UNIFORM,
memory_flags: hal::MemoryFlags::PREFER_COHERENT,
};
Expand Down
11 changes: 11 additions & 0 deletions wgpu-hal/src/auxil/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,17 @@ pub fn map_naga_stage(stage: naga::ShaderStage) -> wgt::ShaderStages {
}
}

pub fn align_to(value: u32, alignment: u32) -> u32 {
if alignment.is_power_of_two() {
(value + alignment - 1) & !(alignment - 1)
} else {
match value % alignment {
0 => value,
other => value - other + alignment,
}
}
}

impl crate::CopyExtent {
pub fn min(&self, other: &Self) -> Self {
Self {
Expand Down
72 changes: 36 additions & 36 deletions wgpu-hal/src/dx12/command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,40 @@ fn make_box(origin: &wgt::Origin3d, size: &crate::CopyExtent) -> d3d12::D3D12_BO
}
}

impl crate::BufferTextureCopy {
fn to_subresource_footprint(
&self,
format: wgt::TextureFormat,
) -> d3d12::D3D12_PLACED_SUBRESOURCE_FOOTPRINT {
let desc = format.describe();
d3d12::D3D12_PLACED_SUBRESOURCE_FOOTPRINT {
Offset: self.buffer_layout.offset,
Footprint: d3d12::D3D12_SUBRESOURCE_FOOTPRINT {
Format: conv::map_texture_format(format),
Width: self.size.width,
Height: self
.buffer_layout
.rows_per_image
.map_or(self.size.height, |count| {
count.get() * desc.block_dimensions.1 as u32
}),
Depth: self.size.depth,
RowPitch: {
let actual = match self.buffer_layout.bytes_per_row {
Some(count) => count.get(),
// this may happen for single-line updates
None => {
(self.size.width / desc.block_dimensions.0 as u32)
* desc.block_size as u32
}
};
crate::auxil::align_to(actual, d3d12::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT)
},
},
}
}
}

impl super::Temp {
fn prepare_marker(&mut self, marker: &str) -> (&[u16], u32) {
self.marker.clear();
Expand Down Expand Up @@ -458,28 +492,10 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
Type: d3d12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX,
u: mem::zeroed(),
};
let raw_format = conv::map_texture_format(dst.format);

let block_size = dst.format.describe().block_dimensions.0 as u32;
for r in regions {
let src_box = make_box(&wgt::Origin3d::ZERO, &r.size);
*src_location.u.PlacedFootprint_mut() = d3d12::D3D12_PLACED_SUBRESOURCE_FOOTPRINT {
Offset: r.buffer_layout.offset,
Footprint: d3d12::D3D12_SUBRESOURCE_FOOTPRINT {
Format: raw_format,
Width: r.size.width,
Height: r
.buffer_layout
.rows_per_image
.map_or(r.size.height, |count| count.get() * block_size),
Depth: r.size.depth,
RowPitch: r.buffer_layout.bytes_per_row.map_or(0, |count| {
count.get().max(d3d12::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT)
}),
},
};
*src_location.u.PlacedFootprint_mut() = r.to_subresource_footprint(dst.format);
*dst_location.u.SubresourceIndex_mut() = dst.calc_subresource_for_copy(&r.texture_base);

list.CopyTextureRegion(
&dst_location,
r.texture_base.origin.x,
Expand Down Expand Up @@ -511,26 +527,10 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
Type: d3d12::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
u: mem::zeroed(),
};
let raw_format = conv::map_texture_format(src.format);

let block_size = src.format.describe().block_dimensions.0 as u32;
for r in regions {
let src_box = make_box(&r.texture_base.origin, &r.size);
*src_location.u.SubresourceIndex_mut() = src.calc_subresource_for_copy(&r.texture_base);
*dst_location.u.PlacedFootprint_mut() = d3d12::D3D12_PLACED_SUBRESOURCE_FOOTPRINT {
Offset: r.buffer_layout.offset,
Footprint: d3d12::D3D12_SUBRESOURCE_FOOTPRINT {
Format: raw_format,
Width: r.size.width,
Height: r
.buffer_layout
.rows_per_image
.map_or(r.size.height, |count| count.get() * block_size),
Depth: r.size.depth,
RowPitch: r.buffer_layout.bytes_per_row.map_or(0, |count| count.get()),
},
};

*dst_location.u.PlacedFootprint_mut() = r.to_subresource_footprint(src.format);
list.CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, &src_box);
}
}
Expand Down

0 comments on commit b0e7bb3

Please sign in to comment.