Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option for using custom heaps and support D3D_FEATURE_LEVEL_1_0_CORE #469

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions DxDispatch/src/dxdispatch/CommandLineArgs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,12 @@ CommandLineArgs::CommandLineArgs(int argc, char** argv)
"Sets barrier types issued after every dispatch is recorded into a command list: none, uav, or uav+aliasing",
cxxopts::value<std::string>()->default_value("uav")
)
(
"u, custom_heaps",
"Binds input and output resources are allocated from custom heaps. Write-combined caching and system memory (L0) are used when this is specified.",
cxxopts::value<bool>()
)

// DxDispatch generates root signatures that are guaranteed to match HLSL source, which eliminates
// having to write it inline in the HLSL file. DXC for Xbox precompiles shaders for Xbox (by default),
// but precompilation requires the root signature to be in the HLSL source itself; to allow use of the
Expand Down Expand Up @@ -175,6 +181,11 @@ CommandLineArgs::CommandLineArgs(int argc, char** argv)
"Prints verbose ONNX model binding information.",
cxxopts::value<bool>()
)
(
"x,onnx_disable_gpu_timing",
"Disables collection of GPU timing and PIX events surrounding ONNX Runtime. This can decrease CPU time by preventing additional command list executions.",
cxxopts::value<bool>()
)
;

options.positional_help("<PATH_TO_MODEL>");
Expand Down Expand Up @@ -271,6 +282,11 @@ CommandLineArgs::CommandLineArgs(int argc, char** argv)
}
}

if (result.count("custom_heaps"))
{
m_customHeaps = result["custom_heaps"].as<bool>();
}

auto queueTypeStr = result["queue_type"].as<std::string>();
if (queueTypeStr == "direct")
{
Expand Down Expand Up @@ -407,5 +423,10 @@ CommandLineArgs::CommandLineArgs(int argc, char** argv)
m_onnxPrintVerboseBindingInfo = result["print_onnx_bindings"].as<bool>();
}

if (result.count("onnx_disable_gpu_timing"))
{
m_onnxDisableGpuTiming = result["onnx_disable_gpu_timing"].as<bool>();
}

m_helpText = options.help();
}
7 changes: 6 additions & 1 deletion DxDispatch/src/dxdispatch/CommandLineArgs.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,17 @@ class CommandLineArgs
bool GetUavBarrierAfterDispatch() const { return m_uavBarrierAfterDispatch; }
bool GetAliasingBarrierAfterDispatch() const { return m_aliasingBarrierAfterDispatch; }

bool GetCustomHeaps() const { return m_customHeaps; }

// ONNX
gsl::span<const std::pair<std::string, uint32_t>> GetOnnxFreeDimensionNameOverrides() const { return m_onnxFreeDimensionNameOverrides; }
gsl::span<const std::pair<std::string, uint32_t>> GetOnnxFreeDimensionDenotationOverrides() const { return m_onnxFreeDimensionDenotationOverrides; }
gsl::span<const std::pair<std::string, std::string>> GetOnnxSessionOptionConfigEntries() const { return m_onnxSessionOptionConfigEntries; }
const std::unordered_map<std::string, std::vector<int64_t>>& GetOnnxBindingShapes() const { return m_onnxBindShapes; }
std::optional<uint32_t> GetOnnxGraphOptimizationLevel() const { return m_onnxGraphOptimizationLevel; }
std::optional<uint32_t> GetOnnxLoggingLevel() const { return m_onnxLoggingLevel; }
bool PrintVerboseOnnxBindingInfo() const { return m_onnxPrintVerboseBindingInfo; }
bool PrintVerboseOnnxBindingInfo() const { return m_onnxPrintVerboseBindingInfo; }
bool DisableGpuTiming() const { return m_onnxDisableGpuTiming; }

private:
bool m_showAdapters = false;
Expand All @@ -58,6 +61,7 @@ class CommandLineArgs
bool m_clearShaderCaches = false;
bool m_uavBarrierAfterDispatch = true;
bool m_aliasingBarrierAfterDispatch = false;
bool m_customHeaps = false;
std::string m_adapterSubstring = "";
std::filesystem::path m_modelPath;
std::string m_pixCaptureName = "dxdispatch";
Expand Down Expand Up @@ -92,4 +96,5 @@ class CommandLineArgs
std::optional<uint32_t> m_onnxGraphOptimizationLevel;
std::optional<uint32_t> m_onnxLoggingLevel;
bool m_onnxPrintVerboseBindingInfo = false;
bool m_onnxDisableGpuTiming = false;
};
68 changes: 52 additions & 16 deletions DxDispatch/src/dxdispatch/Device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ Device::Device(

THROW_IF_FAILED(m_d3dModule->CreateDevice(
adapter,
D3D_FEATURE_LEVEL_11_0,
D3D_FEATURE_LEVEL_1_0_CORE,
IID_PPV_ARGS(&m_d3d)));

if (debugLayersEnabled)
Expand All @@ -94,6 +94,7 @@ Device::Device(
IID_GRAPHICS_PPV_ARGS(m_fence.ReleaseAndGetAddressOf())));

D3D12_COMMAND_QUEUE_DESC queueDesc = {};
queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_DISABLE_GPU_TIMEOUT;
queueDesc.Type = commandListType;
THROW_IF_FAILED(m_d3d->CreateCommandQueue(
&queueDesc,
Expand Down Expand Up @@ -163,6 +164,34 @@ ComPtr<ID3D12Resource> Device::CreateDefaultBuffer(
return resource;
}

// Creates a committed buffer resource of `sizeInBytes` bytes.
//
// When both `cpuPageProperty` and `memoryPoolPreference` are left as UNKNOWN,
// the request is forwarded unchanged to CreateDefaultBuffer. Otherwise the
// buffer is created with heap properties built from the supplied CPU page
// property and memory pool, starting in D3D12_RESOURCE_STATE_COMMON.
//
// Parameters:
//   sizeInBytes          - width of the buffer.
//   cpuPageProperty      - CPU page property used for the heap properties.
//   memoryPoolPreference - memory pool used for the heap properties.
//   resourceFlags        - flags applied to the buffer resource description.
//   alignment            - alignment passed to the buffer resource description.
//   heapFlags            - heap flags passed to CreateCommittedResource.
//
// Returns the created resource. A failing CreateCommittedResource call is
// reported through THROW_IF_FAILED.
ComPtr<ID3D12Resource> Device::CreateBuffer(
    uint64_t sizeInBytes,
    D3D12_CPU_PAGE_PROPERTY cpuPageProperty,
    D3D12_MEMORY_POOL memoryPoolPreference,
    D3D12_RESOURCE_FLAGS resourceFlags,
    uint64_t alignment,
    D3D12_HEAP_FLAGS heapFlags)
{
    // Any explicit page property or memory pool means the caller is asking
    // for something other than the default buffer path.
    const bool hasExplicitHeapHints =
        cpuPageProperty != D3D12_CPU_PAGE_PROPERTY_UNKNOWN ||
        memoryPoolPreference != D3D12_MEMORY_POOL_UNKNOWN;

    if (!hasExplicitHeapHints)
    {
        return CreateDefaultBuffer(sizeInBytes, resourceFlags, alignment, heapFlags);
    }

    CD3DX12_HEAP_PROPERTIES requestedHeapProperties(cpuPageProperty, memoryPoolPreference);
    CD3DX12_RESOURCE_DESC bufferDesc = CD3DX12_RESOURCE_DESC::Buffer(sizeInBytes, resourceFlags, alignment);

    ComPtr<ID3D12Resource> buffer;
    THROW_IF_FAILED(m_d3d->CreateCommittedResource(
        &requestedHeapProperties,
        heapFlags,
        &bufferDesc,
        D3D12_RESOURCE_STATE_COMMON,
        nullptr,
        IID_GRAPHICS_PPV_ARGS(buffer.ReleaseAndGetAddressOf())));

    return buffer;
}

ComPtr<ID3D12Resource> Device::CreateUploadBuffer(
uint64_t sizeInBytes,
D3D12_RESOURCE_FLAGS resourceFlags,
Expand Down Expand Up @@ -247,23 +276,30 @@ void Device::RecordDispatch(const char* name, uint32_t threadGroupX, uint32_t th
PIXEndEvent(m_commandList.Get());
}

Microsoft::WRL::ComPtr<ID3D12Resource> Device::Upload(uint64_t totalSize, gsl::span<const std::byte> data, std::wstring_view name)
Microsoft::WRL::ComPtr<ID3D12Resource> Device::CreateBuffer(
uint64_t totalSize,
gsl::span<const std::byte> data,
std::wstring_view name,
D3D12_CPU_PAGE_PROPERTY cpuPageProperty,
D3D12_MEMORY_POOL memoryPoolPreference
)
{
if (data.size() > totalSize)
{
throw std::invalid_argument("Attempting to upload more data than the size of the buffer");
}

auto defaultBuffer = CreateDefaultBuffer(totalSize);
auto buffer = CreateBuffer(totalSize, cpuPageProperty, memoryPoolPreference);

if (!name.empty())
{
defaultBuffer->SetName(name.data());
buffer->SetName(name.data());
}

if (data.empty())
{
// No need to create an upload resource if the source data is empty.
return defaultBuffer;
return buffer;
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the heap is L0 and CPU-writable, there's no need to create the extra upload heap.

Suggested change
if (cpuPageProperty == D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE && memoryPoolPreference == D3D12_MEMORY_POOL_L0)
{
// Map buffer directly, if possible.
void* bufferData = nullptr;
THROW_IF_FAILED(buffer->Map(0, nullptr, &bufferData));
memcpy(bufferData, data.data(), data.size());
buffer->Unmap(0, nullptr);
return buffer;
}

auto uploadBuffer = CreateUploadBuffer(totalSize);
Expand All @@ -279,20 +315,20 @@ Microsoft::WRL::ComPtr<ID3D12Resource> Device::Upload(uint64_t totalSize, gsl::s
D3D12_RESOURCE_BARRIER barriers[] =
{
CD3DX12_RESOURCE_BARRIER::Transition(
defaultBuffer.Get(),
buffer.Get(),
D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
D3D12_RESOURCE_STATE_COPY_DEST)
};
m_commandList->ResourceBarrier(_countof(barriers), barriers);
}

m_commandList->CopyResource(defaultBuffer.Get(), uploadBuffer.Get());
m_commandList->CopyResource(buffer.Get(), uploadBuffer.Get());

{
D3D12_RESOURCE_BARRIER barriers[] =
{
CD3DX12_RESOURCE_BARRIER::Transition(
defaultBuffer.Get(),
buffer.Get(),
D3D12_RESOURCE_STATE_COPY_DEST,
D3D12_RESOURCE_STATE_UNORDERED_ACCESS)
};
Expand All @@ -301,32 +337,32 @@ Microsoft::WRL::ComPtr<ID3D12Resource> Device::Upload(uint64_t totalSize, gsl::s

m_temporaryResources.push_back(std::move(uploadBuffer));

return defaultBuffer;
return buffer;
}

std::vector<std::byte> Device::Download(Microsoft::WRL::ComPtr<ID3D12Resource> defaultBuffer)
std::vector<std::byte> Device::Download(Microsoft::WRL::ComPtr<ID3D12Resource> buffer)
Copy link
Contributor

@jstoecker jstoecker Jun 23, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Like upload, you can avoid the readback heap and just copy straight out of the buffer here if it's L0 and CPU-visible (call GetHeapProperties).

{
auto readbackBuffer = CreateReadbackBuffer(defaultBuffer->GetDesc().Width);
auto readbackBuffer = CreateReadbackBuffer(buffer->GetDesc().Width);
readbackBuffer->SetName(L"Device::Download");

{
D3D12_RESOURCE_BARRIER barriers[] =
{
CD3DX12_RESOURCE_BARRIER::Transition(
defaultBuffer.Get(),
buffer.Get(),
D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
D3D12_RESOURCE_STATE_COPY_SOURCE)
};
m_commandList->ResourceBarrier(_countof(barriers), barriers);
}

m_commandList->CopyResource(readbackBuffer.Get(), defaultBuffer.Get());
m_commandList->CopyResource(readbackBuffer.Get(), buffer.Get());

{
D3D12_RESOURCE_BARRIER barriers[] =
{
CD3DX12_RESOURCE_BARRIER::Transition(
defaultBuffer.Get(),
buffer.Get(),
D3D12_RESOURCE_STATE_COPY_SOURCE,
D3D12_RESOURCE_STATE_UNORDERED_ACCESS)
};
Expand All @@ -335,9 +371,9 @@ std::vector<std::byte> Device::Download(Microsoft::WRL::ComPtr<ID3D12Resource> d

ExecuteCommandListAndWait();

std::vector<std::byte> outputBuffer(defaultBuffer->GetDesc().Width);
std::vector<std::byte> outputBuffer(buffer->GetDesc().Width);
{
size_t dataSize = defaultBuffer->GetDesc().Width;
size_t dataSize = buffer->GetDesc().Width;
CD3DX12_RANGE readRange(0, gsl::narrow<size_t>(dataSize));
void* readbackBufferData = nullptr;
THROW_IF_FAILED(readbackBuffer->Map(0, &readRange, &readbackBufferData));
Expand Down
15 changes: 14 additions & 1 deletion DxDispatch/src/dxdispatch/Device.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ class Device
uint64_t alignment = 0,
D3D12_HEAP_FLAGS heapFlags = D3D12_HEAP_FLAG_NONE);

Microsoft::WRL::ComPtr<ID3D12Resource> CreateBuffer(
uint64_t sizeInBytes,
D3D12_CPU_PAGE_PROPERTY cpuPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE,
D3D12_MEMORY_POOL memoryPoolPreference = D3D12_MEMORY_POOL_L0,
D3D12_RESOURCE_FLAGS resourceFlags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
uint64_t alignment = 0,
D3D12_HEAP_FLAGS heapFlags = D3D12_HEAP_FLAG_NONE);

Comment on lines +47 to +54
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice! Can you also delete the TODO on line 39? :)

Microsoft::WRL::ComPtr<ID3D12Resource> CreateUploadBuffer(
uint64_t sizeInBytes,
D3D12_RESOURCE_FLAGS resourceFlags = D3D12_RESOURCE_FLAG_NONE,
Expand Down Expand Up @@ -88,7 +96,12 @@ class Device
m_temporaryResources.emplace_back(std::move(object));
}

Microsoft::WRL::ComPtr<ID3D12Resource> Upload(uint64_t totalSize, gsl::span<const std::byte> data, std::wstring_view name = {});
Microsoft::WRL::ComPtr<ID3D12Resource> CreateBuffer(
uint64_t totalSize,
gsl::span<const std::byte> data,
std::wstring_view name,
D3D12_CPU_PAGE_PROPERTY cpuPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE,
D3D12_MEMORY_POOL memoryPoolPreference = D3D12_MEMORY_POOL_L0);
Comment on lines +103 to +104
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would default these to D3D12_CPU_PAGE_PROPERTY_UNKNOWN and D3D12_MEMORY_POOL_UNKNOWN, respectively, since custom heaps are opt-in.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch - thanks. This was an artifact of a previous approach.


std::vector<std::byte> Download(Microsoft::WRL::ComPtr<ID3D12Resource>);

Expand Down
1 change: 0 additions & 1 deletion DxDispatch/src/dxdispatch/Dispatchable.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,4 @@ struct Dispatchable
virtual void Initialize() = 0;
virtual void Bind(const Bindings& bindings, uint32_t iteration) = 0;
virtual void Dispatch(const Model::DispatchCommand& args, uint32_t iteration) = 0;
virtual void Wait() = 0;
};
4 changes: 0 additions & 4 deletions DxDispatch/src/dxdispatch/DmlDispatchable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,9 +244,5 @@ void DmlDispatchable::Bind(const Bindings& bindings, uint32_t iteration)
// Records a dispatch of the compiled operator on the device, using this
// dispatchable's binding table. Neither `args` nor `iteration` is read here.
void DmlDispatchable::Dispatch(const Model::DispatchCommand& args, uint32_t iteration)
{
m_device->RecordDispatch(m_operatorCompiled.Get(), m_bindingTable.Get());
}

// Forwards to Device::ExecuteCommandListAndWait() on the owning device.
// NOTE(review): this hunk is listed as "0 additions & 4 deletions", so this
// method appears to be on the removed side of the diff - confirm before relying on it.
void DmlDispatchable::Wait()
{
m_device->ExecuteCommandListAndWait();
}
1 change: 0 additions & 1 deletion DxDispatch/src/dxdispatch/DmlDispatchable.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ class DmlDispatchable : public Dispatchable
void Initialize() final;
void Bind(const Bindings& bindings, uint32_t iteration) final;
void Dispatch(const Model::DispatchCommand& args, uint32_t iteration) final;
void Wait() final;

private:
std::string m_name;
Expand Down
Loading