Skip to content

Commit

Permalink
GPU frustum culling using compute shaders on PipelineDrawBatch.
Browse files Browse the repository at this point in the history
  • Loading branch information
Jon Creighton committed Dec 10, 2022
1 parent bd80c98 commit e939b02
Show file tree
Hide file tree
Showing 34 changed files with 625 additions and 317 deletions.
122 changes: 95 additions & 27 deletions pxr/imaging/hdSt/codeGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ TF_DEFINE_PRIVATE_TOKENS(
(early_fragment_tests)
);

TF_DEFINE_ENV_SETTING(HDST_ENABLE_HGI_RESOURCE_GENERATION, false,
TF_DEFINE_ENV_SETTING(HDST_ENABLE_HGI_RESOURCE_GENERATION, true,
"Enable Hgi resource generation for codeGen");

/* static */
Expand Down Expand Up @@ -1344,6 +1344,7 @@ HdSt_CodeGen::_GetShaderResourceLayouts(
HdShaderTokens->geometryShader,
HdShaderTokens->fragmentShader,
HdShaderTokens->postTessVertexShader,
HdShaderTokens->computeShader,
};

for (auto const &shader : shaders) {
Expand All @@ -1366,6 +1367,9 @@ HdSt_CodeGen::_GetShaderResourceLayouts(

HdSt_ResourceLayout::ParseLayout(
&_resPTVS, HdShaderTokens->postTessVertexShader, layoutDict);

HdSt_ResourceLayout::ParseLayout(
&_resCS, HdShaderTokens->computeShader, layoutDict);
}
}

Expand Down Expand Up @@ -1525,6 +1529,8 @@ HdSt_CodeGen::Compile(HdStResourceRegistry*const registry)
_geometricShader->GetSource(HdShaderTokens->geometryShader);
std::string fragmentShader =
_geometricShader->GetSource(HdShaderTokens->fragmentShader);
std::string computeShader =
_geometricShader->GetSource(HdShaderTokens->computeShader);

_hasVS = (!vertexShader.empty());
_hasTCS = (!tessControlShader.empty());
Expand All @@ -1533,6 +1539,7 @@ HdSt_CodeGen::Compile(HdStResourceRegistry*const registry)
_hasPTVS = (!postTessVertexShader.empty()) && metalTessellationEnabled;
_hasGS = (!geometryShader.empty()) && !metalTessellationEnabled;
_hasFS = (!fragmentShader.empty());
_hasCS = (!computeShader.empty());

// Initialize source buckets
_genDefines.str(""); _genDecl.str(""); _genAccessors.str("");
Expand Down Expand Up @@ -2014,6 +2021,7 @@ HdSt_CodeGen::Compile(HdStResourceRegistry*const registry)
_genPTVS << postTessVertexShader;
_genGS << geometryShader;
_genFS << fragmentShader;
_genCS << computeShader;

// Sanity check that if you provide a control shader, you have also provided
// an evaluation shader (and vice versa)
Expand Down Expand Up @@ -2645,6 +2653,8 @@ HdSt_CodeGen::_CompileWithGeneratedHgiResources(
HdShaderTokens->computeShader, _resAttrib, _metaData);
resourceGen._GenerateHgiResources(&csDesc,
HdShaderTokens->computeShader, _resCommon, _metaData);
resourceGen._GenerateHgiResources(&csDesc,
HdShaderTokens->computeShader, _resCS, _metaData);

std::string const declarations = _genDefines.str() + _genDecl.str();
std::string const source = _genAccessors.str() + _genCS.str();
Expand Down Expand Up @@ -3830,13 +3840,16 @@ HdSt_CodeGen::_GenerateDrawingCoord(
// layout (location=y) in ivec4 drawingCoord1
// layout (location=z) in ivec2 drawingCoord2
// layout (location=w) in int drawingCoordI[N]
_EmitDeclaration(&_resAttrib, _metaData.drawingCoord0Binding);
_EmitDeclaration(&_resAttrib, _metaData.drawingCoord1Binding);
_EmitDeclaration(&_resAttrib, _metaData.drawingCoord2Binding);
if (!_hasCS) {
_EmitDeclaration(&_resAttrib, _metaData.drawingCoord0Binding);
_EmitDeclaration(&_resAttrib, _metaData.drawingCoord1Binding);
_EmitDeclaration(&_resAttrib, _metaData.drawingCoord2Binding);

if (_metaData.drawingCoordIBinding.binding.IsValid()) {
_EmitDeclaration(&_resAttrib, _metaData.drawingCoordIBinding,
/*arraySize=*/std::max(1, _metaData.instancerNumLevels));

if (_metaData.drawingCoordIBinding.binding.IsValid()) {
_EmitDeclaration(&_resAttrib, _metaData.drawingCoordIBinding,
/*arraySize=*/std::max(1, _metaData.instancerNumLevels));
}
}

std::stringstream primitiveID;
Expand Down Expand Up @@ -3940,6 +3953,12 @@ HdSt_CodeGen::_GenerateDrawingCoord(
<< " return drawingCoord1[0].y + (int(hd_InstanceID) - "
<< "gl_BaseInstance) * HD_INSTANCE_INDEX_WIDTH; \n"
<< "}\n";

_genCS << "int g_instanceID; // Set from calling code.\n"
<< "FORWARD_DECL(int GetDrawingCoordField(uint coordIndex, uint fieldIndex));\n"
<< "int GetInstanceIndexCoord() {\n"
<< "return GetDrawingCoordField(1, 1) + g_instanceID * HD_INSTANCE_INDEX_WIDTH;\n"
<< "}\n";

if (_geometricShader->IsFrustumCullingPass()) {
// for frustum culling: use instanceIndices.
Expand All @@ -3957,6 +3976,23 @@ HdSt_CodeGen::_GenerateDrawingCoord(
<< " = instanceIndices[drawingCoord1.y + "
<< "hd_InstanceID*HD_INSTANCE_INDEX_WIDTH+i];\n"
<< "}\n";

if (_hasCS) {
_genCS << "hd_instanceIndex GetInstanceIndex() {\n"
<< " int offset = GetInstanceIndexCoord();\n"
<< " hd_instanceIndex r;\n"
<< " for (int i = 0; i < HD_INSTANCE_INDEX_WIDTH; ++i)\n"
<< " r.indices[i] = instanceIndices[offset+i];\n"
<< " return r;\n"
<< "}\n";
_genCS << "void SetCulledInstanceIndex(uint instanceID) {\n"
<< " int instanceIndex = GetDrawingCoordField(1, 1);\n"
<< " for (int i = 0; i < HD_INSTANCE_INDEX_WIDTH; ++i) {\n"
<< " culledInstanceIndices[instanceIndex + instanceID * HD_INSTANCE_INDEX_WIDTH+i]\n"
<< " = instanceIndices[instanceIndex + g_instanceID * HD_INSTANCE_INDEX_WIDTH+i];\n"
<< " }\n"
<< "}\n";
}
} else {
// for drawing: use culledInstanceIndices.
_EmitAccessor(_genVS, _metaData.culledInstanceIndexArrayBinding.name,
Expand All @@ -3983,28 +4019,33 @@ HdSt_CodeGen::_GenerateDrawingCoord(
genAttr << "void SetCulledInstanceIndex(uint instance) "
"{ /*no-op*/ }\n";
}

_genCS << "hd_instanceIndex GetInstanceIndex() {"
<< " hd_instanceIndex r; r.indices[0] = 0; return r; }\n";
}

for (std::string const & param : drawingCoordParams) {
TfToken const drawingCoordParamName("dc_" + param);
_AddInterstageElement(&_resInterstage,
HdSt_ResourceLayout::InOut::NONE,
/*name=*/drawingCoordParamName,
/*dataType=*/_tokens->_int);
}
for (int i = 0; i < instanceIndexWidth; ++i) {
TfToken const name(TfStringPrintf("dc_instanceIndexI%d", i));
_AddInterstageElement(&_resInterstage,
HdSt_ResourceLayout::InOut::NONE,
/*name=*/name,
/*dataType=*/_tokens->_int);
}
for (int i = 0; i < instanceIndexWidth; ++i) {
TfToken const name(TfStringPrintf("dc_instanceCoordsI%d", i));
_AddInterstageElement(&_resInterstage,
HdSt_ResourceLayout::InOut::NONE,
/*name=*/name,
/*dataType=*/_tokens->_int);
if (!_hasCS) {
for (std::string const & param : drawingCoordParams) {
TfToken const drawingCoordParamName("dc_" + param);
_AddInterstageElement(&_resInterstage,
HdSt_ResourceLayout::InOut::NONE,
/*name=*/drawingCoordParamName,
/*dataType=*/_tokens->_int);
}
for (int i = 0; i < instanceIndexWidth; ++i) {
TfToken const name(TfStringPrintf("dc_instanceIndexI%d", i));
_AddInterstageElement(&_resInterstage,
HdSt_ResourceLayout::InOut::NONE,
/*name=*/name,
/*dataType=*/_tokens->_int);
}
for (int i = 0; i < instanceIndexWidth; ++i) {
TfToken const name(TfStringPrintf("dc_instanceCoordsI%d", i));
_AddInterstageElement(&_resInterstage,
HdSt_ResourceLayout::InOut::NONE,
/*name=*/name,
/*dataType=*/_tokens->_int);
}
}

_genVS << genAttr.str();
Expand Down Expand Up @@ -4034,12 +4075,31 @@ HdSt_CodeGen::_GenerateDrawingCoord(
<< " dc.varyingCoord = drawingCoord2[0].y;\n"
<< " hd_instanceIndex r = GetInstanceIndex();\n";

_genCS << "// Compute shaders read the drawCommands buffer directly.\n"
<< "// GetDrawingCoordField() needs to be implemented by the\n"
<< "// kernel by offsetting by the thread ID.\n"
<< "FORWARD_DECL(int GetDrawingCoordField(uint coordIndex, uint fieldIndex));\n"
<< "hd_drawingCoord GetDrawingCoord() {\n"
<< " hd_drawingCoord dc;\n"
<< " dc.modelCoord = GetDrawingCoordField(0, 0);\n"
<< " dc.constantCoord = GetDrawingCoordField(0, 1);\n"
<< " dc.elementCoord = GetDrawingCoordField(0, 2);\n"
<< " dc.primitiveCoord = GetDrawingCoordField(0, 3);\n"
<< " dc.fvarCoord = GetDrawingCoordField(1, 0);\n"
<< " dc.shaderCoord = GetDrawingCoordField(1, 1);\n"
<< " dc.vertexCoord = GetDrawingCoordField(1, 2);\n"
<< " dc.topologyVisibilityCoord = GetDrawingCoordField(2, 0);\n"
<< " dc.varyingCoord = GetDrawingCoordField(2, 1);\n"
<< " hd_instanceIndex r = GetInstanceIndex();\n";

for(int i = 0; i < instanceIndexWidth; ++i) {
std::string const index = std::to_string(i);
_genVS << " dc.instanceIndex[" << index << "]"
<< " = r.indices[" << index << "];\n";
_genPTVS << " dc.instanceIndex[" << index << "]"
<< " = r.indices[" << index << "];\n";
_genCS << " dc.instanceIndex[" << index << "]"
<< " = r.indices[" << index << "];\n";
}
for(int i = 0; i < instanceIndexWidth-1; ++i) {
std::string const index = std::to_string(i);
Expand All @@ -4049,12 +4109,17 @@ HdSt_CodeGen::_GenerateDrawingCoord(
_genPTVS << " dc.instanceCoords[" << index << "]"
<< " = drawingCoordI" << index << "[0]"
<< " + dc.instanceIndex[" << std::to_string(i+1) << "];\n";
_genCS << " dc.instanceCoords[" << index << "]"
<< " = GetDrawingCoordField(3," << index << ")"
<< " + dc.instanceIndex[" << std::to_string(i+1) << "];\n";
}

_genVS << " return dc;\n"
<< "}\n";
_genPTVS << " return dc;\n"
<< "}\n";
_genCS << " return dc;\n"
<< "}\n";

// note: GL spec says tessellation input array size must be equal to
// gl_MaxPatchVertices, which is used for intrinsic declaration
Expand Down Expand Up @@ -4552,6 +4617,9 @@ HdSt_CodeGen::_GenerateElementPrimvar()
<< " + GetDrawingCoord().elementCoord;\n"
<< "}\n";
}
else if (_geometricShader->IsPrimTypeCompute()) {
// do nothing.
}
else {
TF_CODING_ERROR("HdSt_GeometricShader::PrimitiveType %d is "
"unexpected in _GenerateElementPrimvar().",
Expand Down
1 change: 1 addition & 0 deletions pxr/imaging/hdSt/codeGen.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ class HdSt_CodeGen
ElementVector _resFS;
ElementVector _resPTCS;
ElementVector _resPTVS;
ElementVector _resCS;

ElementVector _resInterstage;

Expand Down
84 changes: 81 additions & 3 deletions pxr/imaging/hdSt/commandBuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,30 @@
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//

#if defined(ARCH_OS_WINDOWS)
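// On Windows, MemoryBarrier is defined as a macro (__faststorefence), so we
// have to remove it here so that HgiComputeCmds::MemoryBarrier can be called.
// https://learn.microsoft.com/en-us/windows/win32/api/winnt/nf-winnt-memorybarrier
// NOTE(review): this block precedes every #include in this file. An #undef
// only removes a macro that is already defined, so if one of the headers
// below pulls in the Windows headers the macro would be defined again after
// this point. ARCH_OS_WINDOWS is presumably supplied by a project header that
// has not been included yet either -- confirm this guard is ever true here.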
#undef MemoryBarrier
#endif

#include "pxr/imaging/hdSt/commandBuffer.h"
#include "pxr/imaging/hdSt/debugCodes.h"
#include "pxr/imaging/hdSt/geometricShader.h"
#include "pxr/imaging/hdSt/indirectDrawBatch.h"
#include "pxr/imaging/hdSt/pipelineDrawBatch.h"
#include "pxr/imaging/hdSt/renderPassState.h"
#include "pxr/imaging/hdSt/resourceRegistry.h"
#include "pxr/imaging/hdSt/materialNetworkShader.h"
#include "pxr/imaging/hdSt/materialParam.h"

#include "pxr/imaging/hgi/capabilities.h"
#include "pxr/imaging/hgi/computeCmds.h"

#include "pxr/imaging/hd/bufferArrayRange.h"
#include "pxr/imaging/hd/perfLog.h"
#include "pxr/imaging/hd/renderIndex.h"
#include "pxr/imaging/hd/tokens.h"

#include "pxr/base/gf/matrix4f.h"
Expand Down Expand Up @@ -78,18 +89,38 @@ void
// Prepares every draw batch held by this command buffer for drawing.
//
// Order of operations, as written below:
//   1. Fetch the resource registry from the render index and downcast it.
//   2. Run frustum culling via _FrustumCull().
//   3. Call PrepareDraw() on each batch.
//   4. Insert a memory barrier on the global compute cmds.
//   5. Call BeforeDraw() on each batch.
//   6. Insert a second memory barrier and submit the compute work.
//
// gfxCmds         - forwarded unchanged to each batch's PrepareDraw().
// renderPassState - forwarded to _FrustumCull() and to each batch.
// renderIndex     - supplies the resource registry; also passed to
//                   _FrustumCull().
HdStCommandBuffer::PrepareDraw(
HgiGraphicsCmds *gfxCmds,
HdStRenderPassStateSharedPtr const &renderPassState,
// NOTE(review): the next two lines look like the old and the new form of the
// last parameter shown side by side by the diff view. The body declares its
// own local `resourceRegistry` and reads `renderIndex`, so presumably only
// the `HdRenderIndex *renderIndex` line is live -- confirm against the file.
HdStResourceRegistrySharedPtr const &resourceRegistry)
HdRenderIndex *renderIndex)
{
HD_TRACE_FUNCTION();

// Downcast the resource registry
HdStResourceRegistrySharedPtr const& resourceRegistry =
std::dynamic_pointer_cast<HdStResourceRegistry>(
renderIndex->GetResourceRegistry());
// NOTE(review): the result of TF_VERIFY is not tested; resourceRegistry is
// dereferenced below regardless. Confirm the cast cannot fail here.
TF_VERIFY(resourceRegistry);

// Culling runs before any batch is prepared.
_FrustumCull(renderPassState, renderIndex);

for (auto const& batch : _drawBatches) {
batch->PrepareDraw(gfxCmds, renderPassState, resourceRegistry);
}

// Once all the prepare work is done, add a memory barrier before the next
// stage.
HgiComputeCmds *computeCmds = resourceRegistry->GetGlobalComputeCmds(
HgiComputeDispatchConcurrent);

computeCmds->MemoryBarrier(HgiMemoryBarrierAll);

for (auto const& batch : _drawBatches) {
batch->BeforeDraw(renderPassState, resourceRegistry);
}

//
// Compute work that was set up for indirect command buffers and frustum
// culling in the batch preparation is submitted to device.
// A second barrier is issued after the BeforeDraw() pass, immediately
// before the submit.
//
computeCmds->MemoryBarrier(HgiMemoryBarrierAll);
resourceRegistry->SubmitComputeWork();
}

Expand All @@ -108,7 +139,7 @@ HdStCommandBuffer::ExecuteDraw(
// Reset per-commandBuffer performance counters, updated by batch execution
HD_PERF_COUNTER_SET(HdPerfTokens->drawCalls, 0);
HD_PERF_COUNTER_SET(HdTokens->itemsDrawn, 0);

//
// draw batches
//
Expand Down Expand Up @@ -362,7 +393,54 @@ HdStCommandBuffer::SyncDrawItemVisibility(unsigned visChangeCount)
}

// Decides whether CPU frustum culling should run for this command buffer and
// either runs it or re-synchronizes draw item visibility instead.
//
// CPU culling is skipped when the HDST_DISABLE_FRUSTUM_CULLING debug flag is
// enabled, or when the device reports multi-draw-indirect support and GPU
// frustum culling is enabled for the draw batch type in use. When it is not
// skipped, it is still suppressed while HD_FREEZE_CULL_FRUSTUM is enabled.
//
// renderPassState - provides the cull matrix handed to _FrustumCullCPU().
// renderIndex     - provides the resource registry and the change tracker.
void
// NOTE(review): the next line looks like the pre-change signature
// (FrustumCull taking a matrix) left in by the diff view; the definition that
// matches the body below is the _FrustumCull signature that follows it.
HdStCommandBuffer::FrustumCull(GfMatrix4d const &viewProjMatrix)
HdStCommandBuffer::_FrustumCull(
HdStRenderPassStateSharedPtr const &renderPassState,
HdRenderIndex const *renderIndex)
{
// Downcast the resource registry
HdStResourceRegistrySharedPtr const& resourceRegistry =
std::dynamic_pointer_cast<HdStResourceRegistry>(
renderIndex->GetResourceRegistry());
// NOTE(review): the result of TF_VERIFY is not tested; resourceRegistry is
// dereferenced on the next statement regardless. Confirm the cast cannot fail.
TF_VERIFY(resourceRegistry);

Hgi *hgi = resourceRegistry->GetHgi();
HgiCapabilities const *capabilities = hgi->GetCapabilities();

const bool multiDrawIndirectEnabled =
capabilities->IsSet(HgiDeviceCapabilitiesBitsMultiDrawIndirect);

// Ask whichever draw batch implementation is active (pipeline vs. indirect)
// whether GPU frustum culling is enabled.
const bool gpuFrustumCullingEnabled =
HdSt_PipelineDrawBatch::IsEnabled(capabilities) ?
HdSt_PipelineDrawBatch::IsEnabledGPUFrustumCulling() :
HdSt_IndirectDrawBatch::IsEnabledGPUFrustumCulling();

// CPU culling is skipped when disabled via the debug flag, or when both
// multi-draw-indirect and GPU frustum culling are available.
const bool skipCulling = TfDebug::IsEnabled(HDST_DISABLE_FRUSTUM_CULLING) ||
(multiDrawIndirectEnabled && gpuFrustumCullingEnabled);

const bool freezeCulling = TfDebug::IsEnabled(HD_FREEZE_CULL_FRUSTUM);

if (skipCulling) {
HdChangeTracker const &tracker = renderIndex->GetChangeTracker();
// Since culling state is stored across renders, we need to update the
// visible state of all items.
SyncDrawItemVisibility(tracker.GetVisibilityChangeCount());

TF_DEBUG(HD_DRAWITEMS_CULLED).Msg("CULLED: skipped\n");
}
else {
// When culling is frozen, the CPU cull is not re-run.
if (!freezeCulling) {
_FrustumCullCPU(renderPassState->GetCullMatrix());
}

// GetCulledSize() is only called when the debug code is enabled.
if (TfDebug::IsEnabled(HD_DRAWITEMS_CULLED)) {
TF_DEBUG(HD_DRAWITEMS_CULLED).Msg("CULLED: %zu drawItems\n",
GetCulledSize());
}
}
}

void
HdStCommandBuffer::_FrustumCullCPU(GfMatrix4d const &viewProjMatrix)
{
HD_TRACE_FUNCTION();

Expand Down
Loading

0 comments on commit e939b02

Please sign in to comment.