Skip to content

Commit

Permalink
Remove the method isTessOffChip
Browse files Browse the repository at this point in the history
On GFX9+, tessellation is always in off-chip mode. Since we only support
GFX10+, the method isTessOffChip always returns true for us.
  • Loading branch information
amdrexu committed Dec 5, 2023
1 parent 0a7397e commit fe8ddc5
Show file tree
Hide file tree
Showing 8 changed files with 62 additions and 114 deletions.
3 changes: 0 additions & 3 deletions lgc/include/lgc/state/PipelineState.h
Original file line number Diff line number Diff line change
Expand Up @@ -295,9 +295,6 @@ class PipelineState final : public Pipeline {
const RasterizerState &getRasterizerState() const { return m_rasterizerState; }
const DepthStencilState &getDepthStencilState() const { return m_depthStencilState; }

// Determine whether to use off-chip tessellation mode
bool isTessOffChip();

// Set GS on-chip mode
void setGsOnChip(bool gsOnChip) { m_gsOnChip = gsOnChip; }

Expand Down
10 changes: 2 additions & 8 deletions lgc/patch/Gfx9ConfigBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -953,9 +953,7 @@ template <typename T> void ConfigBuilder::buildVsRegConfig(ShaderStage shaderSta
SET_REG_FIELD(&config->vsRegs, SPI_SHADER_PGM_RSRC1_VS, VGPR_COMP_CNT, 2);
}

if (m_pipelineState->isTessOffChip()) {
SET_REG_FIELD(&config->vsRegs, SPI_SHADER_PGM_RSRC2_VS, OC_LDS_EN, true);
}
SET_REG_FIELD(&config->vsRegs, SPI_SHADER_PGM_RSRC2_VS, OC_LDS_EN, true);
}

setupPaSpecificRegisters(&config->vsRegs);
Expand Down Expand Up @@ -1016,7 +1014,6 @@ void ConfigBuilder::buildLsHsRegConfig(ShaderStage shaderStage1, ShaderStage sha
SET_REG_FIELD(&config->lsHsRegs, SPI_SHADER_PGM_RSRC2_HS, USER_SGPR, userDataCount);

const auto &calcFactor = tcsResUsage->inOutUsage.tcs.calcFactor;
assert(m_pipelineState->isTessOffChip()); // Must be off-chip on GFX9+

const unsigned ldsSizeDwordGranularityShift =
m_pipelineState->getTargetInfo().getGpuProperty().ldsSizeDwordGranularityShift;
Expand Down Expand Up @@ -2055,10 +2052,7 @@ void ConfigBuilder::setupVgtTfParam(LsHsRegConfig *config) {
SET_REG_FIELD(config, VGT_TF_PARAM, TYPE, primType);
SET_REG_FIELD(config, VGT_TF_PARAM, PARTITIONING, partition);
SET_REG_FIELD(config, VGT_TF_PARAM, TOPOLOGY, topology);

if (m_pipelineState->isTessOffChip()) {
SET_REG_FIELD(config, VGT_TF_PARAM, DISTRIBUTION_MODE, TRAPEZOIDS);
}
SET_REG_FIELD(config, VGT_TF_PARAM, DISTRIBUTION_MODE, TRAPEZOIDS);
}

// =====================================================================================================================
Expand Down
24 changes: 9 additions & 15 deletions lgc/patch/NggPrimShader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3117,11 +3117,9 @@ void NggPrimShader::runEs(ArrayRef<Argument *> args) {

if (m_hasTes) {
// Set up system value SGPRs
if (m_pipelineState->isTessOffChip()) {
Value *isOffChip = PoisonValue::get(m_builder.getInt32Ty()); // Unused
esArgs.push_back(m_hasGs ? offChipLdsBase : isOffChip);
esArgs.push_back(m_hasGs ? isOffChip : offChipLdsBase);
}
Value *isOffChip = PoisonValue::get(m_builder.getInt32Ty()); // Unused
esArgs.push_back(m_hasGs ? offChipLdsBase : isOffChip);
esArgs.push_back(m_hasGs ? isOffChip : offChipLdsBase);

if (m_hasGs)
esArgs.push_back(esGsOffset);
Expand Down Expand Up @@ -3335,11 +3333,9 @@ Value *NggPrimShader::runPartEs(ArrayRef<Argument *> args, Value *position) {

if (m_hasTes) {
// Set up system value SGPRs
if (m_pipelineState->isTessOffChip()) {
Value *isOffChip = PoisonValue::get(m_builder.getInt32Ty()); // Unused
partEsArgs.push_back(isOffChip);
partEsArgs.push_back(offChipLdsBase);
}
Value *isOffChip = PoisonValue::get(m_builder.getInt32Ty()); // Unused
partEsArgs.push_back(isOffChip);
partEsArgs.push_back(offChipLdsBase);

// Set up system value VGPRs
partEsArgs.push_back(tessCoordX);
Expand Down Expand Up @@ -7510,11 +7506,9 @@ Value *NggPrimShader::fetchXfbOutput(Function *target, ArrayRef<Argument *> args

if (m_hasTes) {
// Set up system value SGPRs
if (m_pipelineState->isTessOffChip()) {
Value *isOffChip = PoisonValue::get(m_builder.getInt32Ty()); // Unused
xfbFetcherArgs.push_back(isOffChip);
xfbFetcherArgs.push_back(offChipLdsBase);
}
Value *isOffChip = PoisonValue::get(m_builder.getInt32Ty()); // Unused
xfbFetcherArgs.push_back(isOffChip);
xfbFetcherArgs.push_back(offChipLdsBase);

// Set up system value VGPRs
xfbFetcherArgs.push_back(tessCoordX);
Expand Down
95 changes: 37 additions & 58 deletions lgc/patch/PatchInOutImportExport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -371,18 +371,11 @@ void PatchInOutImportExport::processShader() {
calcFactor.outPatchSize = outPatchSize;
calcFactor.inPatchSize = inPatchSize;

// NOTE: Tess factors are always stored to on-chip LDS first. Then, they are store to TF buffer and on-chip LDS
// or off-chip LDS buffer (which will be loaded by TES).
if (m_pipelineState->isTessOffChip()) {
calcFactor.offChip.outPatchStart = 0;
calcFactor.offChip.patchConstStart = calcFactor.offChip.outPatchStart + outPatchTotalSize;

calcFactor.onChip.tessFactorStart = inPatchTotalSize;
} else {
calcFactor.onChip.outPatchStart = inPatchTotalSize;
calcFactor.onChip.patchConstStart = calcFactor.onChip.outPatchStart + outPatchTotalSize;
calcFactor.onChip.tessFactorStart = calcFactor.onChip.patchConstStart + patchConstTotalSize;
}
// NOTE: Tess factors are always stored to on-chip LDS first. Then, they are stored to TF buffer and off-chip
// LDS buffer (which will be loaded by TES).
calcFactor.offChip.outPatchStart = 0;
calcFactor.offChip.patchConstStart = calcFactor.offChip.outPatchStart + outPatchTotalSize;
calcFactor.onChip.tessFactorStart = inPatchTotalSize;

calcFactor.tessFactorStride = tessFactorStride;
calcFactor.tessOnChipLdsSize = calcFactor.onChip.tessFactorStart + tessFactorTotalSize;
Expand Down Expand Up @@ -430,16 +423,12 @@ void PatchInOutImportExport::processShader() {
LLPC_OUTS("Output vertex count: " << outVertexCount << "\n");
LLPC_OUTS("Output vertex stride: " << calcFactor.outVertexStride << "\n");
LLPC_OUTS("Output patch size (in dwords): " << outPatchSize << "\n");
LLPC_OUTS("Output patch start: " << (m_pipelineState->isTessOffChip() ? calcFactor.offChip.outPatchStart
: calcFactor.onChip.outPatchStart)
<< (m_pipelineState->isTessOffChip() ? " (LDS buffer)" : "(LDS)") << "\n");
LLPC_OUTS("Output patch start: " << calcFactor.offChip.outPatchStart << " (LDS buffer)\n");
LLPC_OUTS("Output patch total size (in dwords): " << outPatchTotalSize << "\n");
LLPC_OUTS("\n");
LLPC_OUTS("Patch constant count: " << patchConstCount << "\n");
LLPC_OUTS("Patch constant size (in dwords): " << calcFactor.patchConstSize << "\n");
LLPC_OUTS("Patch constant start: " << (m_pipelineState->isTessOffChip() ? calcFactor.offChip.patchConstStart
: calcFactor.onChip.patchConstStart)
<< (m_pipelineState->isTessOffChip() ? " (LDS buffer)" : "(LDS)") << "\n");
LLPC_OUTS("Patch constant start: " << calcFactor.offChip.patchConstStart << " (LDS buffer)\n");
LLPC_OUTS("Patch constant total size (in dwords): " << patchConstTotalSize << "\n");
LLPC_OUTS("\n");
LLPC_OUTS("Tess factor start: " << calcFactor.onChip.tessFactorStart << " (LDS)\n");
Expand Down Expand Up @@ -1606,7 +1595,7 @@ Value *PatchInOutImportExport::patchTesGenericInputImport(Type *inputTy, unsigne
assert(compIdx);

auto ldsOffset = calcLdsOffsetForTesInput(inputTy, location, locOffset, compIdx, vertexIdx, builder);
return readValueFromLds(m_pipelineState->isTessOffChip(), inputTy, ldsOffset, builder);
return readValueFromLds(true, inputTy, ldsOffset, builder);
}

// =====================================================================================================================
Expand Down Expand Up @@ -1963,7 +1952,7 @@ Value *PatchInOutImportExport::patchTcsGenericOutputImport(Type *outputTy, unsig
Value *compIdx, Value *vertexIdx, BuilderBase &builder) {
assert(compIdx);
auto ldsOffset = calcLdsOffsetForTcsOutput(outputTy, location, locOffset, compIdx, vertexIdx, builder);
return readValueFromLds(m_pipelineState->isTessOffChip(), outputTy, ldsOffset, builder);
return readValueFromLds(true, outputTy, ldsOffset, builder);
}

// =====================================================================================================================
Expand Down Expand Up @@ -2016,7 +2005,7 @@ void PatchInOutImportExport::patchTcsGenericOutputExport(Value *output, unsigned
assert(compIdx);
Type *outputTy = output->getType();
auto ldsOffset = calcLdsOffsetForTcsOutput(outputTy, location, locOffset, compIdx, vertexIdx, builder);
writeValueToLds(m_pipelineState->isTessOffChip(), output, ldsOffset, builder);
writeValueToLds(true, output, ldsOffset, builder);
}

// =====================================================================================================================
Expand Down Expand Up @@ -2230,7 +2219,7 @@ Value *PatchInOutImportExport::patchTesBuiltInInputImport(Type *inputTy, unsigne
const unsigned loc = builtInInLocMap.find(builtInId)->second;

auto ldsOffset = calcLdsOffsetForTesInput(inputTy, loc, nullptr, elemIdx, vertexIdx, builder);
input = readValueFromLds(m_pipelineState->isTessOffChip(), inputTy, ldsOffset, builder);
input = readValueFromLds(true, inputTy, ldsOffset, builder);

break;
}
Expand All @@ -2242,7 +2231,7 @@ Value *PatchInOutImportExport::patchTesBuiltInInputImport(Type *inputTy, unsigne
const unsigned loc = builtInInLocMap.find(builtInId)->second;

auto ldsOffset = calcLdsOffsetForTesInput(inputTy, loc, nullptr, nullptr, vertexIdx, builder);
input = readValueFromLds(m_pipelineState->isTessOffChip(), inputTy, ldsOffset, builder);
input = readValueFromLds(true, inputTy, ldsOffset, builder);

break;
}
Expand All @@ -2259,12 +2248,12 @@ Value *PatchInOutImportExport::patchTesBuiltInInputImport(Type *inputTy, unsigne
for (unsigned i = 0; i < inputTy->getArrayNumElements(); ++i) {
auto elemIdx = builder.getInt32(i);
auto ldsOffset = calcLdsOffsetForTesInput(elemTy, loc, nullptr, elemIdx, vertexIdx, builder);
auto elem = readValueFromLds(m_pipelineState->isTessOffChip(), elemTy, ldsOffset, builder);
auto elem = readValueFromLds(true, elemTy, ldsOffset, builder);
input = builder.CreateInsertValue(input, elem, {i});
}
} else {
auto ldsOffset = calcLdsOffsetForTesInput(inputTy, loc, nullptr, elemIdx, vertexIdx, builder);
input = readValueFromLds(m_pipelineState->isTessOffChip(), inputTy, ldsOffset, builder);
input = readValueFromLds(true, inputTy, ldsOffset, builder);
}

break;
Expand Down Expand Up @@ -2307,12 +2296,12 @@ Value *PatchInOutImportExport::patchTesBuiltInInputImport(Type *inputTy, unsigne
for (unsigned i = 0; i < inputTy->getArrayNumElements(); ++i) {
auto elemIdx = builder.getInt32(i);
auto ldsOffset = calcLdsOffsetForTesInput(elemTy, loc, nullptr, elemIdx, vertexIdx, builder);
auto elem = readValueFromLds(m_pipelineState->isTessOffChip(), elemTy, ldsOffset, builder);
auto elem = readValueFromLds(true, elemTy, ldsOffset, builder);
input = builder.CreateInsertValue(input, elem, {i});
}
} else {
auto ldsOffset = calcLdsOffsetForTesInput(inputTy, loc, nullptr, elemIdx, vertexIdx, builder);
input = readValueFromLds(m_pipelineState->isTessOffChip(), inputTy, ldsOffset, builder);
input = readValueFromLds(true, inputTy, ldsOffset, builder);
}

break;
Expand Down Expand Up @@ -2825,7 +2814,7 @@ Value *PatchInOutImportExport::patchTcsBuiltInOutputImport(Type *outputTy, unsig
unsigned loc = builtInOutLocMap.find(builtInId)->second;

auto ldsOffset = calcLdsOffsetForTcsOutput(outputTy, loc, nullptr, elemIdx, vertexIdx, builder);
output = readValueFromLds(m_pipelineState->isTessOffChip(), outputTy, ldsOffset, builder);
output = readValueFromLds(true, outputTy, ldsOffset, builder);

break;
}
Expand All @@ -2851,12 +2840,12 @@ Value *PatchInOutImportExport::patchTcsBuiltInOutputImport(Type *outputTy, unsig
for (unsigned i = 0; i < outputTy->getArrayNumElements(); ++i) {
auto elemIdx = builder.getInt32(i);
auto ldsOffset = calcLdsOffsetForTcsOutput(elemTy, loc, nullptr, elemIdx, vertexIdx, builder);
auto elem = readValueFromLds(m_pipelineState->isTessOffChip(), elemTy, ldsOffset, builder);
auto elem = readValueFromLds(true, elemTy, ldsOffset, builder);
output = builder.CreateInsertValue(output, elem, {i});
}
} else {
auto ldsOffset = calcLdsOffsetForTcsOutput(outputTy, loc, nullptr, elemIdx, vertexIdx, builder);
output = readValueFromLds(m_pipelineState->isTessOffChip(), outputTy, ldsOffset, builder);
output = readValueFromLds(true, outputTy, ldsOffset, builder);
}

break;
Expand Down Expand Up @@ -3102,7 +3091,7 @@ void PatchInOutImportExport::patchTcsBuiltInOutputExport(Value *output, unsigned
unsigned loc = builtInOutLocMap.find(builtInId)->second;

auto ldsOffset = calcLdsOffsetForTcsOutput(outputTy, loc, nullptr, elemIdx, vertexIdx, builder);
writeValueToLds(m_pipelineState->isTessOffChip(), output, ldsOffset, builder);
writeValueToLds(true, output, ldsOffset, builder);

break;
}
Expand All @@ -3123,11 +3112,11 @@ void PatchInOutImportExport::patchTcsBuiltInOutputExport(Value *output, unsigned
auto elem = ExtractValueInst::Create(output, {i}, "", insertPos);
auto elemIdx = ConstantInt::get(Type::getInt32Ty(*m_context), i);
auto ldsOffset = calcLdsOffsetForTcsOutput(elem->getType(), loc, nullptr, elemIdx, vertexIdx, builder);
writeValueToLds(m_pipelineState->isTessOffChip(), elem, ldsOffset, builder);
writeValueToLds(true, elem, ldsOffset, builder);
}
} else {
auto ldsOffset = calcLdsOffsetForTcsOutput(outputTy, loc, nullptr, elemIdx, vertexIdx, builder);
writeValueToLds(m_pipelineState->isTessOffChip(), output, ldsOffset, builder);
writeValueToLds(true, output, ldsOffset, builder);
}

break;
Expand Down Expand Up @@ -3168,12 +3157,12 @@ void PatchInOutImportExport::patchTcsBuiltInOutputExport(Value *output, unsigned
for (unsigned i = 0; i < outputTy->getArrayNumElements(); ++i) {
auto ldsOffset = calcLdsOffsetForTcsOutput(outputTy, loc, nullptr, builder.getInt32(i), nullptr, builder);
auto elem = builder.CreateExtractValue(output, {i});
writeValueToLds(m_pipelineState->isTessOffChip(), elem, ldsOffset, builder);
writeValueToLds(true, elem, ldsOffset, builder);
}
} else {
// Handle a single element of tessLevelOuter array
auto ldsOffset = calcLdsOffsetForTcsOutput(outputTy, loc, nullptr, elemIdx, nullptr, builder);
writeValueToLds(m_pipelineState->isTessOffChip(), output, ldsOffset, builder);
writeValueToLds(true, output, ldsOffset, builder);
}
}

Expand Down Expand Up @@ -4545,11 +4534,8 @@ Value *PatchInOutImportExport::calcLdsOffsetForTcsOutput(Type *outputTy, unsigne
const auto &inOutUsage = m_pipelineState->getShaderResourceUsage(ShaderStageTessControl)->inOutUsage.tcs;
const auto &calcFactor = inOutUsage.calcFactor;

auto outPatchStart =
m_pipelineState->isTessOffChip() ? calcFactor.offChip.outPatchStart : calcFactor.onChip.outPatchStart;

auto patchConstStart =
m_pipelineState->isTessOffChip() ? calcFactor.offChip.patchConstStart : calcFactor.onChip.patchConstStart;
auto outPatchStart = calcFactor.offChip.outPatchStart;
auto patchConstStart = calcFactor.offChip.patchConstStart;

// attribOffset = (location + locOffset) * 4 + compIdx * bitWidth / 32
Value *attribOffset = builder.getInt32(location);
Expand Down Expand Up @@ -4617,11 +4603,8 @@ Value *PatchInOutImportExport::calcLdsOffsetForTesInput(Type *inputTy, unsigned

const auto &calcFactor = m_pipelineState->getShaderResourceUsage(ShaderStageTessControl)->inOutUsage.tcs.calcFactor;

auto outPatchStart =
m_pipelineState->isTessOffChip() ? calcFactor.offChip.outPatchStart : calcFactor.onChip.outPatchStart;

auto patchConstStart =
m_pipelineState->isTessOffChip() ? calcFactor.offChip.patchConstStart : calcFactor.onChip.patchConstStart;
auto outPatchStart = calcFactor.offChip.outPatchStart;
auto patchConstStart = calcFactor.offChip.patchConstStart;

const auto &entryArgIdxs = m_pipelineState->getShaderInterfaceData(m_shaderStage)->entryArgIdxs.tes;

Expand Down Expand Up @@ -4735,12 +4718,10 @@ unsigned PatchInOutImportExport::calcPatchCountPerThreadGroup(unsigned inVertexC

patchCountPerThreadGroup = std::min(patchCountPerThreadGroup, optimalPatchCountPerThreadGroup);

if (m_pipelineState->isTessOffChip()) {
auto outPatchLdsBufferSize = (outPatchSize + patchConstSize) * 4;
auto tessOffChipPatchCountPerThreadGroup =
m_pipelineState->getTargetInfo().getGpuProperty().tessOffChipLdsBufferSize / outPatchLdsBufferSize;
patchCountPerThreadGroup = std::min(patchCountPerThreadGroup, tessOffChipPatchCountPerThreadGroup);
}
auto outPatchLdsBufferSize = (outPatchSize + patchConstSize) * 4;
auto tessOffChipPatchCountPerThreadGroup =
m_pipelineState->getTargetInfo().getGpuProperty().tessOffChipLdsBufferSize / outPatchLdsBufferSize;
patchCountPerThreadGroup = std::min(patchCountPerThreadGroup, tessOffChipPatchCountPerThreadGroup);

// TF-Buffer-based limit for Patches per Thread Group:
// ---------------------------------------------------------------------------------------------
Expand All @@ -4758,13 +4739,11 @@ unsigned PatchInOutImportExport::calcPatchCountPerThreadGroup(unsigned inVertexC

patchCountPerThreadGroup = std::min(patchCountPerThreadGroup, tfBufferPatchCountLimit);

if (m_pipelineState->isTessOffChip()) {
// For all-offchip tessellation, we need to write an additional 4-byte TCS control word to the TF buffer whenever
// the patch-ID is zero.
const unsigned offChipTfBufferPatchCountLimit =
(tfBufferSizeInBytes - (patchCountPerThreadGroup * sizeof(unsigned))) / (tessFactorStride * sizeof(unsigned));
patchCountPerThreadGroup = std::min(patchCountPerThreadGroup, offChipTfBufferPatchCountLimit);
}
// For all-offchip tessellation, we need to write an additional 4-byte TCS control word to the TF buffer whenever
// the patch-ID is zero.
const unsigned offChipTfBufferPatchCountLimit =
(tfBufferSizeInBytes - (patchCountPerThreadGroup * sizeof(unsigned))) / (tessFactorStride * sizeof(unsigned));
patchCountPerThreadGroup = std::min(patchCountPerThreadGroup, offChipTfBufferPatchCountLimit);

return patchCountPerThreadGroup;
}
Expand Down
7 changes: 2 additions & 5 deletions lgc/patch/RegisterMetadataBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,6 @@ void RegisterMetadataBuilder::buildLsHsRegisters() {
getGraphicsRegNode()[Util::Abi::GraphicsRegisterMetadataKey::LsVgprCompCnt] = lsVgprCompCnt;

// Set LDS_SIZE of SPI_SHADER_PGM_RSRC2_HS
assert(m_pipelineState->isTessOffChip()); // Must be off-chip on GFX9+
unsigned ldsSizeInDwords = calcFactor.tessOnChipLdsSize;
ldsSizeInDwords += calcFactor.rayQueryLdsStackSize;

Expand Down Expand Up @@ -743,8 +742,7 @@ void RegisterMetadataBuilder::buildHwVsRegisters() {
else
getGraphicsRegNode()[Util::Abi::GraphicsRegisterMetadataKey::VsVgprCompCnt] = 2;

if (m_pipelineState->isTessOffChip())
getHwShaderNode(Util::Abi::HardwareStage::Vs)[Util::Abi::HardwareStageMetadataKey::OffchipLdsEn] = true;
getHwShaderNode(Util::Abi::HardwareStage::Vs)[Util::Abi::HardwareStageMetadataKey::OffchipLdsEn] = true;
}
}

Expand Down Expand Up @@ -1563,8 +1561,7 @@ void RegisterMetadataBuilder::setVgtTfParam() {
vgtTfParam[Util::Abi::VgtTfParamMetadataKey::Type] = primType;
vgtTfParam[Util::Abi::VgtTfParamMetadataKey::Partitioning] = partition;
vgtTfParam[Util::Abi::VgtTfParamMetadataKey::Topology] = topology;
if (m_pipelineState->isTessOffChip())
vgtTfParam[Util::Abi::VgtTfParamMetadataKey::DistributionMode] = TRAPEZOIDS;
vgtTfParam[Util::Abi::VgtTfParamMetadataKey::DistributionMode] = TRAPEZOIDS;
}

// =====================================================================================================================
Expand Down
Loading

0 comments on commit fe8ddc5

Please sign in to comment.