Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove the method isTessOffChip #2860

Merged
merged 1 commit into from
Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions lgc/include/lgc/state/PipelineState.h
Original file line number Diff line number Diff line change
Expand Up @@ -295,9 +295,6 @@ class PipelineState final : public Pipeline {
// Get the rasterizer state: returns a const reference to the m_rasterizerState member.
const RasterizerState &getRasterizerState() const { return m_rasterizerState; }
// Get the depth/stencil state: returns a const reference to the m_depthStencilState member.
const DepthStencilState &getDepthStencilState() const { return m_depthStencilState; }

// Determine whether to use off-chip tessellation mode
bool isTessOffChip();

// Set GS on-chip mode: stores the given flag in m_gsOnChip.
void setGsOnChip(bool gsOnChip) { m_gsOnChip = gsOnChip; }

Expand Down
10 changes: 2 additions & 8 deletions lgc/patch/Gfx9ConfigBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -953,9 +953,7 @@ template <typename T> void ConfigBuilder::buildVsRegConfig(ShaderStage shaderSta
SET_REG_FIELD(&config->vsRegs, SPI_SHADER_PGM_RSRC1_VS, VGPR_COMP_CNT, 2);
}

if (m_pipelineState->isTessOffChip()) {
SET_REG_FIELD(&config->vsRegs, SPI_SHADER_PGM_RSRC2_VS, OC_LDS_EN, true);
}
SET_REG_FIELD(&config->vsRegs, SPI_SHADER_PGM_RSRC2_VS, OC_LDS_EN, true);
}

setupPaSpecificRegisters(&config->vsRegs);
Expand Down Expand Up @@ -1016,7 +1014,6 @@ void ConfigBuilder::buildLsHsRegConfig(ShaderStage shaderStage1, ShaderStage sha
SET_REG_FIELD(&config->lsHsRegs, SPI_SHADER_PGM_RSRC2_HS, USER_SGPR, userDataCount);

const auto &calcFactor = tcsResUsage->inOutUsage.tcs.calcFactor;
assert(m_pipelineState->isTessOffChip()); // Must be off-chip on GFX9+

const unsigned ldsSizeDwordGranularityShift =
m_pipelineState->getTargetInfo().getGpuProperty().ldsSizeDwordGranularityShift;
Expand Down Expand Up @@ -2055,10 +2052,7 @@ void ConfigBuilder::setupVgtTfParam(LsHsRegConfig *config) {
SET_REG_FIELD(config, VGT_TF_PARAM, TYPE, primType);
SET_REG_FIELD(config, VGT_TF_PARAM, PARTITIONING, partition);
SET_REG_FIELD(config, VGT_TF_PARAM, TOPOLOGY, topology);

if (m_pipelineState->isTessOffChip()) {
SET_REG_FIELD(config, VGT_TF_PARAM, DISTRIBUTION_MODE, TRAPEZOIDS);
}
SET_REG_FIELD(config, VGT_TF_PARAM, DISTRIBUTION_MODE, TRAPEZOIDS);
}

// =====================================================================================================================
Expand Down
24 changes: 9 additions & 15 deletions lgc/patch/NggPrimShader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3117,11 +3117,9 @@ void NggPrimShader::runEs(ArrayRef<Argument *> args) {

if (m_hasTes) {
// Set up system value SGPRs
if (m_pipelineState->isTessOffChip()) {
Value *isOffChip = PoisonValue::get(m_builder.getInt32Ty()); // Unused
esArgs.push_back(m_hasGs ? offChipLdsBase : isOffChip);
esArgs.push_back(m_hasGs ? isOffChip : offChipLdsBase);
}
Value *isOffChip = PoisonValue::get(m_builder.getInt32Ty()); // Unused
esArgs.push_back(m_hasGs ? offChipLdsBase : isOffChip);
esArgs.push_back(m_hasGs ? isOffChip : offChipLdsBase);

if (m_hasGs)
esArgs.push_back(esGsOffset);
Expand Down Expand Up @@ -3335,11 +3333,9 @@ Value *NggPrimShader::runPartEs(ArrayRef<Argument *> args, Value *position) {

if (m_hasTes) {
// Set up system value SGPRs
if (m_pipelineState->isTessOffChip()) {
Value *isOffChip = PoisonValue::get(m_builder.getInt32Ty()); // Unused
partEsArgs.push_back(isOffChip);
partEsArgs.push_back(offChipLdsBase);
}
Value *isOffChip = PoisonValue::get(m_builder.getInt32Ty()); // Unused
partEsArgs.push_back(isOffChip);
partEsArgs.push_back(offChipLdsBase);

// Set up system value VGPRs
partEsArgs.push_back(tessCoordX);
Expand Down Expand Up @@ -7510,11 +7506,9 @@ Value *NggPrimShader::fetchXfbOutput(Function *target, ArrayRef<Argument *> args

if (m_hasTes) {
// Set up system value SGPRs
if (m_pipelineState->isTessOffChip()) {
Value *isOffChip = PoisonValue::get(m_builder.getInt32Ty()); // Unused
xfbFetcherArgs.push_back(isOffChip);
xfbFetcherArgs.push_back(offChipLdsBase);
}
Value *isOffChip = PoisonValue::get(m_builder.getInt32Ty()); // Unused
xfbFetcherArgs.push_back(isOffChip);
xfbFetcherArgs.push_back(offChipLdsBase);

// Set up system value VGPRs
xfbFetcherArgs.push_back(tessCoordX);
Expand Down
95 changes: 37 additions & 58 deletions lgc/patch/PatchInOutImportExport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -371,18 +371,11 @@ void PatchInOutImportExport::processShader() {
calcFactor.outPatchSize = outPatchSize;
calcFactor.inPatchSize = inPatchSize;

// NOTE: Tess factors are always stored to on-chip LDS first. Then, they are stored to TF buffer and on-chip LDS
// or off-chip LDS buffer (which will be loaded by TES).
if (m_pipelineState->isTessOffChip()) {
calcFactor.offChip.outPatchStart = 0;
calcFactor.offChip.patchConstStart = calcFactor.offChip.outPatchStart + outPatchTotalSize;

calcFactor.onChip.tessFactorStart = inPatchTotalSize;
} else {
calcFactor.onChip.outPatchStart = inPatchTotalSize;
calcFactor.onChip.patchConstStart = calcFactor.onChip.outPatchStart + outPatchTotalSize;
calcFactor.onChip.tessFactorStart = calcFactor.onChip.patchConstStart + patchConstTotalSize;
}
// NOTE: Tess factors are always stored to on-chip LDS first. Then, they are stored to TF buffer and off-chip
// LDS buffer (which will be loaded by TES).
calcFactor.offChip.outPatchStart = 0;
calcFactor.offChip.patchConstStart = calcFactor.offChip.outPatchStart + outPatchTotalSize;
calcFactor.onChip.tessFactorStart = inPatchTotalSize;

calcFactor.tessFactorStride = tessFactorStride;
calcFactor.tessOnChipLdsSize = calcFactor.onChip.tessFactorStart + tessFactorTotalSize;
Expand Down Expand Up @@ -430,16 +423,12 @@ void PatchInOutImportExport::processShader() {
LLPC_OUTS("Output vertex count: " << outVertexCount << "\n");
LLPC_OUTS("Output vertex stride: " << calcFactor.outVertexStride << "\n");
LLPC_OUTS("Output patch size (in dwords): " << outPatchSize << "\n");
LLPC_OUTS("Output patch start: " << (m_pipelineState->isTessOffChip() ? calcFactor.offChip.outPatchStart
: calcFactor.onChip.outPatchStart)
<< (m_pipelineState->isTessOffChip() ? " (LDS buffer)" : "(LDS)") << "\n");
LLPC_OUTS("Output patch start: " << calcFactor.offChip.outPatchStart << " (LDS buffer)\n");
LLPC_OUTS("Output patch total size (in dwords): " << outPatchTotalSize << "\n");
LLPC_OUTS("\n");
LLPC_OUTS("Patch constant count: " << patchConstCount << "\n");
LLPC_OUTS("Patch constant size (in dwords): " << calcFactor.patchConstSize << "\n");
LLPC_OUTS("Patch constant start: " << (m_pipelineState->isTessOffChip() ? calcFactor.offChip.patchConstStart
: calcFactor.onChip.patchConstStart)
<< (m_pipelineState->isTessOffChip() ? " (LDS buffer)" : "(LDS)") << "\n");
LLPC_OUTS("Patch constant start: " << calcFactor.offChip.patchConstStart << " (LDS buffer)\n");
LLPC_OUTS("Patch constant total size (in dwords): " << patchConstTotalSize << "\n");
LLPC_OUTS("\n");
LLPC_OUTS("Tess factor start: " << calcFactor.onChip.tessFactorStart << " (LDS)\n");
Expand Down Expand Up @@ -1606,7 +1595,7 @@ Value *PatchInOutImportExport::patchTesGenericInputImport(Type *inputTy, unsigne
assert(compIdx);

auto ldsOffset = calcLdsOffsetForTesInput(inputTy, location, locOffset, compIdx, vertexIdx, builder);
return readValueFromLds(m_pipelineState->isTessOffChip(), inputTy, ldsOffset, builder);
return readValueFromLds(true, inputTy, ldsOffset, builder);
}

// =====================================================================================================================
Expand Down Expand Up @@ -1963,7 +1952,7 @@ Value *PatchInOutImportExport::patchTcsGenericOutputImport(Type *outputTy, unsig
Value *compIdx, Value *vertexIdx, BuilderBase &builder) {
assert(compIdx);
auto ldsOffset = calcLdsOffsetForTcsOutput(outputTy, location, locOffset, compIdx, vertexIdx, builder);
return readValueFromLds(m_pipelineState->isTessOffChip(), outputTy, ldsOffset, builder);
return readValueFromLds(true, outputTy, ldsOffset, builder);
}

// =====================================================================================================================
Expand Down Expand Up @@ -2016,7 +2005,7 @@ void PatchInOutImportExport::patchTcsGenericOutputExport(Value *output, unsigned
assert(compIdx);
Type *outputTy = output->getType();
auto ldsOffset = calcLdsOffsetForTcsOutput(outputTy, location, locOffset, compIdx, vertexIdx, builder);
writeValueToLds(m_pipelineState->isTessOffChip(), output, ldsOffset, builder);
writeValueToLds(true, output, ldsOffset, builder);
}

// =====================================================================================================================
Expand Down Expand Up @@ -2230,7 +2219,7 @@ Value *PatchInOutImportExport::patchTesBuiltInInputImport(Type *inputTy, unsigne
const unsigned loc = builtInInLocMap.find(builtInId)->second;

auto ldsOffset = calcLdsOffsetForTesInput(inputTy, loc, nullptr, elemIdx, vertexIdx, builder);
input = readValueFromLds(m_pipelineState->isTessOffChip(), inputTy, ldsOffset, builder);
input = readValueFromLds(true, inputTy, ldsOffset, builder);

break;
}
Expand All @@ -2242,7 +2231,7 @@ Value *PatchInOutImportExport::patchTesBuiltInInputImport(Type *inputTy, unsigne
const unsigned loc = builtInInLocMap.find(builtInId)->second;

auto ldsOffset = calcLdsOffsetForTesInput(inputTy, loc, nullptr, nullptr, vertexIdx, builder);
input = readValueFromLds(m_pipelineState->isTessOffChip(), inputTy, ldsOffset, builder);
input = readValueFromLds(true, inputTy, ldsOffset, builder);

break;
}
Expand All @@ -2259,12 +2248,12 @@ Value *PatchInOutImportExport::patchTesBuiltInInputImport(Type *inputTy, unsigne
for (unsigned i = 0; i < inputTy->getArrayNumElements(); ++i) {
auto elemIdx = builder.getInt32(i);
auto ldsOffset = calcLdsOffsetForTesInput(elemTy, loc, nullptr, elemIdx, vertexIdx, builder);
auto elem = readValueFromLds(m_pipelineState->isTessOffChip(), elemTy, ldsOffset, builder);
auto elem = readValueFromLds(true, elemTy, ldsOffset, builder);
input = builder.CreateInsertValue(input, elem, {i});
}
} else {
auto ldsOffset = calcLdsOffsetForTesInput(inputTy, loc, nullptr, elemIdx, vertexIdx, builder);
input = readValueFromLds(m_pipelineState->isTessOffChip(), inputTy, ldsOffset, builder);
input = readValueFromLds(true, inputTy, ldsOffset, builder);
}

break;
Expand Down Expand Up @@ -2307,12 +2296,12 @@ Value *PatchInOutImportExport::patchTesBuiltInInputImport(Type *inputTy, unsigne
for (unsigned i = 0; i < inputTy->getArrayNumElements(); ++i) {
auto elemIdx = builder.getInt32(i);
auto ldsOffset = calcLdsOffsetForTesInput(elemTy, loc, nullptr, elemIdx, vertexIdx, builder);
auto elem = readValueFromLds(m_pipelineState->isTessOffChip(), elemTy, ldsOffset, builder);
auto elem = readValueFromLds(true, elemTy, ldsOffset, builder);
input = builder.CreateInsertValue(input, elem, {i});
}
} else {
auto ldsOffset = calcLdsOffsetForTesInput(inputTy, loc, nullptr, elemIdx, vertexIdx, builder);
input = readValueFromLds(m_pipelineState->isTessOffChip(), inputTy, ldsOffset, builder);
input = readValueFromLds(true, inputTy, ldsOffset, builder);
}

break;
Expand Down Expand Up @@ -2825,7 +2814,7 @@ Value *PatchInOutImportExport::patchTcsBuiltInOutputImport(Type *outputTy, unsig
unsigned loc = builtInOutLocMap.find(builtInId)->second;

auto ldsOffset = calcLdsOffsetForTcsOutput(outputTy, loc, nullptr, elemIdx, vertexIdx, builder);
output = readValueFromLds(m_pipelineState->isTessOffChip(), outputTy, ldsOffset, builder);
output = readValueFromLds(true, outputTy, ldsOffset, builder);

break;
}
Expand All @@ -2851,12 +2840,12 @@ Value *PatchInOutImportExport::patchTcsBuiltInOutputImport(Type *outputTy, unsig
for (unsigned i = 0; i < outputTy->getArrayNumElements(); ++i) {
auto elemIdx = builder.getInt32(i);
auto ldsOffset = calcLdsOffsetForTcsOutput(elemTy, loc, nullptr, elemIdx, vertexIdx, builder);
auto elem = readValueFromLds(m_pipelineState->isTessOffChip(), elemTy, ldsOffset, builder);
auto elem = readValueFromLds(true, elemTy, ldsOffset, builder);
output = builder.CreateInsertValue(output, elem, {i});
}
} else {
auto ldsOffset = calcLdsOffsetForTcsOutput(outputTy, loc, nullptr, elemIdx, vertexIdx, builder);
output = readValueFromLds(m_pipelineState->isTessOffChip(), outputTy, ldsOffset, builder);
output = readValueFromLds(true, outputTy, ldsOffset, builder);
}

break;
Expand Down Expand Up @@ -3102,7 +3091,7 @@ void PatchInOutImportExport::patchTcsBuiltInOutputExport(Value *output, unsigned
unsigned loc = builtInOutLocMap.find(builtInId)->second;

auto ldsOffset = calcLdsOffsetForTcsOutput(outputTy, loc, nullptr, elemIdx, vertexIdx, builder);
writeValueToLds(m_pipelineState->isTessOffChip(), output, ldsOffset, builder);
writeValueToLds(true, output, ldsOffset, builder);

break;
}
Expand All @@ -3123,11 +3112,11 @@ void PatchInOutImportExport::patchTcsBuiltInOutputExport(Value *output, unsigned
auto elem = ExtractValueInst::Create(output, {i}, "", insertPos);
auto elemIdx = ConstantInt::get(Type::getInt32Ty(*m_context), i);
auto ldsOffset = calcLdsOffsetForTcsOutput(elem->getType(), loc, nullptr, elemIdx, vertexIdx, builder);
writeValueToLds(m_pipelineState->isTessOffChip(), elem, ldsOffset, builder);
writeValueToLds(true, elem, ldsOffset, builder);
}
} else {
auto ldsOffset = calcLdsOffsetForTcsOutput(outputTy, loc, nullptr, elemIdx, vertexIdx, builder);
writeValueToLds(m_pipelineState->isTessOffChip(), output, ldsOffset, builder);
writeValueToLds(true, output, ldsOffset, builder);
}

break;
Expand Down Expand Up @@ -3168,12 +3157,12 @@ void PatchInOutImportExport::patchTcsBuiltInOutputExport(Value *output, unsigned
for (unsigned i = 0; i < outputTy->getArrayNumElements(); ++i) {
auto ldsOffset = calcLdsOffsetForTcsOutput(outputTy, loc, nullptr, builder.getInt32(i), nullptr, builder);
auto elem = builder.CreateExtractValue(output, {i});
writeValueToLds(m_pipelineState->isTessOffChip(), elem, ldsOffset, builder);
writeValueToLds(true, elem, ldsOffset, builder);
}
} else {
// Handle a single element of tessLevelOuter array
auto ldsOffset = calcLdsOffsetForTcsOutput(outputTy, loc, nullptr, elemIdx, nullptr, builder);
writeValueToLds(m_pipelineState->isTessOffChip(), output, ldsOffset, builder);
writeValueToLds(true, output, ldsOffset, builder);
}
}

Expand Down Expand Up @@ -4545,11 +4534,8 @@ Value *PatchInOutImportExport::calcLdsOffsetForTcsOutput(Type *outputTy, unsigne
const auto &inOutUsage = m_pipelineState->getShaderResourceUsage(ShaderStageTessControl)->inOutUsage.tcs;
const auto &calcFactor = inOutUsage.calcFactor;

auto outPatchStart =
m_pipelineState->isTessOffChip() ? calcFactor.offChip.outPatchStart : calcFactor.onChip.outPatchStart;

auto patchConstStart =
m_pipelineState->isTessOffChip() ? calcFactor.offChip.patchConstStart : calcFactor.onChip.patchConstStart;
auto outPatchStart = calcFactor.offChip.outPatchStart;
auto patchConstStart = calcFactor.offChip.patchConstStart;

// attribOffset = (location + locOffset) * 4 + compIdx * bitWidth / 32
Value *attribOffset = builder.getInt32(location);
Expand Down Expand Up @@ -4617,11 +4603,8 @@ Value *PatchInOutImportExport::calcLdsOffsetForTesInput(Type *inputTy, unsigned

const auto &calcFactor = m_pipelineState->getShaderResourceUsage(ShaderStageTessControl)->inOutUsage.tcs.calcFactor;

auto outPatchStart =
m_pipelineState->isTessOffChip() ? calcFactor.offChip.outPatchStart : calcFactor.onChip.outPatchStart;

auto patchConstStart =
m_pipelineState->isTessOffChip() ? calcFactor.offChip.patchConstStart : calcFactor.onChip.patchConstStart;
auto outPatchStart = calcFactor.offChip.outPatchStart;
auto patchConstStart = calcFactor.offChip.patchConstStart;

const auto &entryArgIdxs = m_pipelineState->getShaderInterfaceData(m_shaderStage)->entryArgIdxs.tes;

Expand Down Expand Up @@ -4735,12 +4718,10 @@ unsigned PatchInOutImportExport::calcPatchCountPerThreadGroup(unsigned inVertexC

patchCountPerThreadGroup = std::min(patchCountPerThreadGroup, optimalPatchCountPerThreadGroup);

if (m_pipelineState->isTessOffChip()) {
auto outPatchLdsBufferSize = (outPatchSize + patchConstSize) * 4;
auto tessOffChipPatchCountPerThreadGroup =
m_pipelineState->getTargetInfo().getGpuProperty().tessOffChipLdsBufferSize / outPatchLdsBufferSize;
patchCountPerThreadGroup = std::min(patchCountPerThreadGroup, tessOffChipPatchCountPerThreadGroup);
}
auto outPatchLdsBufferSize = (outPatchSize + patchConstSize) * 4;
auto tessOffChipPatchCountPerThreadGroup =
m_pipelineState->getTargetInfo().getGpuProperty().tessOffChipLdsBufferSize / outPatchLdsBufferSize;
patchCountPerThreadGroup = std::min(patchCountPerThreadGroup, tessOffChipPatchCountPerThreadGroup);

// TF-Buffer-based limit for Patches per Thread Group:
// ---------------------------------------------------------------------------------------------
Expand All @@ -4758,13 +4739,11 @@ unsigned PatchInOutImportExport::calcPatchCountPerThreadGroup(unsigned inVertexC

patchCountPerThreadGroup = std::min(patchCountPerThreadGroup, tfBufferPatchCountLimit);

if (m_pipelineState->isTessOffChip()) {
// For all-offchip tessellation, we need to write an additional 4-byte TCS control word to the TF buffer whenever
// the patch-ID is zero.
const unsigned offChipTfBufferPatchCountLimit =
(tfBufferSizeInBytes - (patchCountPerThreadGroup * sizeof(unsigned))) / (tessFactorStride * sizeof(unsigned));
patchCountPerThreadGroup = std::min(patchCountPerThreadGroup, offChipTfBufferPatchCountLimit);
}
// For all-offchip tessellation, we need to write an additional 4-byte TCS control word to the TF buffer whenever
// the patch-ID is zero.
const unsigned offChipTfBufferPatchCountLimit =
(tfBufferSizeInBytes - (patchCountPerThreadGroup * sizeof(unsigned))) / (tessFactorStride * sizeof(unsigned));
patchCountPerThreadGroup = std::min(patchCountPerThreadGroup, offChipTfBufferPatchCountLimit);

return patchCountPerThreadGroup;
}
Expand Down
7 changes: 2 additions & 5 deletions lgc/patch/RegisterMetadataBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,6 @@ void RegisterMetadataBuilder::buildLsHsRegisters() {
getGraphicsRegNode()[Util::Abi::GraphicsRegisterMetadataKey::LsVgprCompCnt] = lsVgprCompCnt;

// Set LDS_SIZE of SPI_SHADER_PGM_RSRC2_HS
assert(m_pipelineState->isTessOffChip()); // Must be off-chip on GFX9+
unsigned ldsSizeInDwords = calcFactor.tessOnChipLdsSize;
ldsSizeInDwords += calcFactor.rayQueryLdsStackSize;

Expand Down Expand Up @@ -743,8 +742,7 @@ void RegisterMetadataBuilder::buildHwVsRegisters() {
else
getGraphicsRegNode()[Util::Abi::GraphicsRegisterMetadataKey::VsVgprCompCnt] = 2;

if (m_pipelineState->isTessOffChip())
getHwShaderNode(Util::Abi::HardwareStage::Vs)[Util::Abi::HardwareStageMetadataKey::OffchipLdsEn] = true;
getHwShaderNode(Util::Abi::HardwareStage::Vs)[Util::Abi::HardwareStageMetadataKey::OffchipLdsEn] = true;
}
}

Expand Down Expand Up @@ -1563,8 +1561,7 @@ void RegisterMetadataBuilder::setVgtTfParam() {
vgtTfParam[Util::Abi::VgtTfParamMetadataKey::Type] = primType;
vgtTfParam[Util::Abi::VgtTfParamMetadataKey::Partitioning] = partition;
vgtTfParam[Util::Abi::VgtTfParamMetadataKey::Topology] = topology;
if (m_pipelineState->isTessOffChip())
vgtTfParam[Util::Abi::VgtTfParamMetadataKey::DistributionMode] = TRAPEZOIDS;
vgtTfParam[Util::Abi::VgtTfParamMetadataKey::DistributionMode] = TRAPEZOIDS;
}

// =====================================================================================================================
Expand Down
Loading
Loading