diff --git a/lgc/include/lgc/state/IntrinsDefs.h b/lgc/include/lgc/state/IntrinsDefs.h index 05b2920fba..c2300a1b65 100644 --- a/lgc/include/lgc/state/IntrinsDefs.h +++ b/lgc/include/lgc/state/IntrinsDefs.h @@ -65,19 +65,19 @@ static const unsigned GsEmitCutStreamIdMask = 0x300; // Mask of STREAM_ID of the static const unsigned GetRealTime = 0x83; // [7] = 1, [6:0] = 3 // Count of user SGPRs used in copy shader -static const unsigned CopyShaderUserSgprCount = 3; +static const unsigned CopyShaderUserSgprCount = 2; // Entry-point argument index for the stream info in copy shader -static const unsigned CopyShaderEntryArgIdxStreamInfo = 3; +static const unsigned CopyShaderEntryArgIdxStreamInfo = 2; // Entry-point argument index for the stream-out write index in copy shader -static const unsigned CopyShaderEntryArgIdxWriteIndex = 4; +static const unsigned CopyShaderEntryArgIdxWriteIndex = 3; // Entry-point argument index for the stream offsets in copy shader -static const unsigned CopyShaderEntryArgIdxStreamOffset = 5; +static const unsigned CopyShaderEntryArgIdxStreamOffset = 4; // Entry-point argument index for the LDS offset of current vertices in GS-VS ring -static const unsigned CopyShaderEntryArgIdxVertexOffset = 9; +static const unsigned CopyShaderEntryArgIdxVertexOffset = 8; // Enumerates address spaces valid for AMD GPU (similar to LLVM header AMDGPU.h) enum AddrSpace { diff --git a/lgc/include/lgc/state/ResourceUsage.h b/lgc/include/lgc/state/ResourceUsage.h index c6b2c316be..7522eeb0c7 100644 --- a/lgc/include/lgc/state/ResourceUsage.h +++ b/lgc/include/lgc/state/ResourceUsage.h @@ -513,7 +513,6 @@ struct InterfaceData { struct { // Geometry shader struct { - unsigned copyShaderEsGsLdsSize; // ES -> GS ring LDS size (for copy shader) unsigned copyShaderStreamOutTable; // Stream-out table (for copy shader) } gs; diff --git a/lgc/patch/ConfigBuilderBase.cpp b/lgc/patch/ConfigBuilderBase.cpp index 9806f88c91..721bb2b2ec 100644 --- a/lgc/patch/ConfigBuilderBase.cpp +++ b/lgc/patch/ConfigBuilderBase.cpp @@ -208,14 +208,6 @@ void ConfigBuilderBase::setPsSampleMask(bool value) { m_pipelineNode[Util::Abi::PipelineMetadataKey::PsSampleMask] = value; } -// ===================================================================================================================== -// Set ES_GS_LDS_BYTE_SIZE -// -// @param value : Value to set -void ConfigBuilderBase::setEsGsLdsByteSize(unsigned value) { - m_pipelineNode[Util::Abi::PipelineMetadataKey::EsGsLdsSize] = value; -} - // ===================================================================================================================== // Set hardware stage wavefront // @@ -289,17 +281,6 @@ void ConfigBuilderBase::setLdsSizeByteSize(Util::Abi::HardwareStage hwStage, uns hwShaderNode[Util::Abi::HardwareStageMetadataKey::LdsSize] = value; } -// ===================================================================================================================== -// Set ES-GS LDS byte size -// -// @param value : Value to set -void ConfigBuilderBase::setEsGsLdsSize(unsigned value) { - if (value == 0) - return; // Optional - - m_pipelineNode[Util::Abi::PipelineMetadataKey::EsGsLdsSize] = value; -} - // ===================================================================================================================== // Set NGG subgroup size // diff --git a/lgc/patch/ConfigBuilderBase.h b/lgc/patch/ConfigBuilderBase.h index 69784c2cd7..cb4193c2c8 100644 --- a/lgc/patch/ConfigBuilderBase.h +++ b/lgc/patch/ConfigBuilderBase.h @@ -80,12 +80,10 @@ class ConfigBuilderBase { void setPsWritesUavs(bool value); void setPsWritesDepth(bool value); void setPsSampleMask(bool value); - void setEsGsLdsByteSize(unsigned value); void setWaveFrontSize(Util::Abi::HardwareStage hwStage, unsigned value); void setApiName(const char *value); void setPipelineType(Util::Abi::PipelineType value); void setLdsSizeByteSize(Util::Abi::HardwareStage hwStage, unsigned value); - void setEsGsLdsSize(unsigned value); void setNggSubgroupSize(unsigned value); void setThreadgroupDimensions(llvm::ArrayRef values); void setStreamOutVertexStrides(llvm::ArrayRef values); diff --git a/lgc/patch/Gfx9ConfigBuilder.cpp b/lgc/patch/Gfx9ConfigBuilder.cpp index aaf3c3d29c..61faaf0830 100644 --- a/lgc/patch/Gfx9ConfigBuilder.cpp +++ b/lgc/patch/Gfx9ConfigBuilder.cpp @@ -1151,7 +1151,6 @@ void ConfigBuilder::buildEsGsRegConfig(ShaderStage shaderStage1, ShaderStage sha SET_REG_FIELD(&config->esGsRegs, SPI_SHADER_PGM_RSRC2_GS, LDS_SIZE, ldsSize); setLdsSizeByteSize(Util::Abi::HardwareStage::Gs, ldsSizeInDwords * 4); - setEsGsLdsSize(calcFactor.esGsLdsSize * 4); unsigned maxVertOut = std::max(1u, static_cast(geometryMode.outputVertices)); SET_REG_FIELD(&config->esGsRegs, VGT_GS_MAX_VERT_OUT, MAX_VERT_OUT, maxVertOut); @@ -1163,8 +1162,6 @@ void ConfigBuilder::buildEsGsRegConfig(ShaderStage shaderStage1, ShaderStage sha SET_REG_FIELD(&config->esGsRegs, VGT_GS_MODE, ONCHIP, VGT_GS_MODE_ONCHIP_ON); SET_REG_FIELD(&config->esGsRegs, VGT_GS_MODE, ES_WRITE_OPTIMIZE, false); SET_REG_FIELD(&config->esGsRegs, VGT_GS_MODE, GS_WRITE_OPTIMIZE, false); - - setEsGsLdsByteSize(calcFactor.esGsLdsSize * 4); } else { SET_REG_FIELD(&config->esGsRegs, VGT_GS_MODE, ONCHIP, VGT_GS_MODE_ONCHIP_OFF); SET_REG_FIELD(&config->esGsRegs, VGT_GS_MODE, ES_WRITE_OPTIMIZE, false); @@ -1366,7 +1363,6 @@ void ConfigBuilder::buildPrimShaderRegConfig(ShaderStage shaderStage1, ShaderSta const unsigned ldsSize = ldsSizeInDwords >> ldsSizeDwordGranularityShift; SET_REG_FIELD(&config->primShaderRegs, SPI_SHADER_PGM_RSRC2_GS, LDS_SIZE, ldsSize); setLdsSizeByteSize(Util::Abi::HardwareStage::Gs, ldsSizeInDwords * 4); - setEsGsLdsSize(calcFactor.esGsLdsSize * 4); if (m_gfxIp.major >= 11) { // Pixel wait sync+ diff --git a/lgc/patch/PatchCopyShader.cpp b/lgc/patch/PatchCopyShader.cpp index 4829de1a27..d109a44c62 100644 --- a/lgc/patch/PatchCopyShader.cpp +++ b/lgc/patch/PatchCopyShader.cpp @@ -45,14 +45,6 @@ #define DEBUG_TYPE "lgc-patch-copy-shader" -namespace llvm { -namespace cl { - -extern opt InRegEsGsLdsSize; - -} // namespace cl -} // namespace llvm - using namespace lgc; using namespace llvm; @@ -111,7 +103,6 @@ bool PatchCopyShader::runImpl(Module &module, PipelineShadersResult &pipelineSha // // void copyShader( // i32 inreg globalTable, - // i32 inreg esGsLdsSize (GFX9+), // i32 inreg streamOutTable (GFX9+), // i32 inreg streamOutInfo, // i32 inreg streamOutWriteIndex, @@ -122,12 +113,11 @@ bool PatchCopyShader::runImpl(Module &module, PipelineShadersResult &pipelineSha // i32 vertexOffset) // - argTys = {int32Ty, int32Ty, int32Ty, int32Ty, int32Ty, int32Ty, int32Ty, int32Ty, int32Ty, int32Ty}; + argTys = {int32Ty, int32Ty, int32Ty, int32Ty, int32Ty, int32Ty, int32Ty, int32Ty, int32Ty}; - argInReg = {true, true, true, true, true, true, true, true, true, false}; + argInReg = {true, true, true, true, true, true, true, true, false}; // clang-format off argNames = {"globalTable", - "esGsLdsSize", "streamOutTable", "streamOutInfo", "streamOutWriteIndex", @@ -149,15 +139,15 @@ bool PatchCopyShader::runImpl(Module &module, PipelineShadersResult &pipelineSha // GFX11+: // void copyShader( // i32 inreg globalTable, - // i32 vertexId) + // i32 vertexIndex) if (m_pipelineState->getTargetInfo().getGfxIpVersion().major <= 10) { argTys = {int32Ty}; argInReg = {false}; - argNames = {"vertexId"}; + argNames = {"vertexIndex"}; } else { argTys = {int32Ty, int32Ty}; argInReg = {true, false}; - argNames = {"globalTable", "vertexId"}; + argNames = {"globalTable", "vertexIndex"}; } } @@ -201,12 +191,9 @@ bool PatchCopyShader::runImpl(Module &module, PipelineShadersResult &pipelineSha auto intfData = m_pipelineState->getShaderInterfaceData(ShaderStageCopyShader); if (!m_pipelineState->getNggControl()->enableNgg) { - // For GFX9+, streamOutTable SGPR index value should be greater than esGsLdsSize - intfData->userDataUsage.gs.copyShaderEsGsLdsSize = 1; - intfData->userDataUsage.gs.copyShaderStreamOutTable = 2; + intfData->userDataUsage.gs.copyShaderStreamOutTable = 1; } else { - // If NGG, both esGsLdsSize and streamOutTable are not used - intfData->userDataUsage.gs.copyShaderEsGsLdsSize = InvalidValue; + // If NGG, streamOutTable is not used intfData->userDataUsage.gs.copyShaderStreamOutTable = InvalidValue; } @@ -221,9 +208,6 @@ bool PatchCopyShader::runImpl(Module &module, PipelineShadersResult &pipelineSha if (m_pipelineState->enableXfb()) userData[intfData->userDataUsage.gs.copyShaderStreamOutTable] = static_cast(UserDataMapping::StreamOutTable); - if (cl::InRegEsGsLdsSize && m_pipelineState->isGsOnChip()) - userData[intfData->userDataUsage.gs.copyShaderEsGsLdsSize] = - static_cast(UserDataMapping::EsGsLdsSize); m_pipelineState->setUserDataMap(ShaderStageCopyShader, userData); } else { m_pipelineState->getPalMetadata()->setUserDataEntry(ShaderStageCopyShader, 0, UserDataMapping::GlobalTable); @@ -232,10 +216,6 @@ bool PatchCopyShader::runImpl(Module &module, PipelineShadersResult &pipelineSha intfData->userDataUsage.gs.copyShaderStreamOutTable, UserDataMapping::StreamOutTable); } - if (cl::InRegEsGsLdsSize && m_pipelineState->isGsOnChip()) { - m_pipelineState->getPalMetadata()->setUserDataEntry( - ShaderStageCopyShader, intfData->userDataUsage.gs.copyShaderEsGsLdsSize, UserDataMapping::EsGsLdsSize); - } } } diff --git a/lgc/patch/PatchEntryPointMutate.cpp b/lgc/patch/PatchEntryPointMutate.cpp index 9e4073c9cb..70a594750f 100644 --- a/lgc/patch/PatchEntryPointMutate.cpp +++ b/lgc/patch/PatchEntryPointMutate.cpp @@ -82,16 +82,6 @@ using namespace llvm; using namespace lgc; -namespace llvm { -namespace cl { - -// -inreg-esgs-lds-size: Add a dummy "inreg" argument for ES-GS LDS size, this is to keep consistent with PAL's -// GS on-chip behavior. In the future, if PAL allows hardcoded ES-GS LDS size, this option could be deprecated. -opt InRegEsGsLdsSize("inreg-esgs-lds-size", desc("For GS on-chip, add esGsLdsSize in user data"), init(true)); - -} // namespace cl -} // namespace llvm - // ===================================================================================================================== PatchEntryPointMutate::PatchEntryPointMutate() : m_hasTs(false), m_hasGs(false), @@ -1706,34 +1696,6 @@ void PatchEntryPointMutate::addSpecialUserDataArgs(SmallVectorImpl specialUserDataArgs.push_back(UserDataArg(builder.getInt32Ty(), "viewId", userDataValue, argIdx)); } - // NOTE: Add a dummy "inreg" argument for ES-GS LDS size, this is to keep consistent - // with PAL's GS on-chip behavior (VS is in NGG primitive shader). - bool wantEsGsLdsSize = false; - switch (getMergedShaderStage(m_shaderStage)) { - case ShaderStageVertex: - wantEsGsLdsSize = enableNgg; - break; - case ShaderStageTessControl: - wantEsGsLdsSize = false; - break; - case ShaderStageTessEval: - wantEsGsLdsSize = enableNgg; - break; - case ShaderStageGeometry: - wantEsGsLdsSize = (m_pipelineState->isGsOnChip() && cl::InRegEsGsLdsSize) || enableNgg; - break; - default: - llvm_unreachable("Unexpected shader stage"); - } - if (wantEsGsLdsSize) { - auto userDataValue = UserDataMapping::EsGsLdsSize; - // For a standalone TCS (which can only happen in unit testing, not in a real pipeline), don't add - // the PAL metadata for it, for consistency with the old code. - if (m_shaderStage == ShaderStageVertex && !m_pipelineState->hasShaderStage(ShaderStageVertex)) - userDataValue = UserDataMapping::Invalid; - specialUserDataArgs.push_back(UserDataArg(builder.getInt32Ty(), "esGsLdsSize", userDataValue)); - } - if (getMergedShaderStage(m_shaderStage) == getMergedShaderStage(ShaderStageVertex)) { // This is the VS, or the shader that VS is merged into on GFX9+. auto vsIntfData = m_pipelineState->getShaderInterfaceData(ShaderStageVertex); diff --git a/lgc/patch/PatchResourceCollect.cpp b/lgc/patch/PatchResourceCollect.cpp index d49c2ed82d..9ff449c947 100644 --- a/lgc/patch/PatchResourceCollect.cpp +++ b/lgc/patch/PatchResourceCollect.cpp @@ -727,8 +727,7 @@ bool PatchResourceCollect::checkGsOnChipValidity() { gsResUsage->inOutUsage.gs.calcFactor.esVertsPerSubgroup = esVertsPerSubgroup; gsResUsage->inOutUsage.gs.calcFactor.gsPrimsPerSubgroup = gsPrimsPerSubgroup; - // EsGsLdsSize is passed in a user data SGPR to the merged shader so that the API GS knows where to start - // reading out of LDS. EsGsLdsSize is unnecessary when there is no API GS. + // EsGsLdsSize is unnecessary when there is no API GS. gsResUsage->inOutUsage.gs.calcFactor.esGsLdsSize = hasGs ? expectedEsLdsSize : 0; gsResUsage->inOutUsage.gs.calcFactor.gsOnChipLdsSize = needsLds ? ldsSizeDwords : 0; diff --git a/lgc/patch/RegisterMetadataBuilder.cpp b/lgc/patch/RegisterMetadataBuilder.cpp index 94c0bbc05c..5086908d34 100644 --- a/lgc/patch/RegisterMetadataBuilder.cpp +++ b/lgc/patch/RegisterMetadataBuilder.cpp @@ -402,7 +402,6 @@ void RegisterMetadataBuilder::buildEsGsRegisters() { auto hwShaderNode = getHwShaderNode(Util::Abi::HardwareStage::Gs); hwShaderNode[Util::Abi::HardwareStageMetadataKey::LdsSize] = calcLdsSize(ldsSizeInDwords); - setEsGsLdsSize(calcFactor.esGsLdsSize * 4); } // ===================================================================================================================== @@ -673,8 +672,6 @@ void RegisterMetadataBuilder::buildPrimShaderRegisters() { auto hwShaderNode = getHwShaderNode(Util::Abi::HardwareStage::Gs); hwShaderNode[Util::Abi::HardwareStageMetadataKey::LdsSize] = calcLdsSize(ldsSizeInDwords); - if (!m_hasMesh) - setEsGsLdsSize(calcFactor.esGsLdsSize * 4); } // ===================================================================================================================== diff --git a/lgc/patch/ShaderInputs.cpp b/lgc/patch/ShaderInputs.cpp index d4dd2df0de..1a31e38255 100644 --- a/lgc/patch/ShaderInputs.cpp +++ b/lgc/patch/ShaderInputs.cpp @@ -57,8 +57,6 @@ const char *ShaderInputs::getSpecialUserDataName(UserDataMapping kind) { return "DrawIndex"; case UserDataMapping::Workgroup: return "Workgroup"; - case UserDataMapping::EsGsLdsSize: - return "EsGsLdsSize"; case UserDataMapping::ViewId: return "ViewId"; case UserDataMapping::StreamOutTable: diff --git a/llpc/test/shaderdb/gfx11/SgprUserDataInit_Fs.pipe b/llpc/test/shaderdb/gfx11/SgprUserDataInit_Fs.pipe index d5f60e5dfa..b82d785915 100644 --- a/llpc/test/shaderdb/gfx11/SgprUserDataInit_Fs.pipe +++ b/llpc/test/shaderdb/gfx11/SgprUserDataInit_Fs.pipe @@ -302,12 +302,11 @@ colorBuffer[0].blendSrcAlphaToColor = 0 ; CHECK-NEXT: .offchip_lds_en: false ; CHECK-NEXT: .scratch_en: false ; CHECK-NEXT: .scratch_memory_size: 0 -; CHECK-NEXT: .sgpr_count: 0xc +; CHECK-NEXT: .sgpr_count: 0xb ; CHECK-NEXT: .sgpr_limit: 0x6a ; CHECK-NEXT: .trap_present: 0 ; CHECK-NEXT: .user_data_reg_map: ; CHECK-NEXT: - 0x10000000 -; CHECK-NEXT: - 0x1000000a ; CHECK-NEXT: - 0x10000003 ; CHECK-NEXT: - 0x10000004 ; CHECK-NEXT: - 0xffffffff @@ -338,7 +337,8 @@ colorBuffer[0].blendSrcAlphaToColor = 0 ; CHECK-NEXT: - 0xffffffff ; CHECK-NEXT: - 0xffffffff ; CHECK-NEXT: - 0xffffffff -; CHECK-NEXT: .user_sgprs: 0x4 +; CHECK-NEXT: - 0xffffffff +; CHECK-NEXT: .user_sgprs: 0x3 ; CHECK-NEXT: .vgpr_count: 0x9 ; CHECK-NEXT: .vgpr_limit: 0x100 ; CHECK-NEXT: .wavefront_size: 0x40