diff --git a/.clang-tidy b/.clang-tidy index 9e52258a59..b1475cffe4 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -31,7 +31,7 @@ CheckOptions: - { key: readability-identifier-naming.MemberPrefix, value: m_ } - { key: readability-identifier-naming.MemberRemovePrefixes, value: 'p,b,pfn,m_p,m_b' } - { key: readability-identifier-naming.MethodCase, value: camelBack } - - { key: readability-identifier-naming.MethodIgnoredRegexp, value: '^Create$|^CreateACos$|^CreateACosh$|^CreateASin$|^CreateASinh$|^CreateATan$|^CreateATan2$|^CreateATanh$|^CreateBarrier$|^CreateBinaryIntrinsic$|^CreateCosh$|^CreateCrossProduct$|^CreateCubeFace.*$|^CreateDemoteToHelperInvocation$|^CreateDerivative$|^CreateDeterminant$|^CreateDotProduct$|^CreateEmitVertex$|^CreateEndPrimitive$|^CreateExp$|^CreateExtract.*$|^CreateFaceForward$|^CreateFClamp$|^CreateFindSMsb$|^CreateFma$|^CreateFMax$|^CreateFMax3$|^CreateFMid3$|^CreateFMin$|^CreateFMin3$|^CreateFMod$|^CreateFpTruncWithRounding$|^CreateFract$|^CreateFSign$|^CreateGet.*$|^CreateImage.*$|^CreateIndexDescPtr$|^CreateInsertBitField$|^CreateIntrinsic$|^CreateInverseSqrt$|^CreateIs.*$|^CreateKill$|^CreateLdexp$|^CreateLoad.*$|^CreateLog$|^CreateMapToInt32$|^CreateMatrix.*$|^CreateNormalizeVector$|^CreateOuterProduct$|^CreatePower$|^CreateQuantizeToFp16$|^CreateRead.*$|^CreateReflect$|^CreateRefract$|^CreateSAbs$|^CreateSinh$|^CreateSMod$|^CreateSmoothStep$|^CreateSSign$|^CreateSubgroup.*$|^CreateTan$|^CreateTanh$|^CreateTransposeMatrix$|^CreateUnaryIntrinsic$|^CreateVectorTimesMatrix$|^CreateWrite.*Output$|^Serialize$|^Merge$|^Destroy$|^ConvertColorBufferFormatToExportFormat$|^BuildShaderModule$|^BuildGraphicsPipeline$|^BuildComputePipeline$|^IsVertexFormatSupported$|^DumpSpirvBinary$|^BeginPipelineDump$|^EndPipelineDump$|^DumpPipelineBinary$|^DumpPipelineExtraInfo$|^GetShaderHash$|^GetPipelineHash$|^GetPipelineName$|^CreateShaderCache$|^ReadFromBuffer$|^GetSectionIndex$|^GetSymbolsBySectionIndex$|^GetSectionData$' } + - { key: 
readability-identifier-naming.MethodIgnoredRegexp, value: '^Create$|^CreateACos$|^CreateACosh$|^CreateASin$|^CreateASinh$|^CreateATan$|^CreateATan2$|^CreateATanh$|^CreateBarrier$|^CreateBinaryIntrinsic$|^CreateCosh$|^CreateCrossProduct$|^CreateCubeFace.*$|^CreateDemoteToHelperInvocation$|^CreateDerivative$|^CreateDeterminant$|^CreateDotProduct$|^CreateEmitVertex$|^CreateEndPrimitive$|^CreateExp$|^CreateExtract.*$|^CreateFaceForward$|^CreateFClamp$|^CreateFindSMsb$|^CreateFma$|^CreateFMax$|^CreateFMax3$|^CreateFMid3$|^CreateFMin$|^CreateFMin3$|^CreateFMod$|^CreateFpTruncWithRounding$|^CreateFract$|^CreateFSign$|^CreateGet.*$|^CreateImage.*$|^CreateIndexDescPtr$|^CreateInsertBitField$|^CreateIntrinsic$|^CreateInverseSqrt$|^CreateIs.*$|^CreateKill$|^CreateLdexp$|^CreateLoad.*$|^CreateLog$|^CreateMapToSimpleType$|^CreateMatrix.*$|^CreateNormalizeVector$|^CreateOuterProduct$|^CreatePower$|^CreateQuantizeToFp16$|^CreateRead.*$|^CreateReflect$|^CreateRefract$|^CreateSAbs$|^CreateSinh$|^CreateSMod$|^CreateSmoothStep$|^CreateSSign$|^CreateSubgroup.*$|^CreateTan$|^CreateTanh$|^CreateTransposeMatrix$|^CreateUnaryIntrinsic$|^CreateVectorTimesMatrix$|^CreateWrite.*Output$|^Serialize$|^Merge$|^Destroy$|^ConvertColorBufferFormatToExportFormat$|^BuildShaderModule$|^BuildGraphicsPipeline$|^BuildComputePipeline$|^IsVertexFormatSupported$|^DumpSpirvBinary$|^BeginPipelineDump$|^EndPipelineDump$|^DumpPipelineBinary$|^DumpPipelineExtraInfo$|^GetShaderHash$|^GetPipelineHash$|^GetPipelineName$|^CreateShaderCache$|^ReadFromBuffer$|^GetSectionIndex$|^GetSymbolsBySectionIndex$|^GetSectionData$' } - { key: readability-identifier-naming.FunctionIgnoredRegexp, value: 'EnableOuts|EnableErrs' } - { key: readability-identifier-naming.FunctionCase, value: camelBack } - { key: readability-identifier-naming.TypeCase, value: CamelCase } diff --git a/lgc/builder/BuilderBase.cpp b/lgc/builder/BuilderBase.cpp index c5f5dcd13a..712d2aecc1 100644 --- a/lgc/builder/BuilderBase.cpp +++ 
b/lgc/builder/BuilderBase.cpp @@ -153,16 +153,17 @@ Value *BuilderBase::CreateAddByteOffset(Value *pointer, Value *byteOffset, const } // ===================================================================================================================== -// Create a map to i32 function. Many AMDGCN intrinsics only take i32's, so we need to massage input data into an i32 -// to allow us to call these intrinsics. This helper takes a function pointer, massage arguments, and passthrough -// arguments and massages the mappedArgs into i32's before calling the function pointer. Note that all massage -// arguments must have the same type. +// Create a map to simple type function. Many AMDGCN intrinsics only take simple types (i32 or simple vector), so we +// need to massage input data into a simple type to allow us to call these intrinsics. This helper takes a function +// pointer, massage arguments, and passthrough arguments and massages the mappedArgs into the simple type before +// calling the function pointer. Note that all massage arguments must have the same type. // -// @param mapFunc : The function to call on each provided i32. -// @param mappedArgs : The arguments to be massaged into i32's and passed to function. +// @param mapFunc : The function to call on each provided simple-type value. +// @param mappedArgs : The arguments to be massaged into the simple type and passed to the function. // @param passthroughArgs : The arguments to be passed through as is (no massaging). -Value *BuilderBase::CreateMapToInt32(MapToInt32Func mapFunc, ArrayRef mappedArgs, - ArrayRef passthroughArgs) { +// @param simpleMode : Selects the simple type to map to (Int32 or SimpleVector). +Value *BuilderBase::CreateMapToSimpleType(MapToSimpleTypeFunc mapFunc, ArrayRef mappedArgs, + ArrayRef passthroughArgs, MapToSimpleMode simpleMode) { // We must have at least one argument to massage. 
assert(mappedArgs.size() > 0); @@ -172,35 +173,60 @@ Value *BuilderBase::CreateMapToInt32(MapToInt32Func mapFunc, ArrayRef m for (unsigned i = 1; i < mappedArgs.size(); i++) assert(mappedArgs[i]->getType() == type); - if (mappedArgs[0]->getType()->isVectorTy()) { - // For vectors we extract each vector component and map them individually. - const unsigned compCount = cast(type)->getNumElements(); - - SmallVector results; - - for (unsigned i = 0; i < compCount; i++) { + if (type->isStructTy()) { + assert(simpleMode == MapToSimpleMode::SimpleVector); + // For struct we extract each member and map them individually. + const unsigned memberCount = type->getStructNumElements(); + SmallVector results; + for (unsigned i = 0; i < memberCount; ++i) { SmallVector newMappedArgs; - for (Value *const mappedArg : mappedArgs) - newMappedArgs.push_back(CreateExtractElement(mappedArg, i)); + newMappedArgs.push_back(CreateExtractValue(mappedArg, i)); - results.push_back(CreateMapToInt32(mapFunc, newMappedArgs, passthroughArgs)); + results.push_back(CreateMapToSimpleType(mapFunc, newMappedArgs, passthroughArgs, MapToSimpleMode::SimpleVector)); } - Value *result = PoisonValue::get(FixedVectorType::get(results[0]->getType(), compCount)); - - for (unsigned i = 0; i < compCount; i++) - result = CreateInsertElement(result, results[i], i); + Value *result = PoisonValue::get(type); + for (unsigned i = 0; i < memberCount; ++i) + result = CreateInsertValue(result, results[i], i); return result; } + if (type->isVectorTy()) { + if (simpleMode == MapToSimpleMode::Int32) { + // For vectors we extract each vector component and map them individually. 
+ const unsigned compCount = cast(type)->getNumElements(); + + SmallVector results; + + for (unsigned i = 0; i < compCount; i++) { + SmallVector newMappedArgs; + + for (Value *const mappedArg : mappedArgs) + newMappedArgs.push_back(CreateExtractElement(mappedArg, i)); + + results.push_back(CreateMapToSimpleType(mapFunc, newMappedArgs, passthroughArgs)); + } + + Value *result = PoisonValue::get(FixedVectorType::get(results[0]->getType(), compCount)); + + for (unsigned i = 0; i < compCount; i++) + result = CreateInsertElement(result, results[i], i); + + return result; + } else if (simpleMode == MapToSimpleMode::SimpleVector) { + return mapFunc(*this, mappedArgs, passthroughArgs); + } else { + llvm_unreachable("Unhandled simple mode"); + } + } if (type->isIntegerTy() && type->getIntegerBitWidth() == 1) { SmallVector newMappedArgs; for (Value *const mappedArg : mappedArgs) newMappedArgs.push_back(CreateZExt(mappedArg, getInt32Ty())); - Value *const result = CreateMapToInt32(mapFunc, newMappedArgs, passthroughArgs); + Value *const result = CreateMapToSimpleType(mapFunc, newMappedArgs, passthroughArgs); return CreateTrunc(result, getInt1Ty()); } if (type->isIntegerTy() && type->getIntegerBitWidth() < 32) { @@ -214,7 +240,7 @@ Value *BuilderBase::CreateMapToInt32(MapToInt32Func mapFunc, ArrayRef m newMappedArgs.push_back(CreateBitCast(newMappedArg, getInt32Ty())); } - Value *const result = CreateMapToInt32(mapFunc, newMappedArgs, passthroughArgs); + Value *const result = CreateMapToSimpleType(mapFunc, newMappedArgs, passthroughArgs); return CreateExtractElement(CreateBitCast(result, vectorType), static_cast(0)); } if (type->getPrimitiveSizeInBits() == 64) { @@ -231,7 +257,7 @@ Value *BuilderBase::CreateMapToInt32(MapToInt32Func mapFunc, ArrayRef m for (Value *const castMappedArg : castMappedArgs) newMappedArgs.push_back(CreateExtractElement(castMappedArg, i)); - Value *const resultComp = CreateMapToInt32(mapFunc, newMappedArgs, passthroughArgs); + Value *const resultComp 
= CreateMapToSimpleType(mapFunc, newMappedArgs, passthroughArgs); result = CreateInsertElement(result, resultComp, i); } @@ -244,7 +270,7 @@ Value *BuilderBase::CreateMapToInt32(MapToInt32Func mapFunc, ArrayRef m for (Value *const mappedArg : mappedArgs) newMappedArgs.push_back(CreateBitCast(mappedArg, getIntNTy(mappedArg->getType()->getPrimitiveSizeInBits()))); - Value *const result = CreateMapToInt32(mapFunc, newMappedArgs, passthroughArgs); + Value *const result = CreateMapToSimpleType(mapFunc, newMappedArgs, passthroughArgs); return CreateBitCast(result, type); } if (type->isIntegerTy(32)) @@ -266,7 +292,7 @@ Value *BuilderBase::CreateInlineAsmSideEffect(Value *const value) { return builder.CreateCall(inlineAsm, value); }; - return CreateMapToInt32(mapFunc, value, {}); + return CreateMapToSimpleType(mapFunc, value, {}); } // ===================================================================================================================== @@ -281,5 +307,41 @@ Value *BuilderBase::CreateSetInactive(Value *active, Value *inactive) { return builder.CreateIntrinsic(Intrinsic::amdgcn_set_inactive, active->getType(), {active, inactive}); }; - return CreateMapToInt32(mapFunc, {active, inactive}, {}); + return CreateMapToSimpleType(mapFunc, {active, inactive}, {}); +} + +// ===================================================================================================================== +// Create a waterfall end intrinsic. +// +// @param nonUniform : The non-uniform instruction whose result is passed to the waterfall end intrinsic. +// @param waterfallBegin : The waterfall begin intrinsic. +Instruction *BuilderBase::CreateWaterfallEnd(Value *nonUniform, Value *waterfallBegin) { + + auto nonUniformInst = cast(nonUniform); + Instruction *resultValue = nonUniformInst; + + // End the waterfall loop (as long as nonUniformInst is not a store with no result). 
+ if (!nonUniformInst->getType()->isVoidTy()) { + SetInsertPoint(nonUniformInst->getNextNode()); + SetCurrentDebugLocation(nonUniformInst->getDebugLoc()); + + Type *waterfallEndTy = resultValue->getType(); + if (auto vecTy = dyn_cast(waterfallEndTy)) { + if (vecTy->getElementType()->isIntegerTy(8)) { + // ISel does not like waterfall.end with vector of i8 type, so cast if necessary. + assert((vecTy->getNumElements() % 4) == 0); + waterfallEndTy = getInt32Ty(); + if (vecTy->getNumElements() != 4) + waterfallEndTy = FixedVectorType::get(getInt32Ty(), vecTy->getNumElements() / 4); + resultValue = cast(CreateBitCast(resultValue, waterfallEndTy)); + } + } + resultValue = + CreateIntrinsic(Intrinsic::amdgcn_waterfall_end, waterfallEndTy, {waterfallBegin, resultValue}, nullptr); + + if (waterfallEndTy != nonUniformInst->getType()) + resultValue = cast(CreateBitCast(resultValue, nonUniformInst->getType())); + } + + return resultValue; } diff --git a/lgc/builder/BuilderImpl.cpp b/lgc/builder/BuilderImpl.cpp index 2a3197be06..0a1f9d82c1 100644 --- a/lgc/builder/BuilderImpl.cpp +++ b/lgc/builder/BuilderImpl.cpp @@ -679,38 +679,16 @@ Instruction *BuilderImpl::createWaterfallLoop(Instruction *nonUniformInst, Array } } - Instruction *resultValue = nonUniformInst; - - // End the waterfall loop (as long as nonUniformInst is not a store with no result). - if (!nonUniformInst->getType()->isVoidTy()) { - SetInsertPoint(nonUniformInst->getNextNode()); - SetCurrentDebugLocation(nonUniformInst->getDebugLoc()); - - Use *useOfNonUniformInst = nullptr; - Type *waterfallEndTy = resultValue->getType(); - if (auto vecTy = dyn_cast(waterfallEndTy)) { - if (vecTy->getElementType()->isIntegerTy(8)) { - // ISel does not like waterfall.end with vector of i8 type, so cast if necessary. 
- assert((vecTy->getNumElements() % 4) == 0); - waterfallEndTy = getInt32Ty(); - if (vecTy->getNumElements() != 4) - waterfallEndTy = FixedVectorType::get(getInt32Ty(), vecTy->getNumElements() / 4); - resultValue = cast(CreateBitCast(resultValue, waterfallEndTy, instName)); - useOfNonUniformInst = &resultValue->getOperandUse(0); - } - } - resultValue = CreateIntrinsic(Intrinsic::amdgcn_waterfall_end, waterfallEndTy, {waterfallBegin, resultValue}, - nullptr, instName); - if (!useOfNonUniformInst) - useOfNonUniformInst = &resultValue->getOperandUse(1); - if (waterfallEndTy != nonUniformInst->getType()) - resultValue = cast(CreateBitCast(resultValue, nonUniformInst->getType(), instName)); - - // Replace all uses of nonUniformInst with the result of this code. - *useOfNonUniformInst = PoisonValue::get(nonUniformInst->getType()); - nonUniformInst->replaceAllUsesWith(resultValue); - *useOfNonUniformInst = nonUniformInst; - } + if (nonUniformInst->getType()->isVoidTy()) + return nonUniformInst; + + auto mapFunc = [](BuilderBase &builder, ArrayRef mappedArgs, ArrayRef passthroughArgs) -> Value * { + return builder.CreateWaterfallEnd(mappedArgs[0], passthroughArgs[0]); + }; + + SetInsertPoint(nonUniformInst->getNextNode()); + auto resultValue = + cast(CreateMapToSimpleType(mapFunc, nonUniformInst, waterfallBegin, MapToSimpleMode::SimpleVector)); return resultValue; #endif diff --git a/lgc/builder/SubgroupBuilder.cpp b/lgc/builder/SubgroupBuilder.cpp index 5d145ebe8b..50604cad1d 100644 --- a/lgc/builder/SubgroupBuilder.cpp +++ b/lgc/builder/SubgroupBuilder.cpp @@ -178,7 +178,7 @@ Value *BuilderImpl::CreateSubgroupBroadcast(Value *const value, Value *const ind {mappedArgs[0], passthroughArgs[0]}); }; - return CreateMapToInt32(mapFunc, value, index); + return CreateMapToSimpleType(mapFunc, value, index); } // ===================================================================================================================== @@ -194,7 +194,7 @@ Value 
*BuilderImpl::CreateSubgroupBroadcastWaterfall(Value *const value, Value * builder.CreateIntrinsic(builder.getInt32Ty(), Intrinsic::amdgcn_readlane, {mappedArgs[0], passthroughArgs[0]}); return createWaterfallLoop(cast(readlane), 1); }; - return CreateMapToInt32(mapFunc, value, index); + return CreateMapToSimpleType(mapFunc, value, index); } // ===================================================================================================================== @@ -207,7 +207,7 @@ Value *BuilderImpl::CreateSubgroupBroadcastFirst(Value *const value, const Twine return builder.CreateIntrinsic(builder.getInt32Ty(), Intrinsic::amdgcn_readfirstlane, mappedArgs[0]); }; - return CreateMapToInt32(mapFunc, {BuilderBase::get(*this).CreateInlineAsmSideEffect(value)}, {}); + return CreateMapToSimpleType(mapFunc, {BuilderBase::get(*this).CreateInlineAsmSideEffect(value)}, {}); } // ===================================================================================================================== @@ -346,7 +346,7 @@ Value *BuilderImpl::CreateSubgroupShuffle(Value *const value, Value *const index }; // The ds_bpermute intrinsic requires the index be multiplied by 4. 
- return CreateMapToInt32(mapFunc, value, CreateMul(index, getInt32(4))); + return CreateMapToSimpleType(mapFunc, value, CreateMul(index, getInt32(4))); } if (supportPermLane64Dpp()) { @@ -364,15 +364,15 @@ Value *BuilderImpl::CreateSubgroupShuffle(Value *const value, Value *const index return builder.CreateIntrinsic(builder.getInt32Ty(), Intrinsic::amdgcn_permlane64, {mappedArgs[0]}); }; - auto swapped = CreateMapToInt32(permuteFunc, wwmValue, {}); + auto swapped = CreateMapToSimpleType(permuteFunc, wwmValue, {}); auto bPermFunc = [](BuilderBase &builder, ArrayRef mappedArgs, ArrayRef passthroughArgs) -> Value * { return builder.CreateIntrinsic(Intrinsic::amdgcn_ds_bpermute, {}, {passthroughArgs[0], mappedArgs[0]}); }; - auto bPermSameHalf = CreateMapToInt32(bPermFunc, wwmValue, wwmIndex); - auto bPermOtherHalf = CreateMapToInt32(bPermFunc, swapped, wwmIndex); + auto bPermSameHalf = CreateMapToSimpleType(bPermFunc, wwmValue, wwmIndex); + auto bPermOtherHalf = CreateMapToSimpleType(bPermFunc, swapped, wwmIndex); bPermOtherHalf = createWwm(bPermOtherHalf); auto const threadId = CreateSubgroupMbcnt(getInt64(UINT64_MAX), ""); @@ -389,7 +389,7 @@ Value *BuilderImpl::CreateSubgroupShuffle(Value *const value, Value *const index return createWaterfallLoop(cast(readlane), 1); }; - return CreateMapToInt32(mapFunc, value, index); + return CreateMapToSimpleType(mapFunc, value, index); } // ===================================================================================================================== @@ -1143,7 +1143,7 @@ Value *BuilderImpl::CreateSubgroupWriteInvocation(Value *const inputValue, Value }); }; - return CreateMapToInt32(mapFunc, {inputValue, writeValue}, invocationIndex); + return CreateMapToSimpleType(mapFunc, {inputValue, writeValue}, invocationIndex); } // ===================================================================================================================== @@ -1282,7 +1282,7 @@ Value *BuilderImpl::createDppMov(Value *const value, DppCtrl 
dppCtrl, unsigned r {mappedArgs[0], passthroughArgs[0], passthroughArgs[1], passthroughArgs[2], passthroughArgs[3]}); }; - return CreateMapToInt32( + return CreateMapToSimpleType( mapFunc, value, {getInt32(static_cast(dppCtrl)), getInt32(rowMask), getInt32(bankMask), getInt1(boundCtrl)}); } @@ -1304,7 +1304,7 @@ Value *BuilderImpl::createDppUpdate(Value *const origValue, Value *const updateV {mappedArgs[0], mappedArgs[1], passthroughArgs[0], passthroughArgs[1], passthroughArgs[2], passthroughArgs[3]}); }; - return CreateMapToInt32( + return CreateMapToSimpleType( mapFunc, { origValue, @@ -1331,7 +1331,7 @@ Value *BuilderImpl::createPermLane16(Value *const origValue, Value *const update {mappedArgs[0], mappedArgs[1], passthroughArgs[0], passthroughArgs[1], passthroughArgs[2], passthroughArgs[3]}); }; - return CreateMapToInt32( + return CreateMapToSimpleType( mapFunc, { origValue, @@ -1358,7 +1358,7 @@ Value *BuilderImpl::createPermLaneX16(Value *const origValue, Value *const updat {mappedArgs[0], mappedArgs[1], passthroughArgs[0], passthroughArgs[1], passthroughArgs[2], passthroughArgs[3]}); }; - return CreateMapToInt32( + return CreateMapToSimpleType( mapFunc, { origValue, @@ -1376,7 +1376,7 @@ Value *BuilderImpl::createPermLane64(Value *const updateValue) { return builder.CreateIntrinsic(builder.getInt32Ty(), Intrinsic::amdgcn_permlane64, {mappedArgs[0]}); }; - return CreateMapToInt32(mapFunc, updateValue, {}); + return CreateMapToSimpleType(mapFunc, updateValue, {}); } // ===================================================================================================================== @@ -1389,7 +1389,7 @@ Value *BuilderImpl::createDsSwizzle(Value *const value, uint16_t dsPattern) { return builder.CreateIntrinsic(Intrinsic::amdgcn_ds_swizzle, {}, {mappedArgs[0], passthroughArgs[0]}); }; - return CreateMapToInt32(mapFunc, value, getInt32(dsPattern)); + return CreateMapToSimpleType(mapFunc, value, getInt32(dsPattern)); } // 
===================================================================================================================== @@ -1401,7 +1401,7 @@ Value *BuilderImpl::createWwm(Value *const value) { return builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_wwm, mappedArgs[0]); }; - return CreateMapToInt32(mapFunc, value, {}); + return CreateMapToSimpleType(mapFunc, value, {}); } // ===================================================================================================================== @@ -1415,7 +1415,7 @@ Value *BuilderImpl::createWqm(Value *const value) { }; if (m_shaderStage == ShaderStageFragment) - return CreateMapToInt32(mapFunc, value, {}); + return CreateMapToSimpleType(mapFunc, value, {}); return value; } diff --git a/lgc/include/lgc/builder/BuilderImpl.h b/lgc/include/lgc/builder/BuilderImpl.h index c758bb77ec..b5729e2414 100644 --- a/lgc/include/lgc/builder/BuilderImpl.h +++ b/lgc/include/lgc/builder/BuilderImpl.h @@ -117,9 +117,11 @@ class BuilderImpl : public BuilderDefs { llvm::Value *CreateAddByteOffset(llvm::Value *pointer, llvm::Value *byteOffset, const llvm::Twine &instName = "") { return BuilderBase::get(*this).CreateAddByteOffset(pointer, byteOffset, instName); } - llvm::Value *CreateMapToInt32(BuilderBase::MapToInt32Func mapFunc, llvm::ArrayRef mappedArgs, - llvm::ArrayRef passthroughArgs) { - return BuilderBase::get(*this).CreateMapToInt32(mapFunc, mappedArgs, passthroughArgs); + + llvm::Value *CreateMapToSimpleType(BuilderBase::MapToSimpleTypeFunc mapFunc, llvm::ArrayRef mappedArgs, + llvm::ArrayRef passthroughArgs, + MapToSimpleMode simpleMode = MapToSimpleMode::Int32) { + return BuilderBase::get(*this).CreateMapToSimpleType(mapFunc, mappedArgs, passthroughArgs, simpleMode); } PipelineState *m_pipelineState = nullptr; // Pipeline state diff --git a/lgc/include/lgc/util/BuilderBase.h b/lgc/include/lgc/util/BuilderBase.h index a6526a88ee..13432baeb4 100644 --- a/lgc/include/lgc/util/BuilderBase.h +++ b/lgc/include/lgc/util/BuilderBase.h @@ 
-34,6 +34,11 @@ namespace lgc { +enum class MapToSimpleMode : unsigned { + Int32, + SimpleVector, +}; + // ===================================================================================================================== // BuilderBase extends BuilderCommon, and provides some utility methods used within LGC. // Methods here can be used directly from a BuilderImpl subclass, such as InOutBuilder. @@ -69,24 +74,30 @@ class BuilderBase : public BuilderCommon { // @param instName : Name to give instruction llvm::Value *CreateAddByteOffset(llvm::Value *pointer, llvm::Value *byteOffset, const llvm::Twine &instName = ""); - // Type of function to pass to CreateMapToInt32 + // Type of function to pass to CreateMapToSimpleType typedef llvm::function_ref mappedArgs, llvm::ArrayRef passthroughArgs)> - MapToInt32Func; + MapToSimpleTypeFunc; - // Create a call that'll map the massage arguments to an i32 type (for functions that only take i32). + // Create a call that'll map the massage arguments to a simple type (for functions that only take i32 or simple + // vector). // - // @param mapFunc : Pointer to the function to call on each i32. - // @param mappedArgs : The arguments to massage into an i32 type. + // @param mapFunc : Pointer to the function to call on each simple type. + // @param mappedArgs : The arguments to massage into a simple type. // @param passthroughArgs : The arguments to pass-through without massaging. - llvm::Value *CreateMapToInt32(MapToInt32Func mapFunc, llvm::ArrayRef mappedArgs, - llvm::ArrayRef passthroughArgs); + // @param simpleMode : Selects the simple type to map to (Int32 or SimpleVector). + llvm::Value *CreateMapToSimpleType(MapToSimpleTypeFunc mapFunc, llvm::ArrayRef mappedArgs, + llvm::ArrayRef passthroughArgs, + MapToSimpleMode simpleMode = MapToSimpleMode::Int32); // Create an inline assembly call to cause a side effect (used to work around miscompiles with convergent). 
llvm::Value *CreateInlineAsmSideEffect(llvm::Value *const value); // Create a call to set inactive. Both active and inactive should have the same type. llvm::Value *CreateSetInactive(llvm::Value *const active, llvm::Value *const inactive); + + // Create a waterfall end intrinsic. + llvm::Instruction *CreateWaterfallEnd(llvm::Value *nonUniformInst, llvm::Value *waterfallBegin); }; } // namespace lgc diff --git a/lgc/test/TestWaterfallLoopForStruct.lgc b/lgc/test/TestWaterfallLoopForStruct.lgc new file mode 100644 index 0000000000..7b88e9ccdc --- /dev/null +++ b/lgc/test/TestWaterfallLoopForStruct.lgc @@ -0,0 +1,113 @@ +; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc +; RUN: lgc -mcpu=gfx1010 -print-after=lgc-builder-replayer -o - %s 2>&1 | FileCheck --check-prefixes=CHECK %s +; ModuleID = 'lgcPipeline' +source_filename = "llpc_vertex_2" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8-p32:32:32" +target triple = "amdgcn--amdpal" + +; Function Attrs: nounwind +define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !spirv.ExecutionModel !11 !lgc.shaderstage !1 { +.entry: + %0 = call i32 (...) @lgc.create.read.generic.input.i32(i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison) + %1 = call ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) + %2 = call i32 (...) 
@lgc.create.get.desc.stride.i32(i32 1, i32 1, i64 0, i32 0) + %3 = insertvalue { ptr addrspace(4), i32, i32, i32 } poison, i32 %2, 1 + %4 = insertvalue { ptr addrspace(4), i32, i32, i32 } %3, i32 32, 2 + %5 = insertvalue { ptr addrspace(4), i32, i32, i32 } %4, i32 1, 3 + %6 = mul i32 %0, %2 + %7 = sext i32 %6 to i64 + %8 = getelementptr i8, ptr addrspace(4) %1, i64 %7 + %9 = insertvalue { ptr addrspace(4), i32, i32, i32 } %5, ptr addrspace(4) %8, 0 + %10 = load <8 x i32>, ptr addrspace(4) %8, align 32, !invariant.load !12 + %11 = insertvalue [3 x <8 x i32>] poison, <8 x i32> %10, 0 + %12 = call { <4 x float>, i32 } (...) @"lgc.create.image.load.s[v4f32,i32]"(i32 1, i32 8, <8 x i32> %10, <2 x i32> ) + %13 = extractvalue { <4 x float>, i32 } %12, 1 + %14 = extractvalue { <4 x float>, i32 } %12, 0 + %15 = icmp sgt i32 %13, 0 + %16 = select i1 %15, <4 x float> %14, <4 x float> zeroinitializer + call void (...) @lgc.create.write.generic.output(<4 x float> %16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison) + ret void +} + +declare spir_func void @spirv.NonUniform.i32(i32) local_unnamed_addr + +; Function Attrs: nounwind memory(none) +declare ptr addrspace(4) @lgc.create.get.desc.ptr.p4(...) local_unnamed_addr #1 + +; Function Attrs: nounwind memory(none) +declare i32 @lgc.create.get.desc.stride.i32(...) local_unnamed_addr #1 + +declare spir_func void @"spirv.NonUniform.s[p4,i32,i32,i32]"({ ptr addrspace(4), i32, i32, i32 }) local_unnamed_addr + +declare spir_func void @spirv.NonUniform.a3v8i32([3 x <8 x i32>]) local_unnamed_addr + +; Function Attrs: nounwind willreturn memory(read) +declare { <4 x float>, i32 } @"lgc.create.image.load.s[v4f32,i32]"(...) local_unnamed_addr #2 + +; Function Attrs: nounwind willreturn memory(read) +declare i32 @lgc.create.read.generic.input.i32(...) local_unnamed_addr #2 + +; Function Attrs: nounwind +declare void @lgc.create.write.generic.output(...) 
local_unnamed_addr #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind memory(none) } +attributes #2 = { nounwind willreturn memory(read) } + +!lgc.client = !{!0} +!lgc.unlinked = !{!1} +!lgc.options = !{!2} +!lgc.options.VS = !{!3} +!lgc.user.data.nodes = !{!4, !5, !6, !7} +!lgc.vertex.inputs = !{!8} +!lgc.input.assembly.state = !{!9} +!amdgpu.pal.metadata.msgpack = !{!10} + +!0 = !{!"Vulkan"} +!1 = !{i32 1} +!2 = !{i32 -1631706765, i32 -1916588938, i32 -1773735133, i32 -934651961, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 272, i32 0, i32 0, i32 1, i32 256, i32 256, i32 2, i32 1} +!3 = !{i32 40339196, i32 -259027446, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 1800} +!4 = !{!"DescriptorTableVaPtr", i32 7, i32 0, i32 0, i32 1, i32 1} +!5 = !{!"DescriptorResource", i32 1, i32 0, i32 0, i32 32, i64 0, i32 0, i32 8} +!6 = !{!"IndirectUserDataVaPtr", i32 8, i32 0, i32 1, i32 1, i32 256} +!7 = !{!"StreamOutTableVaPtr", i32 11, i32 0, i32 2, i32 1, i32 0} +!8 = !{i32 0, i32 0, i32 0, i32 16, i32 14, i32 5, i32 -1} +!9 = !{i32 3} +!10 = !{!"\82\B0amdpal.pipelines\91\83\B0.spill_threshold\CD\FF\FF\B0.user_data_limit\00\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CF\B33?+-\ABJ6\CF\11\1E\AB\BB\05z\B9\B5\AD.llpc_version\A468.0\AEamdpal.version\92\03\00"} +!11 = !{i32 0} +!12 = !{} +; CHECK-LABEL: @lgc.shader.VS.main( +; CHECK-NEXT: .entry: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @lgc.input.import.generic.i32(i1 false, i32 0, i32 0, i32 0, i32 poison) +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @lgc.load.user.data.i32(i32 0) +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr 
addrspace(4) +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { ptr addrspace(4), i32, i32, i32 } poison, i32 32, 1 +; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { ptr addrspace(4), i32, i32, i32 } [[TMP8]], i32 32, 2 +; CHECK-NEXT: [[TMP10:%.*]] = insertvalue { ptr addrspace(4), i32, i32, i32 } [[TMP9]], i32 1, 3 +; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP2]], 32 +; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP7]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = insertvalue { ptr addrspace(4), i32, i32, i32 } [[TMP10]], ptr addrspace(4) [[TMP13]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP13]], align 32, !invariant.load !12 +; CHECK-NEXT: [[TMP16:%.*]] = insertvalue [3 x <8 x i32>] poison, <8 x i32> [[TMP15]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[TMP11]]) +; CHECK-NEXT: [[TMP18:%.*]] = call <8 x i32> @llvm.amdgcn.waterfall.readfirstlane.v8i32.v8i32(i32 [[TMP17]], <8 x i32> [[TMP15]]) +; CHECK-NEXT: [[TMP19:%.*]] = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 15, i32 1, i32 1, <8 x i32> [[TMP18]], i32 1, i32 0) +; CHECK-NEXT: [[TMP20:%.*]] = extractvalue { <4 x float>, i32 } [[TMP19]], 0 +; CHECK-NEXT: [[TMP21:%.*]] = call <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 [[TMP17]], <4 x float> [[TMP20]]) +; CHECK-NEXT: [[TMP22:%.*]] = extractvalue { <4 x float>, i32 } [[TMP19]], 1 +; CHECK-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.waterfall.end.i32(i32 [[TMP17]], i32 [[TMP22]]) +; CHECK-NEXT: [[TMP24:%.*]] = insertvalue { <4 x float>, i32 } poison, <4 x float> [[TMP21]], 0 +; CHECK-NEXT: [[TMP25:%.*]] = insertvalue { <4 x float>, i32 } [[TMP24]], i32 [[TMP23]], 1 +; CHECK-NEXT: [[TMP26:%.*]] = extractvalue { <4 x float>, i32 } [[TMP25]], 1 +; CHECK-NEXT: [[TMP27:%.*]] = extractvalue { <4 x float>, i32 } 
[[TMP25]], 0 +; CHECK-NEXT: [[TMP28:%.*]] = icmp sgt i32 [[TMP26]], 0 +; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], <4 x float> [[TMP27]], <4 x float> zeroinitializer +; CHECK-NEXT: call void @lgc.output.export.generic.i32.i32.v4f32(i32 0, i32 0, <4 x float> [[TMP29]]) #[[ATTR5:[0-9]+]] +; CHECK-NEXT: ret void +; diff --git a/script/switch-coding-style.sh b/script/switch-coding-style.sh index 559efd4f73..10d3c6a93a 100755 --- a/script/switch-coding-style.sh +++ b/script/switch-coding-style.sh @@ -225,7 +225,7 @@ CheckOptions: - { key: readability-identifier-naming.MemberPrefix, value: m_ } - { key: readability-identifier-naming.MemberRemovePrefixes, value: 'p,b,pfn,m_p,m_b' } - { key: readability-identifier-naming.MethodCase, value: camelBack } - - { key: readability-identifier-naming.MethodIgnoreRegex, value: '^Create$|^CreateACos$|^CreateACosh$|^CreateASin$|^CreateASinh$|^CreateATan$|^CreateATan2$|^CreateATanh$|^CreateBarrier$|^CreateBinaryIntrinsic$|^CreateCosh$|^CreateCrossProduct$|^CreateCubeFace.*$|^CreateDemoteToHelperInvocation$|^CreateDerivative$|^CreateDeterminant$|^CreateDotProduct$|^CreateEmitVertex$|^CreateEndPrimitive$|^CreateExp$|^CreateExtract.*$|^CreateFaceForward$|^CreateFClamp$|^CreateFindSMsb$|^CreateFma$|^CreateFMax$|^CreateFMax3$|^CreateFMid3$|^CreateFMin$|^CreateFMin3$|^CreateFMod$|^CreateFpTruncWithRounding$|^CreateFract$|^CreateFSign$|^CreateGet.*$|^CreateImage.*$|^CreateIndexDescPtr$|^CreateInsertBitField$|^CreateIntrinsic$|^CreateInverseSqrt$|^CreateIs.*$|^CreateKill$|^CreateLdexp$|^CreateLoad.*$|^CreateLog$|^CreateMapToInt32$|^CreateMatrix.*$|^CreateNormalizeVector$|^CreateOuterProduct$|^CreatePower$|^CreateQuantizeToFp16$|^CreateRead.*$|^CreateReflect$|^CreateRefract$|^CreateSAbs$|^CreateSinh$|^CreateSMod$|^CreateSmoothStep$|^CreateSSign$|^CreateSubgroup.*$|^CreateTan$|^CreateTanh$|^CreateTransposeMatrix$|^CreateUnaryIntrinsic$|^CreateVectorTimesMatrix$|^CreateWrite.*Output$|^Serialize$|^Merge$|^Destroy$|^ConvertColorBufferFormatTo
ExportFormat$|^BuildShaderModule$|^BuildGraphicsPipeline$|^BuildComputePipeline$|^IsVertexFormatSupported$|^DumpSpirvBinary$|^BeginPipelineDump$|^EndPipelineDump$|^DumpPipelineBinary$|^DumpPipelineExtraInfo$|^GetShaderHash$|^GetPipelineHash$|^GetPipelineName$|^CreateShaderCache$|^ReadFromBuffer$|^GetSectionIndex$|^GetSymbolsBySectionIndex$|^GetSectionData$' } + - { key: readability-identifier-naming.MethodIgnoreRegex, value: '^Create$|^CreateACos$|^CreateACosh$|^CreateASin$|^CreateASinh$|^CreateATan$|^CreateATan2$|^CreateATanh$|^CreateBarrier$|^CreateBinaryIntrinsic$|^CreateCosh$|^CreateCrossProduct$|^CreateCubeFace.*$|^CreateDemoteToHelperInvocation$|^CreateDerivative$|^CreateDeterminant$|^CreateDotProduct$|^CreateEmitVertex$|^CreateEndPrimitive$|^CreateExp$|^CreateExtract.*$|^CreateFaceForward$|^CreateFClamp$|^CreateFindSMsb$|^CreateFma$|^CreateFMax$|^CreateFMax3$|^CreateFMid3$|^CreateFMin$|^CreateFMin3$|^CreateFMod$|^CreateFpTruncWithRounding$|^CreateFract$|^CreateFSign$|^CreateGet.*$|^CreateImage.*$|^CreateIndexDescPtr$|^CreateInsertBitField$|^CreateIntrinsic$|^CreateInverseSqrt$|^CreateIs.*$|^CreateKill$|^CreateLdexp$|^CreateLoad.*$|^CreateLog$|^CreateMapToSimpleType$|^CreateMatrix.*$|^CreateNormalizeVector$|^CreateOuterProduct$|^CreatePower$|^CreateQuantizeToFp16$|^CreateRead.*$|^CreateReflect$|^CreateRefract$|^CreateSAbs$|^CreateSinh$|^CreateSMod$|^CreateSmoothStep$|^CreateSSign$|^CreateSubgroup.*$|^CreateTan$|^CreateTanh$|^CreateTransposeMatrix$|^CreateUnaryIntrinsic$|^CreateVectorTimesMatrix$|^CreateWrite.*Output$|^Serialize$|^Merge$|^Destroy$|^ConvertColorBufferFormatToExportFormat$|^BuildShaderModule$|^BuildGraphicsPipeline$|^BuildComputePipeline$|^IsVertexFormatSupported$|^DumpSpirvBinary$|^BeginPipelineDump$|^EndPipelineDump$|^DumpPipelineBinary$|^DumpPipelineExtraInfo$|^GetShaderHash$|^GetPipelineHash$|^GetPipelineName$|^CreateShaderCache$|^ReadFromBuffer$|^GetSectionIndex$|^GetSymbolsBySectionIndex$|^GetSectionData$' } - { key: 
readability-identifier-naming.FunctionIgnoreRegex, value: 'EnableOuts|EnableErrs' } - { key: readability-identifier-naming.FunctionCase, value: camelBack } - { key: readability-identifier-naming.TypeCase, value: CamelCase }