From 8e6fd609691e94a8d51bc514be4264800b6146ff Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Tue, 4 Aug 2020 02:57:47 -0700 Subject: [PATCH 1/4] Add load for argument matrix subscript lowering Matrix lowering was not handling subscript oeprations when the matrix was a shader input. copying the matrix to a temporary explicitly or implicitly as part of a copy in function call worked around the problem. When the argument copy was eliminated, this problem was exposed. By adding a load for the indicated matrix for a subscript of a shader input matrix, the lowering can procede using the lowered matrix from the load. Additionally, hull shaders with their uninlineable functions were failing to detect the constanthull function as a graphics function so the subscript lowering was being treated as if it were in a library. Even when it got to signature lowering, there was no support for lowering matrix loads. The test for shaderism now tests the function attributes of the module's entry function. The signature lowering for matrix loads is moved into a function that is called by input lowering for both the constant hull and entry functions. Added general tests for matrix subscripts from different memory types as well as specific tests for the argument passing problem in pixel and domain shaders. Fixes #2958 --- lib/HLSL/HLLowerUDT.cpp | 2 +- lib/HLSL/HLMatrixLowerPass.cpp | 30 ++- lib/HLSL/HLMatrixSubscriptUseReplacer.cpp | 18 +- lib/HLSL/HLMatrixSubscriptUseReplacer.h | 5 +- lib/HLSL/HLSignatureLower.cpp | 188 +++++++++++------- .../hlsl/types/matrix/matrix_subscript.hlsl | 64 ++++++ .../types/matrix/matrix_subscript_ds.hlsl | 35 ++++ .../types/matrix/matrix_subscript_hs.hlsl | 57 ++++++ 8 files changed, 311 insertions(+), 88 deletions(-) create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript_ds.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript_hs.hlsl diff --git a/lib/HLSL/HLLowerUDT.cpp b/lib/HLSL/HLLowerUDT.cpp index 4d68940444..1d9abc56b2 100644 --- a/lib/HLSL/HLLowerUDT.cpp +++ b/lib/HLSL/HLLowerUDT.cpp @@ -408,7 +408,7 @@ void hlsl::ReplaceUsesForLoweredUDT(Value *V, Value *NewV) { std::vector DeadInsts; HLMatrixSubscriptUseReplacer UseReplacer( - CI, NewV, ElemIndices, /*AllowLoweredPtrGEPs*/true, DeadInsts); + CI, NewV, /*TempLoweredMatrix*/nullptr, ElemIndices, /*AllowLoweredPtrGEPs*/true, DeadInsts); DXASSERT(CI->use_empty(), "Expected all matrix subscript uses to have been replaced."); CI->eraseFromParent(); diff --git a/lib/HLSL/HLMatrixLowerPass.cpp b/lib/HLSL/HLMatrixLowerPass.cpp index 6b9f760a1c..c080be76b4 100644 --- a/lib/HLSL/HLMatrixLowerPass.cpp +++ b/lib/HLSL/HLMatrixLowerPass.cpp @@ -406,7 +406,7 @@ Value *HLMatrixLowerPass::tryGetLoweredPtrOperand(Value *Ptr, IRBuilder<> &Build RootPtr = GEP->getPointerOperand(); Argument *Arg = dyn_cast(RootPtr); - bool IsNonShaderArg = Arg != nullptr && !m_pHLModule->IsGraphicsShader(Arg->getParent()); + bool IsNonShaderArg = Arg != nullptr && !m_pHLModule->IsGraphicsShader(m_pHLModule->GetEntryFunction()); if (IsNonShaderArg || isa(RootPtr)) { // Bitcast the matrix pointer to its lowered equivalent. // The HLMatrixBitcast pass will take care of this later. @@ -1474,7 +1474,30 @@ void HLMatrixLowerPass::lowerHLMatSubscript(CallInst *Call, Value *MatPtr, Small IRBuilder<> CallBuilder(Call); Value *LoweredPtr = tryGetLoweredPtrOperand(MatPtr, CallBuilder); - if (LoweredPtr == nullptr) return; + Value *LoweredMatrix = nullptr; + if (LoweredPtr == nullptr) { + Value *RootPtr = MatPtr; + while (GEPOperator *GEP = dyn_cast(RootPtr)) + RootPtr = GEP->getPointerOperand(); + + if (!isa(RootPtr)) + return; + + // For a shader input, load the matrix into a lowered ptr + // The load will be handled by LowerSignature + HLMatLoadStoreOpcode Opcode = (HLSubscriptOpcode)GetHLOpcode(Call) == HLSubscriptOpcode::RowMatSubscript ? + HLMatLoadStoreOpcode::RowMatLoad : HLMatLoadStoreOpcode::ColMatLoad; + HLMatrixType MatTy = HLMatrixType::cast(MatPtr->getType()->getPointerElementType()); + LoweredMatrix = callHLFunction( + *m_pModule, HLOpcodeGroup::HLMatLoadStore, static_cast(Opcode), + MatTy.getLoweredVectorTypeForReg(), { CallBuilder.getInt32((uint32_t)Opcode), MatPtr }, + Call->getCalledFunction()->getAttributes().getFnAttributes(), CallBuilder); + HLMatrixSubscriptUseReplacer UseReplacer(Call, LoweredPtr, LoweredMatrix, + ElemIndices, false /*AllowLoweredPtrGEPs*/, m_deadInsts); + DXASSERT(Call->use_empty(), "Expected all matrix subscript uses to have been replaced."); + addToDeadInsts(Call); + return; + } // For global variables, we can GEP directly into the lowered vector pointer. // This is necessary to support group shared memory atomics and the likes. @@ -1484,7 +1507,8 @@ void HLMatrixLowerPass::lowerHLMatSubscript(CallInst *Call, Value *MatPtr, Small bool AllowLoweredPtrGEPs = isa(RootPtr); // Just constructing this does all the work - HLMatrixSubscriptUseReplacer UseReplacer(Call, LoweredPtr, ElemIndices, AllowLoweredPtrGEPs, m_deadInsts); + HLMatrixSubscriptUseReplacer UseReplacer(Call, LoweredPtr, nullptr /*TempLoweredMatrix*/, + ElemIndices, AllowLoweredPtrGEPs, m_deadInsts); DXASSERT(Call->use_empty(), "Expected all matrix subscript uses to have been replaced."); addToDeadInsts(Call); diff --git a/lib/HLSL/HLMatrixSubscriptUseReplacer.cpp b/lib/HLSL/HLMatrixSubscriptUseReplacer.cpp index ba9eeb99f2..d4696ae7f6 100644 --- a/lib/HLSL/HLMatrixSubscriptUseReplacer.cpp +++ b/lib/HLSL/HLMatrixSubscriptUseReplacer.cpp @@ -19,9 +19,10 @@ using namespace llvm; using namespace hlsl; -HLMatrixSubscriptUseReplacer::HLMatrixSubscriptUseReplacer(CallInst* Call, Value *LoweredPtr, +HLMatrixSubscriptUseReplacer::HLMatrixSubscriptUseReplacer(CallInst* Call, Value *LoweredPtr, Value *TempLoweredMatrix, SmallVectorImpl &ElemIndices, bool AllowLoweredPtrGEPs, std::vector &DeadInsts) - : LoweredPtr(LoweredPtr), ElemIndices(ElemIndices), DeadInsts(DeadInsts), AllowLoweredPtrGEPs(AllowLoweredPtrGEPs) + : LoweredPtr(LoweredPtr), ElemIndices(ElemIndices), DeadInsts(DeadInsts), + AllowLoweredPtrGEPs(AllowLoweredPtrGEPs), TempLoweredMatrix(TempLoweredMatrix) { HasScalarResult = !Call->getType()->getPointerElementType()->isVectorTy(); @@ -32,6 +33,11 @@ HLMatrixSubscriptUseReplacer::HLMatrixSubscriptUseReplacer(CallInst* Call, Value } } + if (TempLoweredMatrix) + LoweredTy = TempLoweredMatrix->getType(); + else + LoweredTy = LoweredPtr->getType()->getPointerElementType(); + replaceUses(Call, /* GEPIdx */ nullptr); } @@ -162,7 +168,8 @@ void HLMatrixSubscriptUseReplacer::cacheLoweredMatrix(bool ForDynamicIndexing, I // Load without memory to register representation conversion, // since the point is to mimic pointer semantics - TempLoweredMatrix = Builder.CreateLoad(LoweredPtr); + if (!TempLoweredMatrix) + TempLoweredMatrix = Builder.CreateLoad(LoweredPtr); if (!ForDynamicIndexing) return; @@ -238,7 +245,6 @@ Value *HLMatrixSubscriptUseReplacer::loadVector(IRBuilder<> &Builder) { // Otherwise load elements one by one // Lowered form may be array when AllowLoweredPtrGEPs == true. - Type* LoweredTy = LoweredPtr->getType()->getPointerElementType(); Type* ElemTy = LoweredTy->isVectorTy() ? LoweredTy->getScalarType() : cast(LoweredTy)->getArrayElementType(); VectorType *VecTy = VectorType::get(ElemTy, static_cast(ElemIndices.size())); @@ -270,7 +276,7 @@ void HLMatrixSubscriptUseReplacer::flushLoweredMatrix(IRBuilder<> &Builder) { // First re-create the vector from the temporary array DXASSERT_NOMSG(LazyTempElemArrayAlloca != nullptr); - VectorType *LoweredMatrixTy = cast(LoweredPtr->getType()->getPointerElementType()); + VectorType *LoweredMatrixTy = cast(LoweredTy); TempLoweredMatrix = UndefValue::get(LoweredMatrixTy); Value *GEPIndices[2] = { Builder.getInt32(0), nullptr }; for (unsigned ElemIdx = 0; ElemIdx < LoweredMatrixTy->getNumElements(); ++ElemIdx) { @@ -284,4 +290,4 @@ void HLMatrixSubscriptUseReplacer::flushLoweredMatrix(IRBuilder<> &Builder) { // Store back the lowered matrix to its pointer Builder.CreateStore(TempLoweredMatrix, LoweredPtr); TempLoweredMatrix = nullptr; -} \ No newline at end of file +} diff --git a/lib/HLSL/HLMatrixSubscriptUseReplacer.h b/lib/HLSL/HLMatrixSubscriptUseReplacer.h index afe38704f2..b49aafdb0c 100644 --- a/lib/HLSL/HLMatrixSubscriptUseReplacer.h +++ b/lib/HLSL/HLMatrixSubscriptUseReplacer.h @@ -30,7 +30,7 @@ namespace hlsl { class HLMatrixSubscriptUseReplacer { public: // The constructor does everything - HLMatrixSubscriptUseReplacer(llvm::CallInst* Call, llvm::Value *LoweredPtr, + HLMatrixSubscriptUseReplacer(llvm::CallInst* Call, llvm::Value *LoweredPtr, llvm::Value *TempLoweredMatrix, llvm::SmallVectorImpl &ElemIndices, bool AllowLoweredPtrGEPs, std::vector &DeadInsts); @@ -51,6 +51,7 @@ class HLMatrixSubscriptUseReplacer { bool AllowLoweredPtrGEPs = false; bool HasScalarResult = false; bool HasDynamicElemIndex = false; + llvm::Type *LoweredTy = nullptr; // The entire lowered matrix as loaded from LoweredPtr, // nullptr if we copied it to a temporary array. @@ -64,4 +65,4 @@ class HLMatrixSubscriptUseReplacer { // so we can dynamically index the level 1 indices. llvm::AllocaInst *LazyTempElemIndicesArrayAlloca = nullptr; }; -} // namespace hlsl \ No newline at end of file +} // namespace hlsl diff --git a/lib/HLSL/HLSignatureLower.cpp b/lib/HLSL/HLSignatureLower.cpp index e53241ffc3..583e7a9050 100644 --- a/lib/HLSL/HLSignatureLower.cpp +++ b/lib/HLSL/HLSignatureLower.cpp @@ -588,6 +588,7 @@ Value *GenerateLdInput(Function *loadInput, ArrayRef args, } } + Value *replaceLdWithLdInput(Function *loadInput, LoadInst *ldInst, unsigned cols, MutableArrayRef args, bool bCast) { @@ -654,6 +655,96 @@ Value *replaceLdWithLdInput(Function *loadInput, LoadInst *ldInst, } } + +void replaceMatStWithStOutputs(CallInst *CI, HLMatLoadStoreOpcode matOp, + Function *ldStFunc, Constant *OpArg, Constant *ID, + Constant *columnConsts[],Value *vertexOrPrimID, + Value *idxVal) { + IRBuilder<> LocalBuilder(CI); + Value *Val = CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx); + HLMatrixType MatTy = HLMatrixType::cast( + CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx) + ->getType()->getPointerElementType()); + + Val = MatTy.emitLoweredRegToMem(Val, LocalBuilder); + + if (matOp == HLMatLoadStoreOpcode::ColMatStore) { + for (unsigned c = 0; c < MatTy.getNumColumns(); c++) { + Constant *constColIdx = LocalBuilder.getInt32(c); + Value *colIdx = LocalBuilder.CreateAdd(idxVal, constColIdx); + + for (unsigned r = 0; r < MatTy.getNumRows(); r++) { + unsigned matIdx = MatTy.getColumnMajorIndex(r, c); + Value *Elt = LocalBuilder.CreateExtractElement(Val, matIdx); + LocalBuilder.CreateCall(ldStFunc, + { OpArg, ID, colIdx, columnConsts[r], Elt }); + } + } + } else { + for (unsigned r = 0; r < MatTy.getNumRows(); r++) { + Constant *constRowIdx = LocalBuilder.getInt32(r); + Value *rowIdx = LocalBuilder.CreateAdd(idxVal, constRowIdx); + for (unsigned c = 0; c < MatTy.getNumColumns(); c++) { + unsigned matIdx = MatTy.getRowMajorIndex(r, c); + Value *Elt = LocalBuilder.CreateExtractElement(Val, matIdx); + LocalBuilder.CreateCall(ldStFunc, + { OpArg, ID, rowIdx, columnConsts[c], Elt }); + } + } + } + CI->eraseFromParent(); +} + + +void replaceMatLdWithLdInputs(CallInst *CI, HLMatLoadStoreOpcode matOp, + Function *ldStFunc, Constant *OpArg, Constant *ID, + Constant *columnConsts[],Value *vertexOrPrimID, + Value *idxVal) { + IRBuilder<> LocalBuilder(CI); + HLMatrixType MatTy = HLMatrixType::cast( + CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx) + ->getType()->getPointerElementType()); + std::vector matElts(MatTy.getNumElements()); + + if (matOp == HLMatLoadStoreOpcode::ColMatLoad) { + for (unsigned c = 0; c < MatTy.getNumColumns(); c++) { + Constant *constRowIdx = LocalBuilder.getInt32(c); + Value *rowIdx = LocalBuilder.CreateAdd(idxVal, constRowIdx); + for (unsigned r = 0; r < MatTy.getNumRows(); r++) { + SmallVector args = { OpArg, ID, rowIdx, columnConsts[r] }; + if (vertexOrPrimID) + args.emplace_back(vertexOrPrimID); + + Value *input = LocalBuilder.CreateCall(ldStFunc, args); + unsigned matIdx = MatTy.getColumnMajorIndex(r, c); + matElts[matIdx] = input; + } + } + } else { + for (unsigned r = 0; r < MatTy.getNumRows(); r++) { + Constant *constRowIdx = LocalBuilder.getInt32(r); + Value *rowIdx = LocalBuilder.CreateAdd(idxVal, constRowIdx); + for (unsigned c = 0; c < MatTy.getNumColumns(); c++) { + SmallVector args = { OpArg, ID, rowIdx, columnConsts[c] }; + if (vertexOrPrimID) + args.emplace_back(vertexOrPrimID); + + Value *input = LocalBuilder.CreateCall(ldStFunc, args); + unsigned matIdx = MatTy.getRowMajorIndex(r, c); + matElts[matIdx] = input; + } + } + } + + Value *newVec = + HLMatrixLower::BuildVector(matElts[0]->getType(), matElts, LocalBuilder); + newVec = MatTy.emitLoweredMemToReg(newVec, LocalBuilder); + + CI->replaceAllUsesWith(newVec); + CI->eraseFromParent(); +} + + void replaceDirectInputParameter(Value *param, Function *loadInput, unsigned cols, MutableArrayRef args, bool bCast, OP *hlslOP, IRBuilder<> &Builder) { @@ -964,84 +1055,11 @@ void GenerateInputOutputUserCall(InputOutputAccessInfo &info, Value *undefVertex switch (matOp) { case HLMatLoadStoreOpcode::ColMatLoad: case HLMatLoadStoreOpcode::RowMatLoad: { - IRBuilder<> LocalBuilder(CI); - HLMatrixType MatTy = HLMatrixType::cast( - CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx) - ->getType()->getPointerElementType()); - std::vector matElts(MatTy.getNumElements()); - - if (matOp == HLMatLoadStoreOpcode::ColMatLoad) { - for (unsigned c = 0; c < MatTy.getNumColumns(); c++) { - Constant *constRowIdx = LocalBuilder.getInt32(c); - Value *rowIdx = LocalBuilder.CreateAdd(idxVal, constRowIdx); - for (unsigned r = 0; r < MatTy.getNumRows(); r++) { - SmallVector args = { OpArg, ID, rowIdx, columnConsts[r] }; - if (vertexOrPrimID) - args.emplace_back(vertexOrPrimID); - - Value *input = LocalBuilder.CreateCall(ldStFunc, args); - unsigned matIdx = MatTy.getColumnMajorIndex(r, c); - matElts[matIdx] = input; - } - } - } else { - for (unsigned r = 0; r < MatTy.getNumRows(); r++) { - Constant *constRowIdx = LocalBuilder.getInt32(r); - Value *rowIdx = LocalBuilder.CreateAdd(idxVal, constRowIdx); - for (unsigned c = 0; c < MatTy.getNumColumns(); c++) { - SmallVector args = { OpArg, ID, rowIdx, columnConsts[c] }; - if (vertexOrPrimID) - args.emplace_back(vertexOrPrimID); - - Value *input = LocalBuilder.CreateCall(ldStFunc, args); - unsigned matIdx = MatTy.getRowMajorIndex(r, c); - matElts[matIdx] = input; - } - } - } - - Value *newVec = - HLMatrixLower::BuildVector(matElts[0]->getType(), matElts, LocalBuilder); - newVec = MatTy.emitLoweredMemToReg(newVec, LocalBuilder); - - CI->replaceAllUsesWith(newVec); - CI->eraseFromParent(); + replaceMatLdWithLdInputs(CI, matOp, ldStFunc, OpArg, ID, columnConsts, vertexOrPrimID, idxVal); } break; case HLMatLoadStoreOpcode::ColMatStore: case HLMatLoadStoreOpcode::RowMatStore: { - IRBuilder<> LocalBuilder(CI); - Value *Val = CI->getArgOperand(HLOperandIndex::kMatStoreValOpIdx); - HLMatrixType MatTy = HLMatrixType::cast( - CI->getArgOperand(HLOperandIndex::kMatStoreDstPtrOpIdx) - ->getType()->getPointerElementType()); - - Val = MatTy.emitLoweredRegToMem(Val, LocalBuilder); - - if (matOp == HLMatLoadStoreOpcode::ColMatStore) { - for (unsigned c = 0; c < MatTy.getNumColumns(); c++) { - Constant *constColIdx = LocalBuilder.getInt32(c); - Value *colIdx = LocalBuilder.CreateAdd(idxVal, constColIdx); - - for (unsigned r = 0; r < MatTy.getNumRows(); r++) { - unsigned matIdx = MatTy.getColumnMajorIndex(r, c); - Value *Elt = LocalBuilder.CreateExtractElement(Val, matIdx); - LocalBuilder.CreateCall(ldStFunc, - { OpArg, ID, colIdx, columnConsts[r], Elt }); - } - } - } else { - for (unsigned r = 0; r < MatTy.getNumRows(); r++) { - Constant *constRowIdx = LocalBuilder.getInt32(r); - Value *rowIdx = LocalBuilder.CreateAdd(idxVal, constRowIdx); - for (unsigned c = 0; c < MatTy.getNumColumns(); c++) { - unsigned matIdx = MatTy.getRowMajorIndex(r, c); - Value *Elt = LocalBuilder.CreateExtractElement(Val, matIdx); - LocalBuilder.CreateCall(ldStFunc, - { OpArg, ID, rowIdx, columnConsts[c], Elt }); - } - } - } - CI->eraseFromParent(); + replaceMatStWithStOutputs(CI, matOp, ldStFunc, OpArg, ID, columnConsts, vertexOrPrimID, idxVal); } break; } } else { @@ -1386,6 +1404,14 @@ void HLSignatureLower::GenerateDxilPatchConstantFunctionInputs() { Type *i1Ty = Type::getInt1Ty(constZero->getContext()); Type *i32Ty = constZero->getType(); + Constant *columnConsts[] = { + hlslOP->GetU8Const(0), hlslOP->GetU8Const(1), hlslOP->GetU8Const(2), + hlslOP->GetU8Const(3), hlslOP->GetU8Const(4), hlslOP->GetU8Const(5), + hlslOP->GetU8Const(6), hlslOP->GetU8Const(7), hlslOP->GetU8Const(8), + hlslOP->GetU8Const(9), hlslOP->GetU8Const(10), hlslOP->GetU8Const(11), + hlslOP->GetU8Const(12), hlslOP->GetU8Const(13), hlslOP->GetU8Const(14), + hlslOP->GetU8Const(15)}; + for (Argument &arg : patchConstantFunc->args()) { DxilParameterAnnotation ¶mAnnotation = patchFuncAnnotation->GetParameterAnnotation(arg.getArgNo()); @@ -1422,11 +1448,21 @@ void HLSignatureLower::GenerateDxilPatchConstantFunctionInputs() { collectInputOutputAccessInfo(&arg, constZero, accessInfoList, /*hasVertexOrPrimID*/ true, true, bRowMajor, false); for (InputOutputAccessInfo &info : accessInfoList) { + Constant *OpArg = hlslOP->GetU32Const((unsigned)opcode); if (LoadInst *ldInst = dyn_cast(info.user)) { - Constant *OpArg = hlslOP->GetU32Const((unsigned)opcode); Value *args[] = {OpArg, inputID, info.idx, info.vectorIdx, info.vertexOrPrimID}; replaceLdWithLdInput(dxilLdFunc, ldInst, cols, args, bI1Cast); + } else if (CallInst *CI = dyn_cast(info.user)) { + HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction()); + // Intrinsic will be translated later. + if (group == HLOpcodeGroup::HLIntrinsic || group == HLOpcodeGroup::NotHL) + return; + unsigned opcode = GetHLOpcode(CI); + DXASSERT_NOMSG(group == HLOpcodeGroup::HLMatLoadStore); + HLMatLoadStoreOpcode matOp = static_cast(opcode); + if (matOp == HLMatLoadStoreOpcode::ColMatLoad || matOp == HLMatLoadStoreOpcode::RowMatLoad) + replaceMatLdWithLdInputs(CI, matOp, dxilLdFunc, OpArg, inputID, columnConsts, info.vertexOrPrimID, info.idx); } else { DXASSERT(0, "input should only be ld"); } diff --git a/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript.hlsl new file mode 100644 index 0000000000..855e822494 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript.hlsl @@ -0,0 +1,64 @@ +// RUN: %dxc -DMIDX=1 -DVIDX=2 -T ps_6_0 %s | FileCheck %s +// RUN: %dxc -DMIDX=i -DVIDX=2 -T ps_6_0 %s | FileCheck %s +// RUN: %dxc -DMIDX=1 -DVIDX=j -T ps_6_0 %s | FileCheck %s +// RUN: %dxc -DMIDX=i -DVIDX=j -T ps_6_0 %s | FileCheck %s + +// Test for general subscript operations on matrix arrays. +// Specifically focused on shader inputs which failed to lower previously + +float3 GetRow(const float3x3 m, const int j) +{ + return m[j]; +} + +float3x3 g[2]; +groupshared float3x3 gs[2]; + +struct JustMtx { + float3x3 mtx; +}; + +struct MtxArray { + float3x3 mtx[2]; +}; + +float3 main(const int i : I, const int j : J, const float3x3 m[2]: M, JustMtx jm[2] : JM, MtxArray ma : A) : SV_Target +{ + float3 ret = 0.0; + + // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32 + // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32 + // CHECK: call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32 + ret += g[MIDX][VIDX]; + + // CHECK: load float, float addrspace(3)* + // CHECK: load float, float addrspace(3)* + // CHECK: load float, float addrspace(3)* + ret += gs[MIDX][VIDX]; + + // CHECK: call float @dx.op.loadInput.f32(i32 4, i32 2, i32 {{%?[0-9]*}}, i8 2, i32 undef) + // CHECK: call float @dx.op.loadInput.f32(i32 4, i32 2, i32 {{%?[0-9]*}}, i8 2, i32 undef) + // CHECK: call float @dx.op.loadInput.f32(i32 4, i32 2, i32 {{%?[0-9]*}}, i8 2, i32 undef) + ret += m[MIDX][VIDX]; + + // CHECK: call float @dx.op.loadInput.f32(i32 4, i32 3, i32 {{%?[0-9]*}}, i8 2, i32 undef) + // CHECK: call float @dx.op.loadInput.f32(i32 4, i32 3, i32 {{%?[0-9]*}}, i8 2, i32 undef) + // CHECK: call float @dx.op.loadInput.f32(i32 4, i32 3, i32 {{%?[0-9]*}}, i8 2, i32 undef) + ret += jm[MIDX].mtx[VIDX]; + + // CHECK: call float @dx.op.loadInput.f32(i32 4, i32 4, i32 {{%?[0-9]*}}, i8 2, i32 undef) + // CHECK: call float @dx.op.loadInput.f32(i32 4, i32 4, i32 {{%?[0-9]*}}, i8 2, i32 undef) + // CHECK: call float @dx.op.loadInput.f32(i32 4, i32 4, i32 {{%?[0-9]*}}, i8 2, i32 undef) + ret += ma.mtx[MIDX][VIDX]; + + ret += GetRow(g[MIDX], VIDX); + ret += GetRow(gs[MIDX], VIDX); + ret += GetRow(m[MIDX], VIDX); + ret += GetRow(jm[MIDX].mtx, VIDX); + ret += GetRow(ma.mtx[MIDX], VIDX); + + // CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %{{.*}}) + // CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %{{.*}}) + // CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %{{.*}}) + return ret; +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript_ds.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript_ds.hlsl new file mode 100644 index 0000000000..789f0d2171 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript_ds.hlsl @@ -0,0 +1,35 @@ +// RUN: %dxc -DMIDX=1 -DVIDX=2 -T ds_6_0 %s | FileCheck %s +// RUN: %dxc -DMIDX=i -DVIDX=2 -T ds_6_0 %s | FileCheck %s +// RUN: %dxc -DMIDX=1 -DVIDX=j -T ds_6_0 %s | FileCheck %s +// RUN: %dxc -DMIDX=i -DVIDX=j -T ds_6_0 %s | FileCheck %s + +// Specific test for subscript operation on OutputPatch matrix data + +struct MatStruct { + float4x4 mtx : M; +}; + +float4 GetRow(const OutputPatch tri, int i, int j) +{ + return tri[MIDX].mtx[VIDX]; +} + +[domain("tri")] +float4 main(int i : I, int j : J, const OutputPatch tri) : SV_Position { + + float4 ret = 0; + + // CHECK: call float @dx.op.loadInput.f32(i32 4, i32 0, i32 {{%?[0-9]*}}, i8 2, i32 {{%?[0-9]*}}) + // CHECK: call float @dx.op.loadInput.f32(i32 4, i32 0, i32 {{%?[0-9]*}}, i8 2, i32 {{%?[0-9]*}}) + // CHECK: call float @dx.op.loadInput.f32(i32 4, i32 0, i32 {{%?[0-9]*}}, i8 2, i32 {{%?[0-9]*}}) + // CHECK: call float @dx.op.loadInput.f32(i32 4, i32 0, i32 {{%?[0-9]*}}, i8 2, i32 {{%?[0-9]*}}) + ret += tri[MIDX].mtx[VIDX]; + + ret += GetRow(tri, MIDX, VIDX); + + // CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %{{.*}}) + // CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %{{.*}}) + // CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %{{.*}}) + // CHECK: call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %{{.*}}) + return ret; +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript_hs.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript_hs.hlsl new file mode 100644 index 0000000000..4a98a7584c --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript_hs.hlsl @@ -0,0 +1,57 @@ +// RUN: %dxc -DMIDX=1 -DVIDX=2 -T hs_6_0 %s | FileCheck %s +// RN: %dxc -DMIDX=i -DVIDX=2 -T hs_6_0 %s | FileCheck %s +// RN: %dxc -DMIDX=1 -DVIDX=j -T hs_6_0 %s | FileCheck %s +// RN: %dxc -DMIDX=i -DVIDX=j -T hs_6_0 %s | FileCheck %s + +// Specific test for subscript operation on matrix array inputs in patch functions + +struct MatStruct { + int2 uv : TEXCOORD0; + float3x4 m_ObjectToWorld : TEXCOORD1; +}; + +struct Output { + float edges[3] : SV_TessFactor; + float inside : SV_InsideTessFactor; +}; + +// CHECK: call float @dx.op.loadInput.f32 +// CHECK: call void @dx.op.storePatchConstant.f32 +// CHECK: call float @dx.op.loadInput.f32 +// CHECK: call void @dx.op.storePatchConstant.f32 +// CHECK: call float @dx.op.loadInput.f32 +// CHECK: call void @dx.op.storePatchConstant.f32 +// CHECK: call void @dx.op.storePatchConstant.f32 +Output Patch(InputPatch inputs) +{ + Output ret; + int i = inputs[0].uv.x; + int j = inputs[0].uv.y; + + ret.edges[0] = inputs[MIDX].m_ObjectToWorld[VIDX][0]; + ret.edges[1] = inputs[MIDX].m_ObjectToWorld[VIDX][1]; + ret.edges[2] = inputs[MIDX].m_ObjectToWorld[VIDX][2]; + ret.inside = 1.0f; + return ret; +} + + +// CHECK: call float @dx.op.loadInput.f32 +// CHECK: call float @dx.op.loadInput.f32 +// CHECK: call float @dx.op.loadInput.f32 +// CHECK: call float @dx.op.loadInput.f32 +// CHECK: call void @dx.op.storeOutput.f32 +// CHECK: call void @dx.op.storeOutput.f32 +// CHECK: call void @dx.op.storeOutput.f32 +// CHECK: call void @dx.op.storeOutput.f32 +[domain("tri")] +[partitioning("fractional_odd")] +[outputtopology("triangle_cw")] +[patchconstantfunc("Patch")] +[outputcontrolpoints(3)] +float4 main(InputPatch inputs) : SV_Position +{ + int i = inputs[0].uv.x; + int j = inputs[0].uv.y; + return inputs[MIDX].m_ObjectToWorld[VIDX]; +} From e400c528c417d4f6b24d01fe01ab6b3a9f431f0f Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Wed, 5 Aug 2020 08:51:41 -0700 Subject: [PATCH 2/4] Enable additional hull tests --- .../hlsl/types/matrix/matrix_subscript_hs.hlsl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript_hs.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript_hs.hlsl index 4a98a7584c..ad9158161a 100644 --- a/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript_hs.hlsl +++ b/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript_hs.hlsl @@ -1,7 +1,7 @@ // RUN: %dxc -DMIDX=1 -DVIDX=2 -T hs_6_0 %s | FileCheck %s -// RN: %dxc -DMIDX=i -DVIDX=2 -T hs_6_0 %s | FileCheck %s -// RN: %dxc -DMIDX=1 -DVIDX=j -T hs_6_0 %s | FileCheck %s -// RN: %dxc -DMIDX=i -DVIDX=j -T hs_6_0 %s | FileCheck %s +// RUN: %dxc -DMIDX=i -DVIDX=2 -T hs_6_0 %s | FileCheck %s +// RUN: %dxc -DMIDX=1 -DVIDX=j -T hs_6_0 %s | FileCheck %s +// RUN: %dxc -DMIDX=i -DVIDX=j -T hs_6_0 %s | FileCheck %s // Specific test for subscript operation on matrix array inputs in patch functions @@ -15,10 +15,10 @@ struct Output { float inside : SV_InsideTessFactor; }; +// Instruction order here is a bit inconsistent. +// So we can't test for all the outputs // CHECK: call float @dx.op.loadInput.f32 -// CHECK: call void @dx.op.storePatchConstant.f32 // CHECK: call float @dx.op.loadInput.f32 -// CHECK: call void @dx.op.storePatchConstant.f32 // CHECK: call float @dx.op.loadInput.f32 // CHECK: call void @dx.op.storePatchConstant.f32 // CHECK: call void @dx.op.storePatchConstant.f32 From bd4bb0cfe9b72e30078c7a2fc4fb76dab0c7f5a7 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Wed, 5 Aug 2020 09:27:37 -0700 Subject: [PATCH 3/4] consolidate some matrix lowering code --- lib/HLSL/HLMatrixLowerPass.cpp | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/lib/HLSL/HLMatrixLowerPass.cpp b/lib/HLSL/HLMatrixLowerPass.cpp index c080be76b4..a1dd720fa4 100644 --- a/lib/HLSL/HLMatrixLowerPass.cpp +++ b/lib/HLSL/HLMatrixLowerPass.cpp @@ -1475,11 +1475,11 @@ void HLMatrixLowerPass::lowerHLMatSubscript(CallInst *Call, Value *MatPtr, Small IRBuilder<> CallBuilder(Call); Value *LoweredPtr = tryGetLoweredPtrOperand(MatPtr, CallBuilder); Value *LoweredMatrix = nullptr; - if (LoweredPtr == nullptr) { - Value *RootPtr = MatPtr; - while (GEPOperator *GEP = dyn_cast(RootPtr)) - RootPtr = GEP->getPointerOperand(); + Value *RootPtr = LoweredPtr? LoweredPtr: MatPtr; + while (GEPOperator *GEP = dyn_cast(RootPtr)) + RootPtr = GEP->getPointerOperand(); + if (LoweredPtr == nullptr) { if (!isa(RootPtr)) return; @@ -1492,22 +1492,13 @@ void HLMatrixLowerPass::lowerHLMatSubscript(CallInst *Call, Value *MatPtr, Small *m_pModule, HLOpcodeGroup::HLMatLoadStore, static_cast(Opcode), MatTy.getLoweredVectorTypeForReg(), { CallBuilder.getInt32((uint32_t)Opcode), MatPtr }, Call->getCalledFunction()->getAttributes().getFnAttributes(), CallBuilder); - HLMatrixSubscriptUseReplacer UseReplacer(Call, LoweredPtr, LoweredMatrix, - ElemIndices, false /*AllowLoweredPtrGEPs*/, m_deadInsts); - DXASSERT(Call->use_empty(), "Expected all matrix subscript uses to have been replaced."); - addToDeadInsts(Call); - return; } - // For global variables, we can GEP directly into the lowered vector pointer. // This is necessary to support group shared memory atomics and the likes. - Value *RootPtr = LoweredPtr; - while (GEPOperator *GEP = dyn_cast(RootPtr)) - RootPtr = GEP->getPointerOperand(); bool AllowLoweredPtrGEPs = isa(RootPtr); // Just constructing this does all the work - HLMatrixSubscriptUseReplacer UseReplacer(Call, LoweredPtr, nullptr /*TempLoweredMatrix*/, + HLMatrixSubscriptUseReplacer UseReplacer(Call, LoweredPtr, LoweredMatrix, ElemIndices, AllowLoweredPtrGEPs, m_deadInsts); DXASSERT(Call->use_empty(), "Expected all matrix subscript uses to have been replaced."); From 5e863b8a0007f3a7b6eaa0bc3bb39c488c30c43c Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Fri, 7 Aug 2020 05:22:27 -0700 Subject: [PATCH 4/4] broaden detection of signature entries A more correct way to identify functions that should delay the lowering of their matrix parameters to signature lowering time is to query whether the function has a signature. This covers entry functions for graphics shaders and also constant patch functions. Adds library variants to existing tests as well --- lib/HLSL/HLMatrixLowerPass.cpp | 2 +- .../HLSLFileCheck/hlsl/types/matrix/matrix_subscript.hlsl | 5 +++++ .../HLSLFileCheck/hlsl/types/matrix/matrix_subscript_ds.hlsl | 5 +++++ .../HLSLFileCheck/hlsl/types/matrix/matrix_subscript_hs.hlsl | 5 +++++ 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/lib/HLSL/HLMatrixLowerPass.cpp b/lib/HLSL/HLMatrixLowerPass.cpp index a1dd720fa4..55e7f4be56 100644 --- a/lib/HLSL/HLMatrixLowerPass.cpp +++ b/lib/HLSL/HLMatrixLowerPass.cpp @@ -406,7 +406,7 @@ Value *HLMatrixLowerPass::tryGetLoweredPtrOperand(Value *Ptr, IRBuilder<> &Build RootPtr = GEP->getPointerOperand(); Argument *Arg = dyn_cast(RootPtr); - bool IsNonShaderArg = Arg != nullptr && !m_pHLModule->IsGraphicsShader(m_pHLModule->GetEntryFunction()); + bool IsNonShaderArg = Arg != nullptr && !m_pHLModule->IsEntryThatUsesSignatures(Arg->getParent()); if (IsNonShaderArg || isa(RootPtr)) { // Bitcast the matrix pointer to its lowered equivalent. // The HLMatrixBitcast pass will take care of this later. diff --git a/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript.hlsl index 855e822494..ec543822bc 100644 --- a/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript.hlsl +++ b/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript.hlsl @@ -2,6 +2,10 @@ // RUN: %dxc -DMIDX=i -DVIDX=2 -T ps_6_0 %s | FileCheck %s // RUN: %dxc -DMIDX=1 -DVIDX=j -T ps_6_0 %s | FileCheck %s // RUN: %dxc -DMIDX=i -DVIDX=j -T ps_6_0 %s | FileCheck %s +// RUN: %dxc -DMIDX=1 -DVIDX=2 -T lib_6_3 %s | FileCheck %s +// RUN: %dxc -DMIDX=i -DVIDX=2 -T lib_6_3 %s | FileCheck %s +// RUN: %dxc -DMIDX=1 -DVIDX=j -T lib_6_3 %s | FileCheck %s +// RUN: %dxc -DMIDX=i -DVIDX=j -T lib_6_3 %s | FileCheck %s // Test for general subscript operations on matrix arrays. // Specifically focused on shader inputs which failed to lower previously @@ -22,6 +26,7 @@ struct MtxArray { float3x3 mtx[2]; }; +[shader("pixel")] float3 main(const int i : I, const int j : J, const float3x3 m[2]: M, JustMtx jm[2] : JM, MtxArray ma : A) : SV_Target { float3 ret = 0.0; diff --git a/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript_ds.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript_ds.hlsl index 789f0d2171..7ddabc3702 100644 --- a/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript_ds.hlsl +++ b/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript_ds.hlsl @@ -2,6 +2,10 @@ // RUN: %dxc -DMIDX=i -DVIDX=2 -T ds_6_0 %s | FileCheck %s // RUN: %dxc -DMIDX=1 -DVIDX=j -T ds_6_0 %s | FileCheck %s // RUN: %dxc -DMIDX=i -DVIDX=j -T ds_6_0 %s | FileCheck %s +// RUN: %dxc -DMIDX=1 -DVIDX=2 -T lib_6_3 %s | FileCheck %s +// RUN: %dxc -DMIDX=i -DVIDX=2 -T lib_6_3 %s | FileCheck %s +// RUN: %dxc -DMIDX=1 -DVIDX=j -T lib_6_3 %s | FileCheck %s +// RUN: %dxc -DMIDX=i -DVIDX=j -T lib_6_3 %s | FileCheck %s // Specific test for subscript operation on OutputPatch matrix data @@ -15,6 +19,7 @@ float4 GetRow(const OutputPatch tri, int i, int j) } [domain("tri")] +[shader("domain")] float4 main(int i : I, int j : J, const OutputPatch tri) : SV_Position { float4 ret = 0; diff --git a/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript_hs.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript_hs.hlsl index ad9158161a..ed210634c7 100644 --- a/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript_hs.hlsl +++ b/tools/clang/test/HLSLFileCheck/hlsl/types/matrix/matrix_subscript_hs.hlsl @@ -2,6 +2,10 @@ // RUN: %dxc -DMIDX=i -DVIDX=2 -T hs_6_0 %s | FileCheck %s // RUN: %dxc -DMIDX=1 -DVIDX=j -T hs_6_0 %s | FileCheck %s // RUN: %dxc -DMIDX=i -DVIDX=j -T hs_6_0 %s | FileCheck %s +// RUN: %dxc -DMIDX=1 -DVIDX=2 -T lib_6_3 %s | FileCheck %s +// RUN: %dxc -DMIDX=i -DVIDX=2 -T lib_6_3 %s | FileCheck %s +// RUN: %dxc -DMIDX=1 -DVIDX=j -T lib_6_3 %s | FileCheck %s +// RUN: %dxc -DMIDX=i -DVIDX=j -T lib_6_3 %s | FileCheck %s // Specific test for subscript operation on matrix array inputs in patch functions @@ -49,6 +53,7 @@ Output Patch(InputPatch inputs) [outputtopology("triangle_cw")] [patchconstantfunc("Patch")] [outputcontrolpoints(3)] +[shader("hull")] float4 main(InputPatch inputs) : SV_Position { int i = inputs[0].uv.x;