Skip to content

Commit

Permalink
New barrier DXIL Op fixes for Validation and RDAT function compatibil…
Browse files Browse the repository at this point in the history
…ity info (#6291) (#6331)

Fix barrier allowed ops and flags by shader kind

New barrier operations lacked validation, and their RDAT info had an
incorrect minimum target and incorrect shader stage flags.

- Identify barrier DXIL operations with new `is_barrier` in `hctdb.py`
and generated `OP::IsDxilOpBarrier`.
- Identify when a barrier op requires shader stage with group
(compute-like stage), or when it requires node memory.
- Add new `OptFeatureInfo_RequiresGroup` to identify function only
compatible with a shader stage with a visible group for access to
groupshared memory or use of group sync.
- Translate to original `BarrierMode` when compatible; adds
`BarrierMode::Invalid` to identify invalid cases.
- Account for `DXIL::MemoryTypeFlags::AllMemory` being allowed and
auto-masked by driver.
- Properly set min shader model and compatible shader stage flags.
- Validate barrier for shader stage.
- Added new barriers to counters which were missing.

Addressing parts of: #6256 and #6292
Fixes #6266

(cherry picked from commit 2314d06)
  • Loading branch information
tex3d authored Feb 20, 2024
1 parent 8bb9f28 commit 1577fa9
Show file tree
Hide file tree
Showing 22 changed files with 598 additions and 60 deletions.
1 change: 1 addition & 0 deletions docs/DXIL.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3063,6 +3063,7 @@ INSTR.BARRIERMODEFORNONCS sync in a non-Compute/Amplification/Me
INSTR.BARRIERMODENOMEMORY sync must include some form of memory barrier - _u (UAV) and/or _g (Thread Group Shared Memory). Only _t (thread group sync) is optional.
INSTR.BARRIERMODEUSELESSUGROUP sync can't specify both _ugroup and _uglobal. If both are needed, just specify _uglobal.
INSTR.BARRIERNONCONSTANTFLAGARGUMENT Memory type, access, or sync flag is not constant
INSTR.BARRIERREQUIRESNODE sync in a non-Node Shader must not sync node record memory.
INSTR.BUFFERUPDATECOUNTERONRESHASCOUNTER BufferUpdateCounter valid only when HasCounter is true.
INSTR.BUFFERUPDATECOUNTERONUAV BufferUpdateCounter valid only on UAV.
INSTR.CALLOLOAD Call to DXIL intrinsic must match overload signature
Expand Down
15 changes: 12 additions & 3 deletions include/dxc/DXIL/DxilConstants.h
Original file line number Diff line number Diff line change
Expand Up @@ -1484,6 +1484,7 @@ enum class AtomicBinOpCode : unsigned {

// Barrier/fence modes.
enum class BarrierMode : unsigned {
Invalid = 0,
SyncThreadGroup = 0x00000001,
UAVFenceGlobal = 0x00000002,
UAVFenceThreadGroup = 0x00000004,
Expand Down Expand Up @@ -1844,15 +1845,19 @@ enum class MemoryTypeFlag : uint32_t {
NodeInputMemory = 0x00000004, // NODE_INPUT_MEMORY
NodeOutputMemory = 0x00000008, // NODE_OUTPUT_MEMORY
AllMemory = 0x0000000F, // ALL_MEMORY
ValidMask = 0x0000000F
ValidMask = 0x0000000F,
NodeFlags = NodeInputMemory | NodeOutputMemory,
LegacyFlags = UavMemory | GroupSharedMemory,
GroupFlags = GroupSharedMemory,
};

// Corresponds to SEMANTIC_FLAG enums in HLSL
// Semantic flags for the SM 6.8 Barrier* DXIL operations: what to sync and
// at what scope.
enum class BarrierSemanticFlag : uint32_t {
  GroupSync = 0x00000001,   // GROUP_SYNC
  GroupScope = 0x00000002,  // GROUP_SCOPE
  DeviceScope = 0x00000004, // DEVICE_SCOPE
  ValidMask = 0x00000007,   // Mask of all defined flag bits above.
  // Flags that require a visible thread group (group sync or group-scoped
  // memory visibility); used to detect group-requiring barriers.
  GroupFlags = GroupSync | GroupScope,
};

// Constant for Container.
Expand Down Expand Up @@ -1940,8 +1945,12 @@ static_assert(ShaderFeatureInfoCount <= 40,
// support it, or to determine when the flag
// ShaderFeatureInfo_DerivativesInMeshAndAmpShaders is required.
const uint64_t OptFeatureInfo_UsesDerivatives = 0x0000010000000000ULL;
// OptFeatureInfo_RequiresGroup tracks whether a function requires a visible
// group that supports things like groupshared memory and group sync.
const uint64_t OptFeatureInfo_RequiresGroup = 0x0000020000000000ULL;

const uint64_t OptFeatureInfoShift = 40;
const unsigned OptFeatureInfoCount = 1;
const unsigned OptFeatureInfoCount = 2;
static_assert(OptFeatureInfoCount <= 23,
"OptFeatureInfo flags must fit in 23 bits; after that we need to "
"expand the FeatureInfo blob part and start defining a new set "
Expand Down
4 changes: 4 additions & 0 deletions include/dxc/DXIL/DxilOperations.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,10 @@ class OP {
static bool IsDxilOpWave(OpCode C);
static bool IsDxilOpGradient(OpCode C);
static bool IsDxilOpFeedback(OpCode C);
static bool IsDxilOpBarrier(OpCode C);
static bool BarrierRequiresGroup(const llvm::CallInst *CI);
static bool BarrierRequiresNode(const llvm::CallInst *CI);
static DXIL::BarrierMode TranslateToBarrierMode(const llvm::CallInst *CI);
static bool IsDxilOpTypeName(llvm::StringRef name);
static bool IsDxilOpType(llvm::StructType *ST);
static bool IsDupDxilOpType(llvm::StructType *ST);
Expand Down
19 changes: 16 additions & 3 deletions include/dxc/DXIL/DxilShaderFlags.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ class ShaderFlags {
void SetShaderFlagsRaw(uint64_t data);
void CombineShaderFlags(const ShaderFlags &other);

void ClearLocalFlags();

void SetDisableOptimizations(bool flag) { m_bDisableOptimizations = flag; }
bool GetDisableOptimizations() const { return m_bDisableOptimizations; }

Expand Down Expand Up @@ -214,10 +216,13 @@ class ShaderFlags {
void SetWaveMMA(bool flag) { m_bWaveMMA = flag; }
bool GetWaveMMA() const { return m_bWaveMMA; }

// Per-function flag
// Per-function flags
void SetUsesDerivatives(bool flag) { m_bUsesDerivatives = flag; }
bool GetUsesDerivatives() const { return m_bUsesDerivatives; }

void SetRequiresGroup(bool flag) { m_bRequiresGroup = flag; }
bool GetRequiresGroup() const { return m_bRequiresGroup; }

private:
// Bit: 0
unsigned
Expand Down Expand Up @@ -331,12 +336,20 @@ class ShaderFlags {
m_bSampleCmpGradientOrBias : 1; // SHADER_FEATURE_SAMPLE_CMP_GRADIENT_OR_BIAS
unsigned m_bExtendedCommandInfo : 1; // SHADER_FEATURE_EXTENDED_COMMAND_INFO

// Per-function flag
// Per-function flags
// Bit: 39
unsigned m_bUsesDerivatives : 1; // SHADER_FEATURE_OPT_USES_DERIVATIVES
// (OptFeatureInfo_UsesDerivatives)

uint32_t m_align1 : 24; // align to 64 bit.
// m_bRequiresGroup indicates that the function requires a visible group.
// For instance, to access group shared memory or use group sync.
// This is necessary because shader stage is insufficient to indicate group
// availability with the advent of thread launch node shaders.
// Bit: 40
unsigned m_bRequiresGroup : 1; // SHADER_FEATURE_OPT_REQUIRES_GROUP
// (OptFeatureInfo_RequiresGroup)

uint32_t m_align1 : 23; // align to 64 bit.
};

} // namespace hlsl
3 changes: 2 additions & 1 deletion include/dxc/DxilContainer/RDAT_LibraryTypes.inl
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,11 @@ RDAT_ENUM_START(DxilFeatureInfo2, uint32_t)
RDAT_ENUM_VALUE(ExtendedCommandInfo, 0x1)
// OptFeatureInfo flags
RDAT_ENUM_VALUE(Opt_UsesDerivatives, 0x100)
RDAT_ENUM_VALUE(Opt_RequiresGroup, 0x200)
#if DEF_RDAT_ENUMS == DEF_RDAT_DUMP_IMPL
static_assert(DXIL::ShaderFeatureInfoCount == 33,
"otherwise, RDAT_ENUM definition needs updating");
static_assert(DXIL::OptFeatureInfoCount == 1,
static_assert(DXIL::OptFeatureInfoCount == 2,
"otherwise, RDAT_ENUM definition needs updating");
#endif
RDAT_ENUM_END()
Expand Down
28 changes: 13 additions & 15 deletions lib/DXIL/DxilModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,15 +293,15 @@ void DxilModule::CollectShaderFlagsForModule(ShaderFlags &Flags) {
for (auto &itInfo : m_FuncToShaderCompat)
Flags.CombineShaderFlags(itInfo.second.shaderFlags);

// Clear UsesDerivatives flag for module, making sure
// DerivativesInMeshAndAmpShaders is set for MS/AS.
const ShaderModel *SM = GetShaderModel();

// Set DerivativesInMeshAndAmpShaders if necessary for MS/AS.
if (Flags.GetUsesDerivatives()) {
Flags.SetUsesDerivatives(false);
if (m_pSM->IsMS() || m_pSM->IsAS())
if (SM->IsMS() || SM->IsAS())
Flags.SetDerivativesInMeshAndAmpShaders(true);
}

const ShaderModel *SM = GetShaderModel();
// Clear function-local flags not intended for the module.
Flags.ClearLocalFlags();

unsigned NumUAVs = 0;
const unsigned kSmallUAVCount = 8;
Expand Down Expand Up @@ -2117,11 +2117,11 @@ bool DxilModule::ShaderCompatInfo::Merge(ShaderCompatInfo &other) {
// Compare that minimum required version to the values passed in with
// `minMajor` and `minMinor` and pass the maximum of those back through those
// same variables.
// Return adjusted `ShaderFlags` according to `props` set.
static ShaderFlags
AdjustMinimumShaderModelAndFlags(ShaderFlags flags,
const DxilFunctionProps *props,
unsigned &minMajor, unsigned &minMinor) {
// Adjusts `ShaderFlags` argument according to `props` set.
static void AdjustMinimumShaderModelAndFlags(const DxilFunctionProps *props,
ShaderFlags &flags,
unsigned &minMajor,
unsigned &minMinor) {
// Adjust flags based on DxilFunctionProps and compute minimum shader model.
// Library functions use flags to capture properties that may or may not be
// used in the final shader, depending on that final shader's shader model.
Expand Down Expand Up @@ -2189,8 +2189,6 @@ AdjustMinimumShaderModelAndFlags(ShaderFlags flags,
DXIL::UpdateToMaxOfVersions(minMajor, minMinor, 6, 2);
else if (flags.GetViewID() || flags.GetBarycentrics())
DXIL::UpdateToMaxOfVersions(minMajor, minMinor, 6, 1);

return flags;
}

void DxilModule::ComputeShaderCompatInfo() {
Expand Down Expand Up @@ -2277,8 +2275,8 @@ void DxilModule::ComputeShaderCompatInfo() {
ShaderFlags &flags = info.shaderFlags;
if (dxil18Plus) {
// This handles WaveSize requirement as well.
flags = AdjustMinimumShaderModelAndFlags(flags, props, info.minMajor,
info.minMinor);
AdjustMinimumShaderModelAndFlags(props, flags, info.minMajor,
info.minMinor);
} else {
// Match prior versions that were missing some feature detection.
if (flags.GetUseNativeLowPrecision() && flags.GetLowPrecisionPresent())
Expand Down
178 changes: 160 additions & 18 deletions lib/DXIL/DxilOperations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2876,6 +2876,135 @@ bool OP::IsDxilOpFeedback(OpCode C) {
// OPCODE-FEEDBACK:END
}

// Returns true if `C` is one of the barrier DXIL operations (the legacy
// Barrier op or one of the SM 6.8 BarrierBy* ops).
// NOTE: the opcode list between the OPCODE-BARRIER markers is generated from
// the `is_barrier` property in hctdb.py — regenerate rather than hand-edit.
bool OP::IsDxilOpBarrier(OpCode C) {
  unsigned op = (unsigned)C;
  // clang-format off
  // Python lines need to be not formatted.
  /* <py::lines('OPCODE-BARRIER')>hctdb_instrhelp.get_instrs_pred("op", "is_barrier")</py>*/
  // clang-format on
  // OPCODE-BARRIER:BEGIN
  // Instructions: Barrier=80, BarrierByMemoryType=244,
  // BarrierByMemoryHandle=245, BarrierByNodeRecordHandle=246
  return op == 80 || (244 <= op && op <= 246);
  // OPCODE-BARRIER:END
}

// Reduce `memoryTypeFlags` by `allowedMask`, but only when the flags are
// exactly AllMemory.  AllMemory is special: drivers are allowed to
// auto-mask inapplicable memory types from it, so callers may drop bits
// from AllMemory without changing meaning.  Any other combination is
// returned unchanged, since the user asked for those types explicitly.
static unsigned MaskMemoryTypeFlagsIfAllowed(unsigned memoryTypeFlags,
                                             unsigned allowedMask) {
  // If the memory type is AllMemory, masking inapplicable flags is allowed.
  if (memoryTypeFlags != (unsigned)DXIL::MemoryTypeFlag::AllMemory)
    return memoryTypeFlags;
  return memoryTypeFlags & allowedMask;
}

// Returns true when the barrier call `CI` requires a shader stage with a
// visible thread group (groupshared memory access or group sync/scope).
// When the relevant flag operand is not a constant (invalid IR; caught by
// the validator), this conservatively returns false.
bool OP::BarrierRequiresGroup(const llvm::CallInst *CI) {
  OpCode opcode = OP::GetDxilOpFuncCallInst(CI);
  switch (opcode) {
  case OpCode::Barrier: {
    DxilInst_Barrier barrier(const_cast<CallInst *>(CI));
    if (isa<ConstantInt>(barrier.get_barrierMode())) {
      unsigned mode = barrier.get_barrierMode_val();
      // Every legacy mode except a bare UAVFenceGlobal involves the group
      // (TGSM fence, thread-group UAV fence, and/or group sync).
      return (mode != (unsigned)DXIL::BarrierMode::UAVFenceGlobal);
    }
    return false;
  }
  case OpCode::BarrierByMemoryType: {
    DxilInst_BarrierByMemoryType barrier(const_cast<CallInst *>(CI));
    if (isa<ConstantInt>(barrier.get_MemoryTypeFlags())) {
      unsigned memoryTypeFlags = barrier.get_MemoryTypeFlags_val();
      // AllMemory may be auto-masked by the driver, so group memory bits
      // within AllMemory alone do not force a group requirement.
      memoryTypeFlags = MaskMemoryTypeFlagsIfAllowed(
          memoryTypeFlags, ~(unsigned)DXIL::MemoryTypeFlag::GroupFlags);
      if (memoryTypeFlags & (unsigned)DXIL::MemoryTypeFlag::GroupFlags)
        return true;
    }
  }
    // Intentional fallthrough: even when memory type does not require a
    // group, the semantic flags might.
    LLVM_FALLTHROUGH;
  case OpCode::BarrierByMemoryHandle:
  case OpCode::BarrierByNodeRecordHandle: {
    // BarrierByMemoryType, BarrierByMemoryHandle, and BarrierByNodeRecordHandle
    // all have semanticFlags as the second operand.
    // (Using the BarrierByMemoryType wrapper here relies on that shared
    // operand position.)
    DxilInst_BarrierByMemoryType barrier(const_cast<CallInst *>(CI));
    if (isa<ConstantInt>(barrier.get_SemanticFlags())) {
      unsigned semanticFlags = barrier.get_SemanticFlags_val();
      if (semanticFlags & (unsigned)DXIL::BarrierSemanticFlag::GroupFlags)
        return true;
    }
    return false;
  }
  default:
    // Not a barrier op: no group requirement from this call.
    return false;
  }
}

// Returns true when the barrier call `CI` requires a node shader because it
// synchronizes node record memory.  BarrierByNodeRecordHandle always does;
// BarrierByMemoryType does when node memory flags survive AllMemory masking.
// Non-constant flags conservatively return false (validated elsewhere).
bool OP::BarrierRequiresNode(const llvm::CallInst *CI) {
  OpCode opcode = OP::GetDxilOpFuncCallInst(CI);
  switch (opcode) {
  case OpCode::BarrierByNodeRecordHandle:
    return true;
  case OpCode::BarrierByMemoryType: {
    DxilInst_BarrierByMemoryType barrier(const_cast<CallInst *>(CI));
    if (isa<ConstantInt>(barrier.get_MemoryTypeFlags())) {
      unsigned memoryTypeFlags = barrier.get_MemoryTypeFlags_val();
      // Mask off node flags, if allowed.
      // (AllMemory is auto-masked by the driver, so it alone does not
      // require a node shader.)
      memoryTypeFlags = MaskMemoryTypeFlagsIfAllowed(
          memoryTypeFlags, ~(unsigned)DXIL::MemoryTypeFlag::NodeFlags);
      return (memoryTypeFlags & (unsigned)DXIL::MemoryTypeFlag::NodeFlags) != 0;
    }
    return false;
  }
  default:
    return false;
  }
}

// Translate a barrier call to the legacy DXIL::BarrierMode used by the
// original Barrier op, for targeting shader models before 6.8.
// Returns BarrierMode::Invalid when the call cannot be represented as a
// legacy mode (handle-based barriers, node memory, non-constant flags, or a
// flag combination with no legacy equivalent).  Note Invalid == 0, so a
// translation that sets no mode bits also yields Invalid.
DXIL::BarrierMode OP::TranslateToBarrierMode(const llvm::CallInst *CI) {
  OpCode opcode = OP::GetDxilOpFuncCallInst(CI);
  switch (opcode) {
  case OpCode::Barrier: {
    // Already the legacy op; pass the constant mode through unchanged.
    DxilInst_Barrier barrier(const_cast<CallInst *>(CI));
    if (isa<ConstantInt>(barrier.get_barrierMode())) {
      unsigned mode = barrier.get_barrierMode_val();
      return static_cast<DXIL::BarrierMode>(mode);
    }
    return DXIL::BarrierMode::Invalid;
  }
  case OpCode::BarrierByMemoryType: {
    unsigned memoryTypeFlags = 0;
    unsigned semanticFlags = 0;
    DxilInst_BarrierByMemoryType barrier(const_cast<CallInst *>(CI));
    if (isa<ConstantInt>(barrier.get_MemoryTypeFlags())) {
      memoryTypeFlags = barrier.get_MemoryTypeFlags_val();
    }
    if (isa<ConstantInt>(barrier.get_SemanticFlags())) {
      semanticFlags = barrier.get_SemanticFlags_val();
    }

    // Mask to legacy flags, if allowed.
    // (Only applies to AllMemory, which drivers may auto-mask.)
    memoryTypeFlags = MaskMemoryTypeFlagsIfAllowed(
        memoryTypeFlags, (unsigned)DXIL::MemoryTypeFlag::LegacyFlags);
    // Any remaining non-legacy memory type (e.g. node memory) has no legacy
    // encoding.
    if (memoryTypeFlags & ~(unsigned)DXIL::MemoryTypeFlag::LegacyFlags)
      return DXIL::BarrierMode::Invalid;

    // Build the legacy mode bit-by-bit from memory type + semantics.
    unsigned mode = 0;
    if (memoryTypeFlags & (unsigned)DXIL::MemoryTypeFlag::GroupSharedMemory)
      mode |= (unsigned)DXIL::BarrierMode::TGSMFence;
    if (memoryTypeFlags & (unsigned)DXIL::MemoryTypeFlag::UavMemory) {
      // UAV fence scope: device wins over group; with neither scope flag,
      // no UAV fence bit is emitted.
      if (semanticFlags & (unsigned)DXIL::BarrierSemanticFlag::DeviceScope) {
        mode |= (unsigned)DXIL::BarrierMode::UAVFenceGlobal;
      } else if (semanticFlags &
                 (unsigned)DXIL::BarrierSemanticFlag::GroupScope) {
        mode |= (unsigned)DXIL::BarrierMode::UAVFenceThreadGroup;
      }
    }
    if (semanticFlags & (unsigned)DXIL::BarrierSemanticFlag::GroupSync)
      mode |= (unsigned)DXIL::BarrierMode::SyncThreadGroup;
    return static_cast<DXIL::BarrierMode>(mode);
  }
  default:
    // Handle-based barriers have no legacy equivalent.
    return DXIL::BarrierMode::Invalid;
  }
}

#define SFLAG(stage) ((unsigned)1 << (unsigned)DXIL::ShaderKind::stage)
void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation,
unsigned &major, unsigned &minor,
Expand Down Expand Up @@ -3168,9 +3297,8 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation,
SFLAG(Mesh) | SFLAG(Pixel) | SFLAG(Node);
return;
}
// Instructions: BarrierByMemoryType=244, BarrierByMemoryHandle=245,
// BarrierByNodeRecordHandle=246, SampleCmpGrad=254
if ((244 <= op && op <= 246) || op == 254) {
// Instructions: BarrierByMemoryHandle=245, SampleCmpGrad=254
if (op == 245 || op == 254) {
major = 6;
minor = 8;
return;
Expand All @@ -3185,11 +3313,11 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation,
}
// Instructions: AllocateNodeOutputRecords=238, GetNodeRecordPtr=239,
// IncrementOutputCount=240, OutputComplete=241, GetInputRecordCount=242,
// FinishedCrossGroupSharing=243, CreateNodeOutputHandle=247,
// IndexNodeHandle=248, AnnotateNodeHandle=249,
// FinishedCrossGroupSharing=243, BarrierByNodeRecordHandle=246,
// CreateNodeOutputHandle=247, IndexNodeHandle=248, AnnotateNodeHandle=249,
// CreateNodeInputRecordHandle=250, AnnotateNodeRecordHandle=251,
// NodeOutputIsValid=252, GetRemainingRecursionLevels=253
if ((238 <= op && op <= 243) || (247 <= op && op <= 253)) {
if ((238 <= op && op <= 243) || (246 <= op && op <= 253)) {
major = 6;
minor = 8;
mask = SFLAG(Node);
Expand All @@ -3202,6 +3330,17 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation,
mask = SFLAG(Vertex);
return;
}
// Instructions: BarrierByMemoryType=244
if (op == 244) {
if (bWithTranslation) {
major = 6;
minor = 0;
} else {
major = 6;
minor = 8;
}
return;
}
// Instructions: WaveMatrix_Annotate=226, WaveMatrix_Depth=227,
// WaveMatrix_Fill=228, WaveMatrix_LoadRawBuf=229,
// WaveMatrix_LoadGroupShared=230, WaveMatrix_StoreRawBuf=231,
Expand Down Expand Up @@ -3249,20 +3388,23 @@ void OP::GetMinShaderModelAndMask(const llvm::CallInst *CI,

// Additional rules are applied manually here.

// Barrier with mode != UAVFenceGlobal requires compute, amplification,
// mesh, or node. Instructions: Barrier=80
if (opcode == DXIL::OpCode::Barrier) {
// Barrier mode should be a constant, but be robust to non-constants here.
if (isa<ConstantInt>(
CI->getArgOperand(DxilInst_Barrier::arg_barrierMode))) {
DxilInst_Barrier barrier(const_cast<CallInst *>(CI));
unsigned mode = barrier.get_barrierMode_val();
if (mode != (unsigned)DXIL::BarrierMode::UAVFenceGlobal) {
mask &= SFLAG(Library) | SFLAG(Compute) | SFLAG(Amplification) |
SFLAG(Mesh) | SFLAG(Node);
// Barrier requiring node or group limit shader kinds.
if (IsDxilOpBarrier(opcode)) {
// If BarrierByMemoryType, check if translatable, or set min to 6.8.
if (bWithTranslation && opcode == DXIL::OpCode::BarrierByMemoryType) {
if (TranslateToBarrierMode(CI) == DXIL::BarrierMode::Invalid) {
major = 6;
minor = 8;
}
}
return;
if (BarrierRequiresNode(CI)) {
mask &= SFLAG(Library) | SFLAG(Node);
return;
} else if (BarrierRequiresGroup(CI)) {
mask &= SFLAG(Library) | SFLAG(Compute) | SFLAG(Amplification) |
SFLAG(Mesh) | SFLAG(Node);
return;
}
}

// 64-bit integer atomic ops require 6.6
Expand Down
Loading

0 comments on commit 1577fa9

Please sign in to comment.