-
Notifications
You must be signed in to change notification settings - Fork 12.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AMDGPU: Start fixing inconsistencies in usage of SubtargetPredicate #96337
AMDGPU: Start fixing inconsistencies in usage of SubtargetPredicate #96337
Conversation
SubtargetPredicate should be the primary "does this instruction exist" predicate, with OtherPredicates used for other side pieces of information. Changes like 856d1c4 were backwards. The problematic usage is how GFX12 is using HasRestrictedOffset. The multiclasses for buffers should probably be split up instead of hiding OtherPredicates inside the buffer atomic multiclasses. The two cases are mutually exclusive and really need a negated predicate for the not-gfx12 case. It's pretty terrible we have to manage this in the first place. TableGen should be able to figure out the required predicates from any instructions that appear in the pattern output.
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesSubtargetPredicate should be the primary "does this instruction exist" Changes like 856d1c4 were backwards. The problematic usage is how It's pretty terrible we have to manage this in the first place. Full diff: https://github.com/llvm/llvm-project/pull/96337.diff 1 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 21335e9b64647..639f071cafe50 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -532,7 +532,7 @@ multiclass MUBUF_Pseudo_Load_Pats_Common<string BaseInst, ValueType load_vt = i3
}
multiclass MUBUF_Pseudo_Load_Pats<string BaseInst, ValueType load_vt = i32, SDPatternOperator ld = null_frag>{
- let SubtargetPredicate = HasUnrestrictedSOffset in {
+ let OtherPredicates = [HasUnrestrictedSOffset] in {
defm : MUBUF_Pseudo_Load_Pats_Common<BaseInst, load_vt, ld>;
}
defm : MUBUF_Pseudo_Load_Pats_Common<BaseInst # "_VBUFFER", load_vt, ld>;
@@ -629,7 +629,7 @@ multiclass MUBUF_Pseudo_Store_Pats_Common<string BaseInst, ValueType store_vt =
}
multiclass MUBUF_Pseudo_Store_Pats<string BaseInst, ValueType store_vt = i32, SDPatternOperator st = null_frag> {
- let SubtargetPredicate = HasUnrestrictedSOffset in {
+ let OtherPredicates = [HasUnrestrictedSOffset] in {
defm : MUBUF_Pseudo_Store_Pats_Common<BaseInst, store_vt, st>;
}
defm : MUBUF_Pseudo_Store_Pats_Common<BaseInst # "_VBUFFER", store_vt, st>;
@@ -1227,12 +1227,12 @@ defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN <
"buffer_atomic_pk_add_f16", VGPR_32, v2f16
>;
-let OtherPredicates = [HasAtomicFaddRtnInsts] in
+let SubtargetPredicate = HasAtomicFaddRtnInsts in
defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_RTN<
"buffer_atomic_add_f32", VGPR_32, f32, null_frag
>;
-let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in
+let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in
defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_RTN <
"buffer_atomic_pk_add_f16", VGPR_32, v2f16, null_frag
>;
@@ -1699,9 +1699,11 @@ multiclass SIBufferAtomicPat_Common<string OpPrefix, ValueType vt, string Inst,
multiclass SIBufferAtomicPat<string OpPrefix, ValueType vt, string Inst,
list<string> RtnModes = ["ret", "noret"]> {
- let SubtargetPredicate = HasUnrestrictedSOffset in {
+ let OtherPredicates = [HasUnrestrictedSOffset] in {
defm : SIBufferAtomicPat_Common<OpPrefix, vt, Inst, RtnModes>;
}
+
+ // FIXME: This needs a !HasUnrestrictedSOffset predicate
defm : SIBufferAtomicPat_Common<OpPrefix, vt, Inst # "_VBUFFER", RtnModes>;
}
@@ -1732,18 +1734,19 @@ defm : SIBufferAtomicPat<"SIbuffer_atomic_xor", i64, "BUFFER_ATOMIC_XOR_X2">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_inc", i64, "BUFFER_ATOMIC_INC_X2">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_dec", i64, "BUFFER_ATOMIC_DEC_X2">;
-let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
+let SubtargetPredicate = HasAtomicCSubNoRtnInsts in
defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", ["noret"]>;
let SubtargetPredicate = isGFX12Plus in {
defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd", v2bf16, "BUFFER_ATOMIC_PK_ADD_BF16_VBUFFER">;
defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_cond_sub_u32", i32, "BUFFER_ATOMIC_COND_SUB_U32_VBUFFER", ["ret"]>;
+}
- let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
- defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_cond_sub_u32", i32, "BUFFER_ATOMIC_COND_SUB_U32_VBUFFER", ["noret"]>;
+let SubtargetPredicate = HasAtomicCSubNoRtnInsts in {
+defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_cond_sub_u32", i32, "BUFFER_ATOMIC_COND_SUB_U32_VBUFFER", ["noret"]>;
}
-let OtherPredicates = [isGFX6GFX7GFX10Plus] in {
+let SubtargetPredicate = isGFX6GFX7GFX10Plus in {
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f32, "BUFFER_ATOMIC_FMIN">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f32, "BUFFER_ATOMIC_FMAX">;
}
@@ -1803,29 +1806,21 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
defm : BufferAtomicPatterns_NO_RTN_Common<name, vt, opcode # "_VBUFFER">;
}
-let OtherPredicates = [HasAtomicFaddNoRtnInsts] in
+let SubtargetPredicate = HasAtomicFaddNoRtnInsts in
defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f32, "BUFFER_ATOMIC_ADD_F32", ["noret"]>;
-let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in {
- let SubtargetPredicate = isGFX9Only in
- defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["noret"]>;
-
- let SubtargetPredicate = isGFX12Plus in
- defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16_VBUFFER", ["noret"]>;
-} // End OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts]
+let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16NoRtnInsts in {
+ defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["noret"]>;
+} // End SubtargetPredicate = HasAtomicBufferGlobalPkAddF16NoRtnInsts
-let OtherPredicates = [HasAtomicFaddRtnInsts] in
+let SubtargetPredicate = HasAtomicFaddRtnInsts in
defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f32, "BUFFER_ATOMIC_ADD_F32", ["ret"]>;
-let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in {
- let SubtargetPredicate = isGFX9Only in
- defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>;
-
- let SubtargetPredicate = isGFX12Plus in
- defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16_VBUFFER", ["ret"]>;
-} // End OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts]
+let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in {
+ defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>;
+} // End SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts
-let OtherPredicates = [HasBufferFlatGlobalAtomicsF64] in {
+let SubtargetPredicate = HasBufferFlatGlobalAtomicsF64 in {
defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f64, "BUFFER_ATOMIC_MAX_F64">;
@@ -1901,7 +1896,7 @@ multiclass SIBufferAtomicCmpSwapPat_Common<ValueType vt, ValueType data_vt, stri
}
multiclass SIBufferAtomicCmpSwapPat<ValueType vt, ValueType data_vt, string Inst> {
- let SubtargetPredicate = HasUnrestrictedSOffset in {
+ let OtherPredicates = [HasUnrestrictedSOffset] in {
defm : SIBufferAtomicCmpSwapPat_Common<vt, data_vt, Inst>;
}
defm : SIBufferAtomicCmpSwapPat_Common<vt, data_vt, Inst # "_VBUFFER">;
@@ -1952,7 +1947,7 @@ multiclass MUBUFLoad_PatternOffset_Common <string Instr, ValueType vt,
multiclass MUBUFLoad_PatternOffset <string Instr, ValueType vt,
PatFrag ld> {
- let SubtargetPredicate = HasUnrestrictedSOffset in {
+ let OtherPredicates = [HasUnrestrictedSOffset] in {
defm : MUBUFLoad_PatternOffset_Common<Instr, vt, ld>;
}
defm : MUBUFLoad_PatternOffset_Common<Instr # "_VBUFFER", vt, ld>;
@@ -2193,7 +2188,7 @@ multiclass MTBUF_LoadIntrinsicPat_Common<SDPatternOperator name, ValueType vt,
multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode, ValueType memoryVt = vt> {
- let SubtargetPredicate = HasUnrestrictedSOffset in {
+ let OtherPredicates = [HasUnrestrictedSOffset] in {
defm : MTBUF_LoadIntrinsicPat_Common<name, vt, opcode, memoryVt>;
}
defm : MTBUF_LoadIntrinsicPat_Common<name, vt, opcode # "_VBUFFER", memoryVt>;
@@ -2208,7 +2203,7 @@ defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v2f32, "TBUFFER_LOAD_FORMAT_XY">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v3f32, "TBUFFER_LOAD_FORMAT_XYZ">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v4f32, "TBUFFER_LOAD_FORMAT_XYZW">;
-let OtherPredicates = [HasUnpackedD16VMem] in {
+let SubtargetPredicate = HasUnpackedD16VMem in {
defm : MTBUF_LoadIntrinsicPat_Common<SItbuffer_load_d16, f16, "TBUFFER_LOAD_FORMAT_D16_X_gfx80">;
defm : MTBUF_LoadIntrinsicPat_Common<SItbuffer_load_d16, i32, "TBUFFER_LOAD_FORMAT_D16_X_gfx80">;
defm : MTBUF_LoadIntrinsicPat_Common<SItbuffer_load_d16, v2i32, "TBUFFER_LOAD_FORMAT_D16_XY_gfx80">;
@@ -2216,7 +2211,7 @@ let OtherPredicates = [HasUnpackedD16VMem] in {
defm : MTBUF_LoadIntrinsicPat_Common<SItbuffer_load_d16, v4i32, "TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80">;
} // End HasUnpackedD16VMem.
-let OtherPredicates = [HasPackedD16VMem] in {
+let SubtargetPredicate = HasPackedD16VMem in {
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, f16, "TBUFFER_LOAD_FORMAT_D16_X">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, i32, "TBUFFER_LOAD_FORMAT_D16_X">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v2f16, "TBUFFER_LOAD_FORMAT_D16_XY">;
@@ -2265,7 +2260,7 @@ multiclass MTBUF_StoreIntrinsicPat_Common<SDPatternOperator name, ValueType vt,
multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode, ValueType memoryVt = vt> {
- let SubtargetPredicate = HasUnrestrictedSOffset in {
+ let OtherPredicates = [HasUnrestrictedSOffset] in {
defm : MTBUF_StoreIntrinsicPat_Common<name, vt, opcode, memoryVt>;
}
defm : MTBUF_StoreIntrinsicPat_Common<name, vt, opcode # "_VBUFFER", memoryVt>;
@@ -2280,7 +2275,7 @@ defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v2f32, "TBUFFER_STORE_FORMAT_XY"
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v3f32, "TBUFFER_STORE_FORMAT_XYZ">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v4f32, "TBUFFER_STORE_FORMAT_XYZW">;
-let OtherPredicates = [HasUnpackedD16VMem] in {
+let SubtargetPredicate = HasUnpackedD16VMem in {
defm : MTBUF_StoreIntrinsicPat_Common<SItbuffer_store_d16, f16, "TBUFFER_STORE_FORMAT_D16_X_gfx80">;
defm : MTBUF_StoreIntrinsicPat_Common<SItbuffer_store_d16, i32, "TBUFFER_STORE_FORMAT_D16_X_gfx80">;
defm : MTBUF_StoreIntrinsicPat_Common<SItbuffer_store_d16, v2i32, "TBUFFER_STORE_FORMAT_D16_XY_gfx80">;
@@ -2288,7 +2283,7 @@ let OtherPredicates = [HasUnpackedD16VMem] in {
defm : MTBUF_StoreIntrinsicPat_Common<SItbuffer_store_d16, v4i32, "TBUFFER_STORE_FORMAT_D16_XYZW_gfx80">;
} // End HasUnpackedD16VMem.
-let OtherPredicates = [HasPackedD16VMem] in {
+let SubtargetPredicate = HasPackedD16VMem in {
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, f16, "TBUFFER_STORE_FORMAT_D16_X">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, i32, "TBUFFER_STORE_FORMAT_D16_X">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v2f16, "TBUFFER_STORE_FORMAT_D16_XY">;
@@ -3267,10 +3262,7 @@ defm BUFFER_WBINVL1_VOL : MUBUF_Real_vi <0x3f>;
defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_Atomic_vi <0x4e>;
-
-let SubtargetPredicate = HasAtomicFaddNoRtnInsts in {
defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Real_Atomic_vi <0x4d>;
-} // End SubtargetPredicate = HasAtomicFaddNoRtnInsts
let SubtargetPredicate = isGFX90APlus in {
defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Real_Atomic_vi<0x4f>;
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am agnostic on the code changes themselves.
To understand the behavior it is necessary to consider the union of all Predicates, whether they are SubtargetPredicate, OtherPredicate, or True16Predicate. I would not care if we renamed them to Predicate0, Predicate1, Predicate2.
I think it is too useful for encapsulation and deduplication to apply different predicates on the members of a multiclass to forbid this, even though it is impossible to tell if that has been done when inheriting from the class except by inspecting the code.
This would be helped by a consistent convention at least. using SubtargetPredicate and OtherPredicates as the top level let is currently mixed. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is almost impossible to judge the consequences. I guess you need these applied to all downstream branches first and see it anything is broken.
This was so I could merge my assorted atomic patches into the downstream branches. It's too difficult to wrangle the isGFXY style checks |
@@ -1227,12 +1227,12 @@ defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN < | |||
"buffer_atomic_pk_add_f16", VGPR_32, v2f16 | |||
>; | |||
|
|||
let OtherPredicates = [HasAtomicFaddRtnInsts] in | |||
let SubtargetPredicate = HasAtomicFaddRtnInsts in |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there any logic here? Above OtherPredicates is used for 'HasSomething' condition, here it is used as a SubtargetPredicate. I.e. I do not see it is making anything better or more consistent.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think SubtargetPredicate should be the "primary" predicate attached to some subtarget feature, for whether the instruction exists at all. The OtherPredicates are for misc. other things, like xnack is enabled (or in the buffer cases, the gfx12 vbuffer encoding).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK anyway.
I think it will be definitely better to document this convention somewhere; otherwise it is pretty challenging to decipher it. |
…lvm#96337) SubtargetPredicate should be the primary "does this instruction exist" predicate, with OtherPredicates used for other side pieces of information. Changes like 856d1c4 were backwards. The problematic usage is how GFX12 is using HasRestrictedOffset. The multiclasses for buffers should probably be split up instead of hiding OtherPredicates inside the buffer atomic multiclasses. The two cases are mutually exclusive and really need a negated predicate for the not-gfx12 case. It's pretty terrible we have to manage this in the first place. TableGen should be able to figure out the required predicates from any instructions that appear in the pattern output.
This introduces compiler and dwarfdump support for emitting and parsing the new `DW_CFA_AARCH64_negate_ra_state_with_pc` DWARF instruction for FEAT_PAuth_LR. This does mean that, when using FEAT_PAuthLR, the improvements introduced in llvm#96337 cannot be utilised. `.cfi_negate_ra_state_with_pc` must be emitted directly after the signing instruction, and when bundled with other CFI calls, leads to faults when running a program. There are no changes seen when not using FEAT_PAuthLR to how the CFI Instructions are generated. See ARM-software/abi-aa#245 for the ABI change that incororates FEAT_PAuthLR. Authored-by: pratlucas <[email protected]> Co-authored by: vhscampos <[email protected]> Co-authored by: Stylie777 <[email protected]>
This introduces compiler and dwarfdump support for emitting and parsing the new `DW_CFA_AARCH64_negate_ra_state_with_pc` DWARF instruction for FEAT_PAuth_LR. This does mean that, when using FEAT_PAuthLR, the improvements introduced in llvm#96337 cannot be utilised. `.cfi_negate_ra_state_with_pc` must be emitted directly after the signing instruction, and when bundled with other CFI calls, leads to faults when running a program. There are no changes seen when not using FEAT_PAuthLR to how the CFI Instructions are generated. See ARM-software/abi-aa#245 for the ABI change that incororates FEAT_PAuthLR. Authored-by: pratlucas <[email protected]> Co-authored by: vhscampos <[email protected]> Co-authored by: Stylie777 <[email protected]>
This introduces compiler and dwarfdump support for emitting and parsing the new `DW_CFA_AARCH64_negate_ra_state_with_pc` DWARF instruction for FEAT_PAuth_LR. This does mean that, when using FEAT_PAuthLR, the improvements introduced in llvm#96337 cannot be utilised. `.cfi_negate_ra_state_with_pc` must be emitted directly after the signing instruction, and when bundled with other CFI calls, leads to faults when running a program. There are no changes seen when not using FEAT_PAuthLR to how the CFI Instructions are generated. See ARM-software/abi-aa#245 for the ABI change that incororates FEAT_PAuthLR. Authored-by: pratlucas <[email protected]> Co-authored by: vhscampos <[email protected]> Co-authored by: Stylie777 <[email protected]>
SubtargetPredicate should be the primary "does this instruction exist"
predicate, with OtherPredicates used for other side pieces of information.
Changes like 856d1c4 were backwards. The problematic usage is how
GFX12 is using HasRestrictedOffset. The multiclasses for buffers
should probably be split up instead of hiding OtherPredicates inside
the buffer atomic multiclasses. The two cases are mutually exclusive
and really need a negated predicate for the not-gfx12 case.
It's pretty terrible we have to manage this in the first place.
TableGen should be able to figure out the required predicates
from any instructions that appear in the pattern output.