From 312361ba142763030a18c350a70a84aaaf284857 Mon Sep 17 00:00:00 2001 From: Sirish Pande Date: Thu, 16 May 2024 16:21:42 -0500 Subject: [PATCH] AMDGPU: Support buffer_atomic_pk_add_bf16 for gfx950 Co-authored-by: Sirish Pande --- clang/test/CodeGenOpenCL/amdgpu-features.cl | 6 +- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 4 +- llvm/lib/Target/AMDGPU/AMDGPU.td | 3 +- llvm/lib/Target/AMDGPU/BUFInstructions.td | 2 + llvm/lib/TargetParser/TargetParser.cpp | 2 + llvm/test/CodeGen/AMDGPU/fp-atomics-gfx950.ll | 92 +++++++++++++++++++ llvm/test/MC/AMDGPU/gfx12_asm_vbuffer_mubuf.s | 8 +- llvm/test/MC/AMDGPU/gfx950_asm_features.s | 60 ++++++++++++ llvm/test/MC/AMDGPU/gfx950_err.s | 12 +++ llvm/test/MC/Disassembler/AMDGPU/gfx950.txt | 45 +++++++++ 10 files changed, 224 insertions(+), 10 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/fp-atomics-gfx950.ll diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl index 1e2921160d28f2..f739872685e780 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-features.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -89,7 +89,7 @@ // GFX941: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" // GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts" // GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" -// GFX950: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX950: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" // GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" // GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" // GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" @@ -109,8 +109,8 @@ // GFX1151: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" // GFX1152: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" // GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" "uniform-work-group-size"="true" // GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64" diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index c35c1635a7b6b5..79ad9dbb67430d 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1452,7 +1452,7 @@ def int_amdgcn_raw_ptr_buffer_atomic_cmpswap : Intrinsic< // gfx908 intrinsic def int_amdgcn_raw_buffer_atomic_fadd : AMDGPURawBufferAtomic; -// Supports float and <2 x half> on gfx908. Supports v2bf16 on gfx90a, gfx940, gfx12+. +// Supports float and <2 x half> on gfx908. Supports v2bf16 on gfx90a, gfx940, gfx950, gfx12+. def int_amdgcn_raw_ptr_buffer_atomic_fadd : AMDGPURawPtrBufferAtomic; class AMDGPUStructBufferAtomic : Intrinsic < @@ -1527,7 +1527,7 @@ def int_amdgcn_struct_ptr_buffer_atomic_cmpswap : Intrinsic< ImmArg>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<2, 0>; -// gfx908 intrinsic +// gfx908 intrinsic. Supports v2bf16 on gfx12+ and gfx950 def int_amdgcn_struct_buffer_atomic_fadd : AMDGPUStructBufferAtomic; def int_amdgcn_struct_ptr_buffer_atomic_fadd : AMDGPUStructPtrBufferAtomic; diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index be213431ea8f1f..89a35d911e8b1b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1593,7 +1593,8 @@ def FeatureISAVersion9_5_Common : FeatureSet< FeatureFP4ConversionScaleInsts, FeatureFP6BF6ConversionScaleInsts, FeatureDot12Insts, - FeatureDot13Insts + FeatureDot13Insts, + FeatureAtomicBufferPkAddBF16Inst ])>; def FeatureISAVersion9_4_0 : FeatureSet< diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index e5978aee2b39a3..a288c58def5cbd 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -3296,6 +3296,8 @@ defm BUFFER_WBINVL1_VOL : MUBUF_Real_vi <0x3f>; defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_Atomic_vi <0x4e>; +defm BUFFER_ATOMIC_PK_ADD_BF16 : MUBUF_Real_Atomic_vi <0x52>; + defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Real_Atomic_vi <0x4d>; let SubtargetPredicate = isGFX90APlus in { diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index 8b445b1aaa0548..a2782d00b35203 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -379,6 +379,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["atomic-ds-pk-add-16-insts"] = true; Features["atomic-flat-pk-add-16-insts"] = true; Features["atomic-buffer-global-pk-add-f16-insts"] = true; + Features["atomic-buffer-pk-add-bf16-inst"] = true; Features["atomic-global-pk-add-bf16-inst"] = true; Features["16-bit-insts"] = true; Features["dpp"] = true; @@ -479,6 +480,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, Features["ashr-pk-insts"] = true; Features["dot12-insts"] = true; Features["dot13-insts"] = true; + Features["atomic-buffer-pk-add-bf16-inst"] = true; Features["gfx950-insts"] = true; [[fallthrough]]; case GK_GFX942: diff --git a/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx950.ll b/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx950.ll new file mode 100644 index 00000000000000..ab380dbef107ad --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx950.ll @@ -0,0 +1,92 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx950 -global-isel=0 -verify-machineinstrs | FileCheck %s -check-prefix=GFX950-SDAG +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx950 -global-isel=1 -verify-machineinstrs | FileCheck %s -check-prefix=GFX950-GISEL + +declare <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat>, <4 x i32>, i32, i32, i32, i32 immarg) +declare <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32, i32, i32) + +define amdgpu_ps float @struct_buffer_atomic_add_v2bf16_ret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { +; GFX950-SDAG-LABEL: struct_buffer_atomic_add_v2bf16_ret: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1 +; GFX950-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v[2:3], s[0:3], s4 idxen offen sc0 +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[2:3], 0 +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX950-SDAG-NEXT: flat_store_dword v[2:3], v0 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 1.0 +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: ; return to shader part epilog +; +; GFX950-GISEL-LABEL: struct_buffer_atomic_add_v2bf16_ret: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v1 +; GFX950-GISEL-NEXT: v_mov_b32_e32 v5, v2 +; GFX950-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v[4:5], s[0:3], s4 idxen offen sc0 +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], 0 +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX950-GISEL-NEXT: flat_store_dword v[2:3], v0 +; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, 1.0 +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: ; return to shader part epilog + %orig = call <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) + store <2 x bfloat> %orig, ptr null + ret float 1.0 +} + +define amdgpu_ps void @struct_buffer_atomic_add_v2bf16_noret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { +; GFX950-SDAG-LABEL: struct_buffer_atomic_add_v2bf16_noret: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: v_mov_b32_e32 v3, v2 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v2, v1 +; GFX950-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v[2:3], s[0:3], s4 idxen offen +; GFX950-SDAG-NEXT: s_endpgm +; +; GFX950-GISEL-LABEL: struct_buffer_atomic_add_v2bf16_noret: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: v_mov_b32_e32 v4, v1 +; GFX950-GISEL-NEXT: v_mov_b32_e32 v5, v2 +; GFX950-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v[4:5], s[0:3], s4 idxen offen +; GFX950-GISEL-NEXT: s_endpgm + %orig = call <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps void @raw_buffer_atomic_add_v2bf16(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { +; GFX950-SDAG-LABEL: raw_buffer_atomic_add_v2bf16: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen +; GFX950-SDAG-NEXT: s_endpgm +; +; GFX950-GISEL-LABEL: raw_buffer_atomic_add_v2bf16: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen +; GFX950-GISEL-NEXT: s_endpgm + %ret = call <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + ret void +} + +define amdgpu_ps float @raw_buffer_atomic_add_v2bf16_ret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { +; GFX950-SDAG-LABEL: raw_buffer_atomic_add_v2bf16_ret: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen sc0 +; GFX950-SDAG-NEXT: v_mov_b64_e32 v[2:3], 0 +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX950-SDAG-NEXT: flat_store_dword v[2:3], v0 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, 1.0 +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: ; return to shader part epilog +; +; GFX950-GISEL-LABEL: raw_buffer_atomic_add_v2bf16_ret: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen sc0 +; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], 0 +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX950-GISEL-NEXT: flat_store_dword v[2:3], v0 +; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, 1.0 +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: ; return to shader part epilog + %orig = call <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) + store <2 x bfloat> %orig, ptr null + ret float 1.0 +} diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vbuffer_mubuf.s b/llvm/test/MC/AMDGPU/gfx12_asm_vbuffer_mubuf.s index efeaf8339f6928..40ea9bb3678d9e 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vbuffer_mubuf.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vbuffer_mubuf.s @@ -2636,16 +2636,16 @@ buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:8388607 th:TH_ATOMIC_CASCA // GFX12: encoding: [0x03,0x80,0x16,0xc4,0x05,0x10,0xe8,0x00,0x00,0xff,0xff,0x7f] buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0 offset:8388607 -// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode buffer_atomic_pk_add_bf16 v5, off, s[8:11], -1 offset:8388607 -// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0.5 offset:8388607 -// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode buffer_atomic_pk_add_bf16 v5, off, s[8:11], -4.0 offset:8388607 -// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:8388607 glc // GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/gfx950_asm_features.s b/llvm/test/MC/AMDGPU/gfx950_asm_features.s index 614d9e8703ae45..5aa1e2d5ccb11d 100644 --- a/llvm/test/MC/AMDGPU/gfx950_asm_features.s +++ b/llvm/test/MC/AMDGPU/gfx950_asm_features.s @@ -1089,3 +1089,63 @@ v_cvt_scalef32_2xpk16_fp6_f32 v[20:25], v[10:25], v[10:25], 22 // NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: // GFX950: v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], 11 ; encoding: [0x14,0x00,0x53,0xd2,0x0a,0x15,0x2e,0x02] v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], 11 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x03] +buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0xff,0x02,0x03] +buffer_atomic_pk_add_bf16 v255, off, s[8:11], s3 offset:4095 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x03,0x03] +buffer_atomic_pk_add_bf16 v5, off, s[12:15], s3 offset:4095 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x18,0x03] +buffer_atomic_pk_add_bf16 v5, off, s[96:99], s3 offset:4095 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x65] +buffer_atomic_pk_add_bf16 v5, off, s[8:11], s101 offset:4095 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x7c] +buffer_atomic_pk_add_bf16 v5, off, s[8:11], m0 offset:4095 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x48,0xe1,0x00,0x05,0x02,0x03] +buffer_atomic_pk_add_bf16 v5, v0, s[8:11], s3 idxen offset:4095 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x48,0xe1,0x00,0x05,0x02,0x03] +buffer_atomic_pk_add_bf16 v5, v0, s[8:11], s3 offen offset:4095 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x48,0xe1,0x00,0x05,0x02,0x03] +buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x48,0xe1,0x00,0x05,0x02,0x03] +buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x48,0xe1,0x00,0x05,0x02,0x03] +buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:7 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x80] +buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0 offset:4095 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0xc1] +buffer_atomic_pk_add_bf16 v5, off, s[8:11], -1 offset:4095 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0xf0] +buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0.5 offset:4095 + +// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error: +// GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0xf7] +buffer_atomic_pk_add_bf16 v5, off, s[8:11], -4.0 offset:4095 diff --git a/llvm/test/MC/AMDGPU/gfx950_err.s b/llvm/test/MC/AMDGPU/gfx950_err.s index 21ffdba0d8e0cb..fd3da56c5130c4 100644 --- a/llvm/test/MC/AMDGPU/gfx950_err.s +++ b/llvm/test/MC/AMDGPU/gfx950_err.s @@ -341,3 +341,15 @@ v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], v6 div:2 // GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand v_cvt_scalef32_2xpk16_bf6_f32 v[20:25], v[10:25], v[10:25], v6 clamp div:2 + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 glc + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 slc + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 dlc + +// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 glc slc dlc diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950.txt index 3852845d308834..9fc9c58387b901 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx950.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950.txt @@ -74,3 +74,48 @@ # GFX950: v_permlane32_swap_b32_e64 v1, v2 fi:1 ; encoding: [0x01,0x08,0x9a,0xd1,0x02,0x01,0x00,0x00] 0x01,0x08,0x9a,0xd1,0x02,0x01,0x00,0x00 + +# GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x03] +0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x03 + +# GFX950: buffer_atomic_pk_add_bf16 v255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0xff,0x02,0x03] +0xff,0x0f,0x48,0xe1,0x00,0xff,0x02,0x03 + +# GFX950: buffer_atomic_pk_add_bf16 v5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x03,0x03] +0xff,0x0f,0x48,0xe1,0x00,0x05,0x03,0x03 + +# GFX950: buffer_atomic_pk_add_bf16 v5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x18,0x03] +0xff,0x0f,0x48,0xe1,0x00,0x05,0x18,0x03 + +# GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x65] +0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x65 + +# GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x7c] +0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x7c + +# GFX950: buffer_atomic_pk_add_bf16 v5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x48,0xe1,0x00,0x05,0x02,0x03] +0xff,0x2f,0x48,0xe1,0x00,0x05,0x02,0x03 + +# GFX950: buffer_atomic_pk_add_bf16 v5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x48,0xe1,0x00,0x05,0x02,0x03] +0xff,0x1f,0x48,0xe1,0x00,0x05,0x02,0x03 + +# GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x48,0xe1,0x00,0x05,0x02,0x03] +0x00,0x00,0x48,0xe1,0x00,0x05,0x02,0x03 + +# GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x48,0xe1,0x00,0x05,0x02,0x03] +0x00,0x00,0x48,0xe1,0x00,0x05,0x02,0x03 + +# GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x48,0xe1,0x00,0x05,0x02,0x03] +0x07,0x00,0x48,0xe1,0x00,0x05,0x02,0x03 + +# GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x80] +0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0x80 + +# GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0xc1] +0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0xc1 + +# GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0xf0] +0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0xf0 + +# GFX950: buffer_atomic_pk_add_bf16 v5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0xf7] +0xff,0x0f,0x48,0xe1,0x00,0x05,0x02,0xf7