From 3a728298ac2c9c142a6cfa3fd389572d14fbaee4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 23 May 2024 19:27:32 +0200 Subject: [PATCH] AMDGPU: Legalize fminimum and fmaximum f32 for gfx950 Select to minimum3/maximum3. Leave f16/v2f16 for later since it's complicated by only having the vector version. --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 4 + llvm/lib/Target/AMDGPU/VOP3Instructions.td | 17 + llvm/test/CodeGen/AMDGPU/fmaximum3.ll | 1368 +++++++++++------- llvm/test/CodeGen/AMDGPU/fminimum3.ll | 1368 +++++++++++------- llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll | 421 +++--- llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll | 421 +++--- 6 files changed, 1977 insertions(+), 1622 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 3f0845864336fe..2e0f95161935a9 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -855,6 +855,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction({ISD::FMINIMUM, ISD::FMAXIMUM}, {MVT::v4f16, MVT::v8f16, MVT::v16f16, MVT::v32f16}, Custom); + } else { + // FIXME: For nnan fmaximum, emit the fmaximum3 instead of fmaxnum + if (Subtarget->hasMinimum3Maximum3F32()) + setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal); } setOperationAction(ISD::INTRINSIC_WO_CHAIN, diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 5d4d56e8b0ad22..2b207e008581b3 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -1234,6 +1234,23 @@ def : IntClampPat; def : IntClampPat; def : IntClampPat; +//===----------------------------------------------------------------------===// +// Floating-point operation Patterns +//===----------------------------------------------------------------------===// + +// Implement fminimum(x, y) by using minimum3(x, y, y) +class 
MinimumMaximumByMinimum3Maximum3<SDPatternOperator node, ValueType vt, Instruction inst> : GCNPat< + (vt (node (VOP3Mods vt:$src0, i32:$src0_mods), (VOP3Mods vt:$src1, i32:$src1_mods))), + (inst $src0_mods, $src0, $src1_mods, $src1, $src1_mods, $src1) +>; + +// Prefer the real 2 operand form if legal +let SubtargetPredicate = HasMinimum3Maximum3F32, AddedComplexity = -1000 in { +def : MinimumMaximumByMinimum3Maximum3<fminimum, f32, V_MINIMUM3_F32_e64>; +def : MinimumMaximumByMinimum3Maximum3<fmaximum, f32, V_MAXIMUM3_F32_e64>; +} + //===----------------------------------------------------------------------===// // Target-specific instruction encodings. //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll index 08122cd0d89eab..209ae86b4dedce 100644 --- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll +++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll @@ -14,19 +14,26 @@ define float @v_fmaximum3_f32(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v3, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v1, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v3, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v1, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +;
+; GFX950-LABEL: v_fmaximum3_f32: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.maximum.f32(float %a, float %b) %max1 = call float @llvm.maximum.f32(float %max0, float %c) ret float %max1 @@ -43,19 +50,26 @@ define float @v_fmaximum3_f32_commute(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v2, v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_commute: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v3, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v1, v2, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_commute: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v3, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v1, v2, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_commute: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: v_maximum3_f32 v0, v2, v0, v0 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.maximum.f32(float %a, float %b) %max1 = call float @llvm.maximum.f32(float %c, float %max0) ret float %max1 @@ -70,21 +84,30 @@ define amdgpu_ps i32 @s_fmaximum3_f32(float 
inreg %a, float inreg %b, float inre ; GFX12-NEXT: v_readfirstlane_b32 s0, v0 ; GFX12-NEXT: ; return to shader part epilog ; -; GFX9-LABEL: s_fmaximum3_f32: -; GFX9: ; %bb.0: -; GFX9-NEXT: v_mov_b32_e32 v0, s1 -; GFX9-NEXT: v_max_f32_e32 v1, s0, v0 -; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX9-NEXT: v_max_f32_e32 v1, s2, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_readfirstlane_b32 s0, v0 -; GFX9-NEXT: ; return to shader part epilog +; GFX940-LABEL: s_fmaximum3_f32: +; GFX940: ; %bb.0: +; GFX940-NEXT: v_mov_b32_e32 v0, s1 +; GFX940-NEXT: v_max_f32_e32 v1, s0, v0 +; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX940-NEXT: v_max_f32_e32 v1, s2, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_readfirstlane_b32 s0, v0 +; GFX940-NEXT: ; return to shader part epilog +; +; GFX950-LABEL: s_fmaximum3_f32: +; GFX950: ; %bb.0: +; GFX950-NEXT: v_mov_b32_e32 v0, s0 +; GFX950-NEXT: v_maximum3_f32 v0, v0, s1, s1 +; GFX950-NEXT: v_maximum3_f32 v0, v0, s2, s2 +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_readfirstlane_b32 s0, v0 +; GFX950-NEXT: ; return to shader part epilog %max0 = call float @llvm.maximum.f32(float %a, float %b) %max1 = call float @llvm.maximum.f32(float %max0, float %c) %cast = bitcast float %max1 to i32 @@ -103,19 +126,26 @@ define float @v_fmaximum3_f32_fabs0(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, |v0|, v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_fabs0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v3, 
|v0|, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v1, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_fabs0: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e64 v3, |v0|, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v1, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_fabs0: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, |v0|, v1, v1 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fabs = call float @llvm.fabs.f32(float %a) %max0 = call float @llvm.maximum.f32(float %a.fabs, float %b) %max1 = call float @llvm.maximum.f32(float %max0, float %c) @@ -133,19 +163,26 @@ define float @v_fmaximum3_f32_fabs1(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, |v1|, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_fabs1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v3, v0, |v1| -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v1| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v1, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_fabs1: +; 
GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e64 v3, v0, |v1| +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v1| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v1, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_fabs1: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, |v1|, |v1| +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %b.fabs = call float @llvm.fabs.f32(float %b) %max0 = call float @llvm.maximum.f32(float %a, float %b.fabs) %max1 = call float @llvm.maximum.f32(float %max0, float %c) @@ -163,19 +200,26 @@ define float @v_fmaximum3_f32_fabs2(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, |v2| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_fabs2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v3, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e64 v1, v0, |v2| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_fabs2: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v3, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e64 v1, v0, |v2| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| +; GFX940-NEXT: s_nop 1 +; 
GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_fabs2: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: v_maximum3_f32 v0, v0, |v2|, |v2| +; GFX950-NEXT: s_setpc_b64 s[30:31] %c.fabs = call float @llvm.fabs.f32(float %c) %max0 = call float @llvm.maximum.f32(float %a, float %b) %max1 = call float @llvm.maximum.f32(float %max0, float %c.fabs) @@ -193,19 +237,26 @@ define float @v_fmaximum3_f32_fabs_all(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, |v0|, |v1|, |v2| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_fabs_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v3, |v0|, |v1| -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v1| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e64 v1, v0, |v2| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_fabs_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e64 v3, |v0|, |v1| +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v1| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e64 v1, v0, |v2| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_fabs_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, |v0|, |v1|, |v1| +; GFX950-NEXT: v_maximum3_f32 v0, v0, |v2|, |v2| +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fabs = call 
float @llvm.fabs.f32(float %a) %b.fabs = call float @llvm.fabs.f32(float %b) %c.fabs = call float @llvm.fabs.f32(float %c) @@ -225,19 +276,26 @@ define float @v_fmaximum3_f32_fneg_all(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, -v0, -v1, -v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_fneg_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v3, -v0, -v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e64 v1, v0, -v2 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_fneg_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e64 v3, -v0, -v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e64 v1, v0, -v2 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_fneg_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, -v0, -v1, -v1 +; GFX950-NEXT: v_maximum3_f32 v0, v0, -v2, -v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fneg = fneg float %a %b.fneg = fneg float %b %c.fneg = fneg float %c @@ -257,19 +315,26 @@ define float @v_fmaximum3_f32_fneg_fabs_all(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, -|v0|, -|v1|, -|v2| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_fneg_fabs_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v3, -|v0|, 
-|v1| -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -|v0|, -|v1| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e64 v1, v0, -|v2| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -|v2| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_fneg_fabs_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e64 v3, -|v0|, -|v1| +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -|v0|, -|v1| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e64 v1, v0, -|v2| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -|v2| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_fneg_fabs_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, -|v0|, -|v1|, -|v1| +; GFX950-NEXT: v_maximum3_f32 v0, v0, -|v2|, -|v2| +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fabs = call float @llvm.fabs.f32(float %a) %b.fabs = call float @llvm.fabs.f32(float %b) %c.fabs = call float @llvm.fabs.f32(float %c) @@ -292,19 +357,26 @@ define float @v_fmaximum3_f32_fneg0(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, -v0, v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_fneg0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v3, -v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v1, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: 
v_fmaximum3_f32_fneg0: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e64 v3, -v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v1, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_fneg0: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, -v0, v1, v1 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fneg = fneg float %a %max0 = call float @llvm.maximum.f32(float %a.fneg, float %b) %max1 = call float @llvm.maximum.f32(float %max0, float %c) @@ -322,19 +394,26 @@ define float @v_fmaximum3_f32_fneg1(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, -v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_fneg1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v3, v0, -v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v1, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_fneg1: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e64 v3, v0, -v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v1, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; 
GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_fneg1: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, -v1, -v1 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %b.fneg = fneg float %b %max0 = call float @llvm.maximum.f32(float %a, float %b.fneg) %max1 = call float @llvm.maximum.f32(float %max0, float %c) @@ -352,19 +431,26 @@ define float @v_fmaximum3_f32_fneg2(float %a, float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, -v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_fneg2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v3, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e64 v1, v0, -v2 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_fneg2: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v3, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e64 v1, v0, -v2 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_fneg2: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: v_maximum3_f32 v0, v0, -v2, -v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %c.fneg = fneg float %c %max0 = call float @llvm.maximum.f32(float %a, float %b) %max1 
= call float @llvm.maximum.f32(float %max0, float %c.fneg) @@ -382,19 +468,27 @@ define float @v_fmaximum3_f32_const0(float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, 0x41000000, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_const0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v2, 0x41000000, v0 -; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-NEXT: v_max_f32_e32 v2, v0, v1 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_const0: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v2, 0x41000000, v0 +; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX940-NEXT: v_max_f32_e32 v2, v0, v1 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_const0: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: s_mov_b32 s0, 0x41000000 +; GFX950-NEXT: v_maximum3_f32 v0, v0, s0, s0 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.maximum.f32(float 8.0, float %b) %max1 = call float @llvm.maximum.f32(float %max0, float %c) ret float %max1 @@ -411,19 +505,27 @@ define float @v_fmaximum3_f32__const2(float %a, float %b) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, 0x41000000 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32__const2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v2, v0, v1 
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-NEXT: v_max_f32_e32 v1, 0x41000000, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32__const2: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v2, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX940-NEXT: v_max_f32_e32 v1, 0x41000000, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32__const2: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: s_mov_b32 s0, 0x41000000 +; GFX950-NEXT: v_maximum3_f32 v0, v0, s0, s0 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.maximum.f32(float %a, float %b) %max1 = call float @llvm.maximum.f32(float %max0, float 8.0) ret float %max1 @@ -440,19 +542,26 @@ define float @v_fmaximum3_f32_inlineimm0(float %b, float %c) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, 4.0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_inlineimm0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v2, 4.0, v0 -; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-NEXT: v_max_f32_e32 v2, v0, v1 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: 
v_fmaximum3_f32_inlineimm0: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v2, 4.0, v0 +; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX940-NEXT: v_max_f32_e32 v2, v0, v1 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_inlineimm0: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, 4.0, 4.0 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.maximum.f32(float 4.0, float %b) %max1 = call float @llvm.maximum.f32(float %max0, float %c) ret float %max1 @@ -469,19 +578,26 @@ define float @v_fmaximum3_f32__inlineimm(float %a, float %b) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, v1, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32__inlineimm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v2, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-NEXT: v_max_f32_e32 v1, 4.0, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32__inlineimm: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v2, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX940-NEXT: v_max_f32_e32 v1, 4.0, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; 
GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32__inlineimm: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: v_maximum3_f32 v0, v0, 4.0, 4.0 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.maximum.f32(float %a, float %b) %max1 = call float @llvm.maximum.f32(float %max0, float 4.0) ret float %max1 @@ -500,19 +616,28 @@ define float @v_fmaximum3_f32_const1_const2(float %a) { ; GFX12-NEXT: v_maximum3_f32 v0, v0, s0, 0x41800000 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_f32_const1_const2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v1, 0x41000000, v0 -; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX9-NEXT: v_max_f32_e32 v1, 0x41800000, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_f32_const1_const2: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v1, 0x41000000, v0 +; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX940-NEXT: v_max_f32_e32 v1, 0x41800000, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_f32_const1_const2: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: s_mov_b32 s0, 0x41000000 +; GFX950-NEXT: v_maximum3_f32 v0, v0, s0, s0 +; GFX950-NEXT: s_mov_b32 s0, 0x41800000 +; GFX950-NEXT: v_maximum3_f32 v0, v0, s0, s0 +; 
GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.maximum.f32(float %a, float 8.0) %max1 = call float @llvm.maximum.f32(float %max0, float 16.0) ret float %max1 @@ -530,27 +655,36 @@ define <2 x float> @v_fmaximum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float ; GFX12-NEXT: v_maximum3_f32 v1, v5, v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_v2f32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v6, v1, v3 -; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX9-NEXT: v_max_f32_e32 v3, v0, v2 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v2, v4, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v4, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX9-NEXT: v_max_f32_e32 v2, v5, v1 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v5, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_v2f32: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v6, v1, v3 +; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 +; GFX940-NEXT: v_max_f32_e32 v3, v0, v2 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v2, v4, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v4, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX940-NEXT: v_max_f32_e32 v2, v5, v1 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v5, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: 
v_fmaximum3_v2f32: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_maximum3_f32 v0, v4, v0, v0 +; GFX950-NEXT: v_maximum3_f32 v1, v5, v1, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b) %max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %c, <2 x float> %max0) ret <2 x float> %max1 @@ -568,27 +702,36 @@ define <2 x float> @v_fmaximum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2 ; GFX12-NEXT: v_maximum3_f32 v1, v1, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_v2f32_commute: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v6, v1, v3 -; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX9-NEXT: v_max_f32_e32 v3, v0, v2 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v2, v0, v4 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX9-NEXT: v_max_f32_e32 v2, v1, v5 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_v2f32_commute: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v6, v1, v3 +; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 +; GFX940-NEXT: v_max_f32_e32 v3, v0, v2 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v2, v0, v4 
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX940-NEXT: v_max_f32_e32 v2, v1, v5 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_v2f32_commute: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v4, v4 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v5, v5 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b) %max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %max0, <2 x float> %c) ret <2 x float> %max1 @@ -606,27 +749,36 @@ define <2 x float> @v_fmaximum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b, ; GFX12-NEXT: v_maximum3_f32 v1, |v1|, |v3|, |v5| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_v2f32__fabs_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v6, |v1|, |v3| -; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v3| -; GFX9-NEXT: v_max_f32_e64 v3, |v0|, |v2| -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v2| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX9-NEXT: v_max_f32_e64 v2, v0, |v4| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v4| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX9-NEXT: v_max_f32_e64 v2, v1, |v5| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v5| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_v2f32__fabs_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e64 
v6, |v1|, |v3| +; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v3| +; GFX940-NEXT: v_max_f32_e64 v3, |v0|, |v2| +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v2| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX940-NEXT: v_max_f32_e64 v2, v0, |v4| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v4| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX940-NEXT: v_max_f32_e64 v2, v1, |v5| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, |v5| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_v2f32__fabs_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v1, |v1|, |v3|, |v3| +; GFX950-NEXT: v_maximum3_f32 v0, |v0|, |v2|, |v2| +; GFX950-NEXT: v_maximum3_f32 v0, v0, |v4|, |v4| +; GFX950-NEXT: v_maximum3_f32 v1, v1, |v5|, |v5| +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a) %b.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b) %c.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %c) @@ -647,27 +799,36 @@ define <2 x float> @v_fmaximum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b, ; GFX12-NEXT: v_maximum3_f32 v1, -v1, -v3, -v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_v2f32__fneg_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v6, -v1, -v3 -; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v3 -; GFX9-NEXT: v_max_f32_e64 v3, -v0, -v2 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX9-NEXT: v_max_f32_e64 v2, v0, -v4 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v4 -; 
GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX9-NEXT: v_max_f32_e64 v2, v1, -v5 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v5 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_v2f32__fneg_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e64 v6, -v1, -v3 +; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v3 +; GFX940-NEXT: v_max_f32_e64 v3, -v0, -v2 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX940-NEXT: v_max_f32_e64 v2, v0, -v4 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v4 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX940-NEXT: v_max_f32_e64 v2, v1, -v5 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, -v5 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_v2f32__fneg_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v1, -v1, -v3, -v3 +; GFX950-NEXT: v_maximum3_f32 v0, -v0, -v2, -v2 +; GFX950-NEXT: v_maximum3_f32 v0, v0, -v4, -v4 +; GFX950-NEXT: v_maximum3_f32 v1, v1, -v5, -v5 +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fneg = fneg <2 x float> %a %b.fneg = fneg <2 x float> %b %c.fneg = fneg <2 x float> %c @@ -688,27 +849,36 @@ define <2 x float> @v_fmaximum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c ; GFX12-NEXT: v_maximum3_f32 v1, v1, 2.0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_v2f32__inlineimm1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v4, 2.0, v1 -; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 -; GFX9-NEXT: 
s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc -; GFX9-NEXT: v_max_f32_e32 v4, 2.0, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc -; GFX9-NEXT: v_max_f32_e32 v4, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: v_max_f32_e32 v2, v1, v3 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_v2f32__inlineimm1: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v4, 2.0, v1 +; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc +; GFX940-NEXT: v_max_f32_e32 v4, 2.0, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc +; GFX940-NEXT: v_max_f32_e32 v4, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: v_max_f32_e32 v2, v1, v3 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_v2f32__inlineimm1: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v1, v1, 2.0, 2.0 +; GFX950-NEXT: v_maximum3_f32 v0, v0, 2.0, 2.0 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> ) %max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %max0, <2 x float> %c) ret <2 x float> %max1 @@ -726,27 +896,36 @@ define <2 x float> @v_fmaximum3_v2f32__inlineimm2(<2 x 
float> %a, <2 x float> %b ; GFX12-NEXT: v_maximum3_f32 v1, v1, v3, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_v2f32__inlineimm2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v4, v1, v3 -; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX9-NEXT: v_max_f32_e32 v3, v0, v2 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v2, 4.0, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc -; GFX9-NEXT: v_max_f32_e32 v2, 4.0, v1 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_v2f32__inlineimm2: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v4, v1, v3 +; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 +; GFX940-NEXT: v_max_f32_e32 v3, v0, v2 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v2, 4.0, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc +; GFX940-NEXT: v_max_f32_e32 v2, 4.0, v1 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_v2f32__inlineimm2: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: 
v_maximum3_f32 v0, v0, 4.0, 4.0 +; GFX950-NEXT: v_maximum3_f32 v1, v1, 4.0, 4.0 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b) %max1 = call <2 x float> @llvm.maximum.v2f32(<2 x float> %max0, <2 x float> ) ret <2 x float> %max1 @@ -765,35 +944,46 @@ define <3 x float> @v_fmaximum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float ; GFX12-NEXT: v_maximum3_f32 v2, v8, v2, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_v3f32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v9, v2, v5 -; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX9-NEXT: v_max_f32_e32 v5, v1, v4 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX9-NEXT: v_max_f32_e32 v4, v0, v3 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc -; GFX9-NEXT: v_max_f32_e32 v3, v6, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v6, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v3, v7, v1 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v7, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v3, v8, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v8, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_v3f32: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v9, v2, v5 +; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 +; GFX940-NEXT: v_max_f32_e32 v5, v1, v4 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 +; GFX940-NEXT: 
v_max_f32_e32 v4, v0, v3 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc +; GFX940-NEXT: v_max_f32_e32 v3, v6, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v6, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v3, v7, v1 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v7, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v3, v8, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v8, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_v3f32: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_maximum3_f32 v0, v6, v0, v0 +; GFX950-NEXT: v_maximum3_f32 v1, v7, v1, v1 +; GFX950-NEXT: v_maximum3_f32 v2, v8, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b) %max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %c, <3 x float> %max0) ret <3 x float> %max1 @@ -812,35 +1002,46 @@ define <3 x float> @v_fmaximum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3 ; GFX12-NEXT: v_maximum3_f32 v2, v2, v5, v8 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_v3f32_commute: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v9, v2, v5 -; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX9-NEXT: v_max_f32_e32 v5, v1, v4 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX9-NEXT: v_max_f32_e32 v4, v0, v3 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: 
v_cndmask_b32_e32 v1, v10, v5, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc -; GFX9-NEXT: v_max_f32_e32 v3, v0, v6 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v6 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v3, v1, v7 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v7 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v3, v2, v8 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v8 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_v3f32_commute: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v9, v2, v5 +; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 +; GFX940-NEXT: v_max_f32_e32 v5, v1, v4 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 +; GFX940-NEXT: v_max_f32_e32 v4, v0, v3 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc +; GFX940-NEXT: v_max_f32_e32 v3, v0, v6 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v6 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v3, v1, v7 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v7 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v3, v2, v8 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v8 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_v3f32_commute: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5 +; GFX950-NEXT: 
v_maximum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v6, v6 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v7, v7 +; GFX950-NEXT: v_maximum3_f32 v2, v2, v8, v8 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b) %max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %max0, <3 x float> %c) ret <3 x float> %max1 @@ -859,35 +1060,46 @@ define <3 x float> @v_fmaximum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b, ; GFX12-NEXT: v_maximum3_f32 v2, |v2|, |v5|, |v8| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_v3f32__fabs_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v9, |v2|, |v5| -; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v2|, |v5| -; GFX9-NEXT: v_max_f32_e64 v5, |v1|, |v4| -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v4| -; GFX9-NEXT: v_max_f32_e64 v4, |v0|, |v3| -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v3| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc -; GFX9-NEXT: v_max_f32_e64 v3, v0, |v6| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v6| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc -; GFX9-NEXT: v_max_f32_e64 v3, v1, |v7| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v7| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc -; GFX9-NEXT: v_max_f32_e64 v3, v2, |v8| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, |v8| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_v3f32__fabs_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e64 v9, |v2|, |v5| +; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000 +; GFX940-NEXT: 
v_cmp_o_f32_e64 vcc, |v2|, |v5| +; GFX940-NEXT: v_max_f32_e64 v5, |v1|, |v4| +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v4| +; GFX940-NEXT: v_max_f32_e64 v4, |v0|, |v3| +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v3| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc +; GFX940-NEXT: v_max_f32_e64 v3, v0, |v6| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v6| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc +; GFX940-NEXT: v_max_f32_e64 v3, v1, |v7| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, |v7| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX940-NEXT: v_max_f32_e64 v3, v2, |v8| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v2, |v8| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_v3f32__fabs_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v2, |v2|, |v5|, |v5| +; GFX950-NEXT: v_maximum3_f32 v1, |v1|, |v4|, |v4| +; GFX950-NEXT: v_maximum3_f32 v0, |v0|, |v3|, |v3| +; GFX950-NEXT: v_maximum3_f32 v0, v0, |v6|, |v6| +; GFX950-NEXT: v_maximum3_f32 v1, v1, |v7|, |v7| +; GFX950-NEXT: v_maximum3_f32 v2, v2, |v8|, |v8| +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %a) %b.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %b) %c.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %c) @@ -909,35 +1121,46 @@ define <3 x float> @v_fmaximum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b, ; GFX12-NEXT: v_maximum3_f32 v2, -v2, -v5, -v8 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_v3f32__fneg_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e64 v9, -v2, -v5 -; GFX9-NEXT: v_mov_b32_e32 v10, 
0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v2, -v5 -; GFX9-NEXT: v_max_f32_e64 v5, -v1, -v4 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v4 -; GFX9-NEXT: v_max_f32_e64 v4, -v0, -v3 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v3 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc -; GFX9-NEXT: v_max_f32_e64 v3, v0, -v6 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v6 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc -; GFX9-NEXT: v_max_f32_e64 v3, v1, -v7 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v7 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc -; GFX9-NEXT: v_max_f32_e64 v3, v2, -v8 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, -v8 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_v3f32__fneg_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e64 v9, -v2, -v5 +; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v2, -v5 +; GFX940-NEXT: v_max_f32_e64 v5, -v1, -v4 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v4 +; GFX940-NEXT: v_max_f32_e64 v4, -v0, -v3 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v3 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc +; GFX940-NEXT: v_max_f32_e64 v3, v0, -v6 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v6 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc +; GFX940-NEXT: v_max_f32_e64 v3, v1, -v7 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, -v7 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX940-NEXT: v_max_f32_e64 v3, v2, -v8 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, 
v2, -v8 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_v3f32__fneg_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v2, -v2, -v5, -v5 +; GFX950-NEXT: v_maximum3_f32 v1, -v1, -v4, -v4 +; GFX950-NEXT: v_maximum3_f32 v0, -v0, -v3, -v3 +; GFX950-NEXT: v_maximum3_f32 v0, v0, -v6, -v6 +; GFX950-NEXT: v_maximum3_f32 v1, v1, -v7, -v7 +; GFX950-NEXT: v_maximum3_f32 v2, v2, -v8, -v8 +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fneg = fneg <3 x float> %a %b.fneg = fneg <3 x float> %b %c.fneg = fneg <3 x float> %c @@ -959,35 +1182,46 @@ define <3 x float> @v_fmaximum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c ; GFX12-NEXT: v_maximum3_f32 v2, v2, 2.0, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fmaximum3_v3f32__inlineimm1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v6, 2.0, v2 -; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc -; GFX9-NEXT: v_max_f32_e32 v6, 2.0, v1 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX9-NEXT: v_max_f32_e32 v6, 2.0, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX9-NEXT: v_max_f32_e32 v6, v0, v3 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX9-NEXT: v_max_f32_e32 v3, v1, v4 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v3, v2, v5 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: 
v_fmaximum3_v3f32__inlineimm1: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v6, 2.0, v2 +; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc +; GFX940-NEXT: v_max_f32_e32 v6, 2.0, v1 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX940-NEXT: v_max_f32_e32 v6, 2.0, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc +; GFX940-NEXT: v_max_f32_e32 v6, v0, v3 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 +; GFX940-NEXT: v_max_f32_e32 v3, v1, v4 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v3, v2, v5 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_v3f32__inlineimm1: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v2, v2, 2.0, 2.0 +; GFX950-NEXT: v_maximum3_f32 v1, v1, 2.0, 2.0 +; GFX950-NEXT: v_maximum3_f32 v0, v0, 2.0, 2.0 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> ) %max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %max0, <3 x float> %c) ret <3 x float> %max1 @@ -1006,35 +1240,46 @@ define <3 x float> @v_fmaximum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b ; GFX12-NEXT: v_maximum3_f32 v2, v2, v5, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: 
v_fmaximum3_v3f32__inlineimm2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v6, v2, v5 -; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX9-NEXT: v_max_f32_e32 v5, v1, v4 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX9-NEXT: v_max_f32_e32 v4, v0, v3 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc -; GFX9-NEXT: v_max_f32_e32 v3, 4.0, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v3, 4.0, v1 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v3, 4.0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fmaximum3_v3f32__inlineimm2: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v6, v2, v5 +; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 +; GFX940-NEXT: v_max_f32_e32 v5, v1, v4 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 +; GFX940-NEXT: v_max_f32_e32 v4, v0, v3 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc +; GFX940-NEXT: v_max_f32_e32 v3, 4.0, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v3, 4.0, v1 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 +; GFX940-NEXT: 
s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v3, 4.0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fmaximum3_v3f32__inlineimm2: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_maximum3_f32 v0, v0, 4.0, 4.0 +; GFX950-NEXT: v_maximum3_f32 v1, v1, 4.0, 4.0 +; GFX950-NEXT: v_maximum3_f32 v2, v2, 4.0, 4.0 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %a, <3 x float> %b) %max1 = call <3 x float> @llvm.maximum.v3f32(<3 x float> %max0, <3 x float> ) ret <3 x float> %max1 @@ -3165,19 +3410,26 @@ define <2 x float> @v_no_fmaximum3_f32__multi_use(float %a, float %b, float %c) ; GFX12-NEXT: v_maximum_f32 v1, v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_no_fmaximum3_f32__multi_use: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v3, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_max_f32_e32 v1, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_no_fmaximum3_f32__multi_use: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_max_f32_e32 v3, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_max_f32_e32 v1, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; 
GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_no_fmaximum3_f32__multi_use: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: v_maximum3_f32 v1, v0, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.maximum.f32(float %a, float %b) %max1 = call float @llvm.maximum.f32(float %max0, float %c) %insert.0 = insertelement <2 x float> poison, float %max0, i32 0 @@ -3193,22 +3445,31 @@ define amdgpu_ps <2 x i32> @s_no_fmaximum3_f32__multi_use(float inreg %a, float ; GFX12-NEXT: s_maximum_f32 s1, s0, s2 ; GFX12-NEXT: ; return to shader part epilog ; -; GFX9-LABEL: s_no_fmaximum3_f32__multi_use: -; GFX9: ; %bb.0: -; GFX9-NEXT: v_mov_b32_e32 v0, s1 -; GFX9-NEXT: v_max_f32_e32 v1, s0, v0 -; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX9-NEXT: v_max_f32_e32 v1, s2, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 -; GFX9-NEXT: v_readfirstlane_b32 s0, v0 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_readfirstlane_b32 s1, v1 -; GFX9-NEXT: ; return to shader part epilog +; GFX940-LABEL: s_no_fmaximum3_f32__multi_use: +; GFX940: ; %bb.0: +; GFX940-NEXT: v_mov_b32_e32 v0, s1 +; GFX940-NEXT: v_max_f32_e32 v1, s0, v0 +; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX940-NEXT: v_max_f32_e32 v1, s2, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 +; GFX940-NEXT: v_readfirstlane_b32 s0, v0 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_readfirstlane_b32 s1, v1 +; GFX940-NEXT: ; return to shader part epilog +; +; GFX950-LABEL: s_no_fmaximum3_f32__multi_use: +; GFX950: ; 
%bb.0: +; GFX950-NEXT: v_mov_b32_e32 v0, s0 +; GFX950-NEXT: v_maximum3_f32 v0, v0, s1, s1 +; GFX950-NEXT: v_maximum3_f32 v1, v0, s2, s2 +; GFX950-NEXT: v_readfirstlane_b32 s0, v0 +; GFX950-NEXT: v_readfirstlane_b32 s1, v1 +; GFX950-NEXT: ; return to shader part epilog %max0 = call float @llvm.maximum.f32(float %a, float %b) %max1 = call float @llvm.maximum.f32(float %max0, float %c) %cast0 = bitcast float %max0 to i32 @@ -3372,6 +3633,3 @@ define <2 x double> @v_no_fmaximum3_f64__multi_use(double %a, double %b, double %insert.1 = insertelement <2 x double> %insert.0, double %max1, i32 1 ret <2 x double> %insert.1 } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GFX940: {{.*}} -; GFX950: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/fminimum3.ll b/llvm/test/CodeGen/AMDGPU/fminimum3.ll index 43293512c8c21d..000f6c190b9773 100644 --- a/llvm/test/CodeGen/AMDGPU/fminimum3.ll +++ b/llvm/test/CodeGen/AMDGPU/fminimum3.ll @@ -14,19 +14,26 @@ define float @v_fminimum3_f32(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v3, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v1, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v3, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v1, v0, v2 +; 
GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.minimum.f32(float %a, float %b) %max1 = call float @llvm.minimum.f32(float %max0, float %c) ret float %max1 @@ -43,19 +50,26 @@ define float @v_fminimum3_f32_commute(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v2, v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32_commute: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v3, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v1, v2, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32_commute: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v3, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v1, v2, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32_commute: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: v_minimum3_f32 v0, v2, v0, v0 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float 
@llvm.minimum.f32(float %a, float %b) %max1 = call float @llvm.minimum.f32(float %c, float %max0) ret float %max1 @@ -70,21 +84,30 @@ define amdgpu_ps i32 @s_fminimum3_f32(float inreg %a, float inreg %b, float inre ; GFX12-NEXT: v_readfirstlane_b32 s0, v0 ; GFX12-NEXT: ; return to shader part epilog ; -; GFX9-LABEL: s_fminimum3_f32: -; GFX9: ; %bb.0: -; GFX9-NEXT: v_mov_b32_e32 v0, s1 -; GFX9-NEXT: v_min_f32_e32 v1, s0, v0 -; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX9-NEXT: v_min_f32_e32 v1, s2, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_readfirstlane_b32 s0, v0 -; GFX9-NEXT: ; return to shader part epilog +; GFX940-LABEL: s_fminimum3_f32: +; GFX940: ; %bb.0: +; GFX940-NEXT: v_mov_b32_e32 v0, s1 +; GFX940-NEXT: v_min_f32_e32 v1, s0, v0 +; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX940-NEXT: v_min_f32_e32 v1, s2, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_readfirstlane_b32 s0, v0 +; GFX940-NEXT: ; return to shader part epilog +; +; GFX950-LABEL: s_fminimum3_f32: +; GFX950: ; %bb.0: +; GFX950-NEXT: v_mov_b32_e32 v0, s0 +; GFX950-NEXT: v_minimum3_f32 v0, v0, s1, s1 +; GFX950-NEXT: v_minimum3_f32 v0, v0, s2, s2 +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_readfirstlane_b32 s0, v0 +; GFX950-NEXT: ; return to shader part epilog %max0 = call float @llvm.minimum.f32(float %a, float %b) %max1 = call float @llvm.minimum.f32(float %max0, float %c) %cast = bitcast float %max1 to i32 @@ -103,19 +126,26 @@ define float @v_fminimum3_f32_fabs0(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, |v0|, v1, v2 ; 
GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32_fabs0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e64 v3, |v0|, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v1, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32_fabs0: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e64 v3, |v0|, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v1, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32_fabs0: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, |v0|, v1, v1 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fabs = call float @llvm.fabs.f32(float %a) %max0 = call float @llvm.minimum.f32(float %a.fabs, float %b) %max1 = call float @llvm.minimum.f32(float %max0, float %c) @@ -133,19 +163,26 @@ define float @v_fminimum3_f32_fabs1(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, |v1|, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32_fabs1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e64 v3, v0, |v1| -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v1| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v1, v0, v2 -; 
GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32_fabs1: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e64 v3, v0, |v1| +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v1| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v1, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32_fabs1: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, |v1|, |v1| +; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %b.fabs = call float @llvm.fabs.f32(float %b) %max0 = call float @llvm.minimum.f32(float %a, float %b.fabs) %max1 = call float @llvm.minimum.f32(float %max0, float %c) @@ -163,19 +200,26 @@ define float @v_fminimum3_f32_fabs2(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, |v2| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32_fabs2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v3, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_min_f32_e64 v1, v0, |v2| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32_fabs2: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v3, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; 
GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_min_f32_e64 v1, v0, |v2| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32_fabs2: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: v_minimum3_f32 v0, v0, |v2|, |v2| +; GFX950-NEXT: s_setpc_b64 s[30:31] %c.fabs = call float @llvm.fabs.f32(float %c) %max0 = call float @llvm.minimum.f32(float %a, float %b) %max1 = call float @llvm.minimum.f32(float %max0, float %c.fabs) @@ -193,19 +237,26 @@ define float @v_fminimum3_f32_fabs_all(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, |v0|, |v1|, |v2| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32_fabs_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e64 v3, |v0|, |v1| -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v1| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_min_f32_e64 v1, v0, |v2| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32_fabs_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e64 v3, |v0|, |v1| +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v1| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_min_f32_e64 v1, v0, |v2| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v2| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32_fabs_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, |v0|, |v1|, |v1| +; GFX950-NEXT: v_minimum3_f32 v0, v0, |v2|, |v2| +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fabs = call float @llvm.fabs.f32(float %a) %b.fabs = call float @llvm.fabs.f32(float %b) %c.fabs = call float @llvm.fabs.f32(float %c) @@ -225,19 +276,26 @@ define float @v_fminimum3_f32_fneg_all(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, -v0, -v1, -v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32_fneg_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e64 v3, -v0, -v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_min_f32_e64 v1, v0, -v2 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32_fneg_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e64 v3, -v0, -v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_min_f32_e64 v1, v0, -v2 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32_fneg_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, -v0, -v1, -v1 +; GFX950-NEXT: v_minimum3_f32 v0, v0, -v2, -v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fneg = fneg float %a %b.fneg = fneg float %b %c.fneg = fneg float %c @@ -257,19 +315,26 @@ define float @v_fminimum3_f32_fneg_fabs_all(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, -|v0|, -|v1|, -|v2| ; 
GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32_fneg_fabs_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e64 v3, -|v0|, -|v1| -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -|v0|, -|v1| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_min_f32_e64 v1, v0, -|v2| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -|v2| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32_fneg_fabs_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e64 v3, -|v0|, -|v1| +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -|v0|, -|v1| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_min_f32_e64 v1, v0, -|v2| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -|v2| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32_fneg_fabs_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, -|v0|, -|v1|, -|v1| +; GFX950-NEXT: v_minimum3_f32 v0, v0, -|v2|, -|v2| +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fabs = call float @llvm.fabs.f32(float %a) %b.fabs = call float @llvm.fabs.f32(float %b) %c.fabs = call float @llvm.fabs.f32(float %c) @@ -292,19 +357,26 @@ define float @v_fminimum3_f32_fneg0(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, -v0, v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32_fneg0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e64 v3, -v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: 
v_min_f32_e32 v1, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32_fneg0: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e64 v3, -v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v1, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32_fneg0: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, -v0, v1, v1 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fneg = fneg float %a %max0 = call float @llvm.minimum.f32(float %a.fneg, float %b) %max1 = call float @llvm.minimum.f32(float %max0, float %c) @@ -322,19 +394,26 @@ define float @v_fminimum3_f32_fneg1(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, -v1, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32_fneg1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e64 v3, v0, -v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v1, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32_fneg1: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e64 v3, v0, -v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v1 +; 
GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v1, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32_fneg1: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, -v1, -v1 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %b.fneg = fneg float %b %max0 = call float @llvm.minimum.f32(float %a, float %b.fneg) %max1 = call float @llvm.minimum.f32(float %max0, float %c) @@ -352,19 +431,26 @@ define float @v_fminimum3_f32_fneg2(float %a, float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, -v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32_fneg2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v3, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_min_f32_e64 v1, v0, -v2 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32_fneg2: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v3, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_min_f32_e64 v1, v0, -v2 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32_fneg2: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, 
v0, v1, v1 +; GFX950-NEXT: v_minimum3_f32 v0, v0, -v2, -v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %c.fneg = fneg float %c %max0 = call float @llvm.minimum.f32(float %a, float %b) %max1 = call float @llvm.minimum.f32(float %max0, float %c.fneg) @@ -382,19 +468,27 @@ define float @v_fminimum3_f32_const0(float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, 0x41000000, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32_const0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v2, 0x41000000, v0 -; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-NEXT: v_min_f32_e32 v2, v0, v1 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32_const0: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v2, 0x41000000, v0 +; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX940-NEXT: v_min_f32_e32 v2, v0, v1 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32_const0: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: s_mov_b32 s0, 0x41000000 +; GFX950-NEXT: v_minimum3_f32 v0, v0, s0, s0 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.minimum.f32(float 8.0, float %b) %max1 = call float @llvm.minimum.f32(float %max0, float %c) ret float %max1 @@ -411,19 +505,27 @@ define float @v_fminimum3_f32__const2(float %a, float %b) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, 0x41000000 ; 
GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32__const2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v2, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-NEXT: v_min_f32_e32 v1, 0x41000000, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32__const2: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v2, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX940-NEXT: v_min_f32_e32 v1, 0x41000000, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32__const2: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: s_mov_b32 s0, 0x41000000 +; GFX950-NEXT: v_minimum3_f32 v0, v0, s0, s0 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.minimum.f32(float %a, float %b) %max1 = call float @llvm.minimum.f32(float %max0, float 8.0) ret float %max1 @@ -440,19 +542,26 @@ define float @v_fminimum3_f32_inlineimm0(float %b, float %c) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, 4.0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32_inlineimm0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v2, 4.0, v0 -; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-NEXT: v_min_f32_e32 v2, v0, 
v1 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32_inlineimm0: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v2, 4.0, v0 +; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX940-NEXT: v_min_f32_e32 v2, v0, v1 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32_inlineimm0: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, 4.0, 4.0 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.minimum.f32(float 4.0, float %b) %max1 = call float @llvm.minimum.f32(float %max0, float %c) ret float %max1 @@ -469,19 +578,26 @@ define float @v_fminimum3_f32__inlineimm(float %a, float %b) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, v1, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32__inlineimm: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v2, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc -; GFX9-NEXT: v_min_f32_e32 v1, 4.0, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32__inlineimm: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v2, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v3, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 
+; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX940-NEXT: v_min_f32_e32 v1, 4.0, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32__inlineimm: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: v_minimum3_f32 v0, v0, 4.0, 4.0 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.minimum.f32(float %a, float %b) %max1 = call float @llvm.minimum.f32(float %max0, float 4.0) ret float %max1 @@ -500,19 +616,28 @@ define float @v_fminimum3_f32_const1_const2(float %a) { ; GFX12-NEXT: v_minimum3_f32 v0, v0, s0, 0x41800000 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_f32_const1_const2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v1, 0x41000000, v0 -; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX9-NEXT: v_min_f32_e32 v1, 0x41800000, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_f32_const1_const2: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v1, 0x41000000, v0 +; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX940-NEXT: v_min_f32_e32 v1, 0x41800000, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_f32_const1_const2: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: 
s_mov_b32 s0, 0x41000000 +; GFX950-NEXT: v_minimum3_f32 v0, v0, s0, s0 +; GFX950-NEXT: s_mov_b32 s0, 0x41800000 +; GFX950-NEXT: v_minimum3_f32 v0, v0, s0, s0 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.minimum.f32(float %a, float 8.0) %max1 = call float @llvm.minimum.f32(float %max0, float 16.0) ret float %max1 @@ -530,27 +655,36 @@ define <2 x float> @v_fminimum3_v2f32(<2 x float> %a, <2 x float> %b, <2 x float ; GFX12-NEXT: v_minimum3_f32 v1, v5, v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_v2f32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v6, v1, v3 -; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX9-NEXT: v_min_f32_e32 v3, v0, v2 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v2, v4, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v4, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX9-NEXT: v_min_f32_e32 v2, v5, v1 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v5, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_v2f32: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v6, v1, v3 +; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 +; GFX940-NEXT: v_min_f32_e32 v3, v0, v2 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v2, v4, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v4, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX940-NEXT: v_min_f32_e32 v2, v5, v1 +; GFX940-NEXT: 
v_cmp_o_f32_e32 vcc, v5, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_v2f32: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v3 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_minimum3_f32 v0, v4, v0, v0 +; GFX950-NEXT: v_minimum3_f32 v1, v5, v1, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b) %max1 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %c, <2 x float> %max0) ret <2 x float> %max1 @@ -568,27 +702,36 @@ define <2 x float> @v_fminimum3_v2f32_commute(<2 x float> %a, <2 x float> %b, <2 ; GFX12-NEXT: v_minimum3_f32 v1, v1, v3, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_v2f32_commute: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v6, v1, v3 -; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX9-NEXT: v_min_f32_e32 v3, v0, v2 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v2, v0, v4 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX9-NEXT: v_min_f32_e32 v2, v1, v5 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_v2f32_commute: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v6, v1, v3 +; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 +; GFX940-NEXT: v_min_f32_e32 v3, v0, v2 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc 
+; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v2, v0, v4 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX940-NEXT: v_min_f32_e32 v2, v1, v5 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_v2f32_commute: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v3 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v4, v4 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v5, v5 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b) %max1 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %max0, <2 x float> %c) ret <2 x float> %max1 @@ -606,27 +749,36 @@ define <2 x float> @v_fminimum3_v2f32__fabs_all(<2 x float> %a, <2 x float> %b, ; GFX12-NEXT: v_minimum3_f32 v1, |v1|, |v3|, |v5| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_v2f32__fabs_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e64 v6, |v1|, |v3| -; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v3| -; GFX9-NEXT: v_min_f32_e64 v3, |v0|, |v2| -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v2| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX9-NEXT: v_min_f32_e64 v2, v0, |v4| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v4| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX9-NEXT: v_min_f32_e64 v2, v1, |v5| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v5| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX9-NEXT: s_setpc_b64 
s[30:31] +; GFX940-LABEL: v_fminimum3_v2f32__fabs_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e64 v6, |v1|, |v3| +; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v3| +; GFX940-NEXT: v_min_f32_e64 v3, |v0|, |v2| +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v2| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX940-NEXT: v_min_f32_e64 v2, v0, |v4| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v4| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX940-NEXT: v_min_f32_e64 v2, v1, |v5| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, |v5| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_v2f32__fabs_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v1, |v1|, |v3|, |v3| +; GFX950-NEXT: v_minimum3_f32 v0, |v0|, |v2|, |v2| +; GFX950-NEXT: v_minimum3_f32 v0, v0, |v4|, |v4| +; GFX950-NEXT: v_minimum3_f32 v1, v1, |v5|, |v5| +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a) %b.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %b) %c.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %c) @@ -647,27 +799,36 @@ define <2 x float> @v_fminimum3_v2f32__fneg_all(<2 x float> %a, <2 x float> %b, ; GFX12-NEXT: v_minimum3_f32 v1, -v1, -v3, -v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_v2f32__fneg_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e64 v6, -v1, -v3 -; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v3 -; GFX9-NEXT: v_min_f32_e64 v3, -v0, -v2 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, 
-v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX9-NEXT: v_min_f32_e64 v2, v0, -v4 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v4 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc -; GFX9-NEXT: v_min_f32_e64 v2, v1, -v5 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v5 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_v2f32__fneg_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e64 v6, -v1, -v3 +; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v3 +; GFX940-NEXT: v_min_f32_e64 v3, -v0, -v2 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX940-NEXT: v_min_f32_e64 v2, v0, -v4 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v4 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v2, vcc +; GFX940-NEXT: v_min_f32_e64 v2, v1, -v5 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, -v5 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_v2f32__fneg_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v1, -v1, -v3, -v3 +; GFX950-NEXT: v_minimum3_f32 v0, -v0, -v2, -v2 +; GFX950-NEXT: v_minimum3_f32 v0, v0, -v4, -v4 +; GFX950-NEXT: v_minimum3_f32 v1, v1, -v5, -v5 +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fneg = fneg <2 x float> %a %b.fneg = fneg <2 x float> %b %c.fneg = fneg <2 x float> %c @@ -688,27 +849,36 @@ define <2 x float> @v_fminimum3_v2f32__inlineimm1(<2 x float> %a, <2 x float> %c ; GFX12-NEXT: v_minimum3_f32 v1, v1, 2.0, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_v2f32__inlineimm1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v4, 2.0, v1 -; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc -; GFX9-NEXT: v_min_f32_e32 v4, 2.0, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc -; GFX9-NEXT: v_min_f32_e32 v4, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: v_min_f32_e32 v2, v1, v3 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_v2f32__inlineimm1: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v4, 2.0, v1 +; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc +; GFX940-NEXT: v_min_f32_e32 v4, 2.0, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc +; GFX940-NEXT: v_min_f32_e32 v4, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: v_min_f32_e32 v2, v1, v3 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_v2f32__inlineimm1: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v1, v1, 2.0, 2.0 +; GFX950-NEXT: v_minimum3_f32 v0, v0, 2.0, 2.0 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v3 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> ) %max1 = call <2 x 
float> @llvm.minimum.v2f32(<2 x float> %max0, <2 x float> %c) ret <2 x float> %max1 @@ -726,27 +896,36 @@ define <2 x float> @v_fminimum3_v2f32__inlineimm2(<2 x float> %a, <2 x float> %b ; GFX12-NEXT: v_minimum3_f32 v1, v1, v3, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_v2f32__inlineimm2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v4, v1, v3 -; GFX9-NEXT: v_mov_b32_e32 v5, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX9-NEXT: v_min_f32_e32 v3, v0, v2 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v2, 4.0, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc -; GFX9-NEXT: v_min_f32_e32 v2, 4.0, v1 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_v2f32__inlineimm2: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v4, v1, v3 +; GFX940-NEXT: v_mov_b32_e32 v5, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 +; GFX940-NEXT: v_min_f32_e32 v3, v0, v2 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v2, 4.0, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v2, vcc +; GFX940-NEXT: v_min_f32_e32 v2, 4.0, v1 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_v2f32__inlineimm2: +; GFX950: ; %bb.0: +; 
GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v3 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_minimum3_f32 v0, v0, 4.0, 4.0 +; GFX950-NEXT: v_minimum3_f32 v1, v1, 4.0, 4.0 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b) %max1 = call <2 x float> @llvm.minimum.v2f32(<2 x float> %max0, <2 x float> ) ret <2 x float> %max1 @@ -765,35 +944,46 @@ define <3 x float> @v_fminimum3_v3f32(<3 x float> %a, <3 x float> %b, <3 x float ; GFX12-NEXT: v_minimum3_f32 v2, v8, v2, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_v3f32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v9, v2, v5 -; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX9-NEXT: v_min_f32_e32 v5, v1, v4 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX9-NEXT: v_min_f32_e32 v4, v0, v3 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc -; GFX9-NEXT: v_min_f32_e32 v3, v6, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v6, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v3, v7, v1 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v7, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v3, v8, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v8, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_v3f32: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v9, v2, v5 +; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 +; GFX940-NEXT: 
v_min_f32_e32 v5, v1, v4 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 +; GFX940-NEXT: v_min_f32_e32 v4, v0, v3 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc +; GFX940-NEXT: v_min_f32_e32 v3, v6, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v6, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v3, v7, v1 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v7, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v3, v8, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v8, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_v3f32: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v5 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_minimum3_f32 v0, v6, v0, v0 +; GFX950-NEXT: v_minimum3_f32 v1, v7, v1, v1 +; GFX950-NEXT: v_minimum3_f32 v2, v8, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b) %max1 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %c, <3 x float> %max0) ret <3 x float> %max1 @@ -812,35 +1002,46 @@ define <3 x float> @v_fminimum3_v3f32_commute(<3 x float> %a, <3 x float> %b, <3 ; GFX12-NEXT: v_minimum3_f32 v2, v2, v5, v8 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_v3f32_commute: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v9, v2, v5 -; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX9-NEXT: v_min_f32_e32 v5, v1, v4 -; GFX9-NEXT: s_nop 0 -; 
GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX9-NEXT: v_min_f32_e32 v4, v0, v3 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc -; GFX9-NEXT: v_min_f32_e32 v3, v0, v6 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v6 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v3, v1, v7 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v7 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v3, v2, v8 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v8 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_v3f32_commute: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v9, v2, v5 +; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 +; GFX940-NEXT: v_min_f32_e32 v5, v1, v4 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 +; GFX940-NEXT: v_min_f32_e32 v4, v0, v3 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc +; GFX940-NEXT: v_min_f32_e32 v3, v0, v6 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v6 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v3, v1, v7 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v7 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v3, v2, v8 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v8 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: 
v_fminimum3_v3f32_commute: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v5 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v6, v6 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v7, v7 +; GFX950-NEXT: v_minimum3_f32 v2, v2, v8, v8 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b) %max1 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %max0, <3 x float> %c) ret <3 x float> %max1 @@ -859,35 +1060,46 @@ define <3 x float> @v_fminimum3_v3f32__fabs_all(<3 x float> %a, <3 x float> %b, ; GFX12-NEXT: v_minimum3_f32 v2, |v2|, |v5|, |v8| ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_v3f32__fabs_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e64 v9, |v2|, |v5| -; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v2|, |v5| -; GFX9-NEXT: v_min_f32_e64 v5, |v1|, |v4| -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v4| -; GFX9-NEXT: v_min_f32_e64 v4, |v0|, |v3| -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v3| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc -; GFX9-NEXT: v_min_f32_e64 v3, v0, |v6| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, |v6| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc -; GFX9-NEXT: v_min_f32_e64 v3, v1, |v7| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, |v7| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc -; GFX9-NEXT: v_min_f32_e64 v3, v2, |v8| -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, |v8| -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_v3f32__fabs_all: +; GFX940: ; %bb.0: +; 
GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e64 v9, |v2|, |v5| +; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v2|, |v5| +; GFX940-NEXT: v_min_f32_e64 v5, |v1|, |v4| +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v1|, |v4| +; GFX940-NEXT: v_min_f32_e64 v4, |v0|, |v3| +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, |v0|, |v3| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc +; GFX940-NEXT: v_min_f32_e64 v3, v0, |v6| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, |v6| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc +; GFX940-NEXT: v_min_f32_e64 v3, v1, |v7| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v1, |v7| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX940-NEXT: v_min_f32_e64 v3, v2, |v8| +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v2, |v8| +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_v3f32__fabs_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v2, |v2|, |v5|, |v5| +; GFX950-NEXT: v_minimum3_f32 v1, |v1|, |v4|, |v4| +; GFX950-NEXT: v_minimum3_f32 v0, |v0|, |v3|, |v3| +; GFX950-NEXT: v_minimum3_f32 v0, v0, |v6|, |v6| +; GFX950-NEXT: v_minimum3_f32 v1, v1, |v7|, |v7| +; GFX950-NEXT: v_minimum3_f32 v2, v2, |v8|, |v8| +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %a) %b.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %b) %c.fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %c) @@ -909,35 +1121,46 @@ define <3 x float> @v_fminimum3_v3f32__fneg_all(<3 x float> %a, <3 x float> %b, ; GFX12-NEXT: v_minimum3_f32 v2, -v2, -v5, -v8 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: 
v_fminimum3_v3f32__fneg_all: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e64 v9, -v2, -v5 -; GFX9-NEXT: v_mov_b32_e32 v10, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v2, -v5 -; GFX9-NEXT: v_min_f32_e64 v5, -v1, -v4 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v4 -; GFX9-NEXT: v_min_f32_e64 v4, -v0, -v3 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v3 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc -; GFX9-NEXT: v_min_f32_e64 v3, v0, -v6 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v0, -v6 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc -; GFX9-NEXT: v_min_f32_e64 v3, v1, -v7 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v1, -v7 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc -; GFX9-NEXT: v_min_f32_e64 v3, v2, -v8 -; GFX9-NEXT: v_cmp_o_f32_e64 vcc, v2, -v8 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_v3f32__fneg_all: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e64 v9, -v2, -v5 +; GFX940-NEXT: v_mov_b32_e32 v10, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v2, -v5 +; GFX940-NEXT: v_min_f32_e64 v5, -v1, -v4 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v9, vcc +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v1, -v4 +; GFX940-NEXT: v_min_f32_e64 v4, -v0, -v3 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v5, vcc +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, -v0, -v3 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v4, vcc +; GFX940-NEXT: v_min_f32_e64 v3, v0, -v6 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v0, -v6 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v10, v3, vcc +; GFX940-NEXT: v_min_f32_e64 v3, v1, -v7 +; GFX940-NEXT: 
v_cmp_o_f32_e64 vcc, v1, -v7 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v10, v3, vcc +; GFX940-NEXT: v_min_f32_e64 v3, v2, -v8 +; GFX940-NEXT: v_cmp_o_f32_e64 vcc, v2, -v8 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v10, v3, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_v3f32__fneg_all: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v2, -v2, -v5, -v5 +; GFX950-NEXT: v_minimum3_f32 v1, -v1, -v4, -v4 +; GFX950-NEXT: v_minimum3_f32 v0, -v0, -v3, -v3 +; GFX950-NEXT: v_minimum3_f32 v0, v0, -v6, -v6 +; GFX950-NEXT: v_minimum3_f32 v1, v1, -v7, -v7 +; GFX950-NEXT: v_minimum3_f32 v2, v2, -v8, -v8 +; GFX950-NEXT: s_setpc_b64 s[30:31] %a.fneg = fneg <3 x float> %a %b.fneg = fneg <3 x float> %b %c.fneg = fneg <3 x float> %c @@ -959,35 +1182,46 @@ define <3 x float> @v_fminimum3_v3f32__inlineimm1(<3 x float> %a, <3 x float> %c ; GFX12-NEXT: v_minimum3_f32 v2, v2, 2.0, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_v3f32__inlineimm1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v6, 2.0, v2 -; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc -; GFX9-NEXT: v_min_f32_e32 v6, 2.0, v1 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc -; GFX9-NEXT: v_min_f32_e32 v6, 2.0, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX9-NEXT: v_min_f32_e32 v6, v0, v3 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX9-NEXT: v_min_f32_e32 v3, v1, v4 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v3, v2, v5 -; 
GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_v3f32__inlineimm1: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v6, 2.0, v2 +; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc +; GFX940-NEXT: v_min_f32_e32 v6, 2.0, v1 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc +; GFX940-NEXT: v_min_f32_e32 v6, 2.0, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc +; GFX940-NEXT: v_min_f32_e32 v6, v0, v3 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 +; GFX940-NEXT: v_min_f32_e32 v3, v1, v4 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v3, v2, v5 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_v3f32__inlineimm1: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v2, v2, 2.0, 2.0 +; GFX950-NEXT: v_minimum3_f32 v1, v1, 2.0, 2.0 +; GFX950-NEXT: v_minimum3_f32 v0, v0, 2.0, 2.0 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v5 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> ) %max1 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %max0, <3 x float> %c) ret <3 x float> %max1 @@ -1006,35 +1240,46 @@ define <3 x float> 
@v_fminimum3_v3f32__inlineimm2(<3 x float> %a, <3 x float> %b ; GFX12-NEXT: v_minimum3_f32 v2, v2, v5, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_fminimum3_v3f32__inlineimm2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v6, v2, v5 -; GFX9-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX9-NEXT: v_min_f32_e32 v5, v1, v4 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX9-NEXT: v_min_f32_e32 v4, v0, v3 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc -; GFX9-NEXT: v_min_f32_e32 v3, 4.0, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v3, 4.0, v1 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v3, 4.0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_fminimum3_v3f32__inlineimm2: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v6, v2, v5 +; GFX940-NEXT: v_mov_b32_e32 v7, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 +; GFX940-NEXT: v_min_f32_e32 v5, v1, v4 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 +; GFX940-NEXT: v_min_f32_e32 v4, v0, v3 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v5, vcc +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v4, vcc +; GFX940-NEXT: v_min_f32_e32 v3, 4.0, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 +; GFX940-NEXT: 
s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v7, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v3, 4.0, v1 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v1, v1 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v3, 4.0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v2, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_fminimum3_v3f32__inlineimm2: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v5 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_minimum3_f32 v0, v0, 4.0, 4.0 +; GFX950-NEXT: v_minimum3_f32 v1, v1, 4.0, 4.0 +; GFX950-NEXT: v_minimum3_f32 v2, v2, 4.0, 4.0 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %a, <3 x float> %b) %max1 = call <3 x float> @llvm.minimum.v3f32(<3 x float> %max0, <3 x float> ) ret <3 x float> %max1 @@ -3165,19 +3410,26 @@ define <2 x float> @v_no_fminimum3_f32__multi_use(float %a, float %b, float %c) ; GFX12-NEXT: v_minimum_f32 v1, v0, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_no_fminimum3_f32__multi_use: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v3, v0, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc -; GFX9-NEXT: v_min_f32_e32 v1, v0, v2 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX940-LABEL: v_no_fminimum3_f32__multi_use: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_min_f32_e32 v3, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v4, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 +; GFX940-NEXT: s_nop 1 +; 
GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; GFX940-NEXT: v_min_f32_e32 v1, v0, v2 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_no_fminimum3_f32__multi_use: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: v_minimum3_f32 v1, v0, v2, v2 +; GFX950-NEXT: s_setpc_b64 s[30:31] %max0 = call float @llvm.minimum.f32(float %a, float %b) %max1 = call float @llvm.minimum.f32(float %max0, float %c) %insert.0 = insertelement <2 x float> poison, float %max0, i32 0 @@ -3193,22 +3445,31 @@ define amdgpu_ps <2 x i32> @s_no_fminimum3_f32__multi_use(float inreg %a, float ; GFX12-NEXT: s_minimum_f32 s1, s0, s2 ; GFX12-NEXT: ; return to shader part epilog ; -; GFX9-LABEL: s_no_fminimum3_f32__multi_use: -; GFX9: ; %bb.0: -; GFX9-NEXT: v_mov_b32_e32 v0, s1 -; GFX9-NEXT: v_min_f32_e32 v1, s0, v0 -; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 -; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc -; GFX9-NEXT: v_min_f32_e32 v1, s2, v0 -; GFX9-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 -; GFX9-NEXT: v_readfirstlane_b32 s0, v0 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_readfirstlane_b32 s1, v1 -; GFX9-NEXT: ; return to shader part epilog +; GFX940-LABEL: s_no_fminimum3_f32__multi_use: +; GFX940: ; %bb.0: +; GFX940-NEXT: v_mov_b32_e32 v0, s1 +; GFX940-NEXT: v_min_f32_e32 v1, s0, v0 +; GFX940-NEXT: v_mov_b32_e32 v2, 0x7fc00000 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 +; GFX940-NEXT: s_nop 1 +; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX940-NEXT: v_min_f32_e32 v1, s2, v0 +; GFX940-NEXT: v_cmp_o_f32_e32 vcc, s2, v0 +; GFX940-NEXT: v_readfirstlane_b32 s0, v0 +; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc +; GFX940-NEXT: s_nop 0 
+; GFX940-NEXT: v_readfirstlane_b32 s1, v1 +; GFX940-NEXT: ; return to shader part epilog +; +; GFX950-LABEL: s_no_fminimum3_f32__multi_use: +; GFX950: ; %bb.0: +; GFX950-NEXT: v_mov_b32_e32 v0, s0 +; GFX950-NEXT: v_minimum3_f32 v0, v0, s1, s1 +; GFX950-NEXT: v_minimum3_f32 v1, v0, s2, s2 +; GFX950-NEXT: v_readfirstlane_b32 s0, v0 +; GFX950-NEXT: v_readfirstlane_b32 s1, v1 +; GFX950-NEXT: ; return to shader part epilog %max0 = call float @llvm.minimum.f32(float %a, float %b) %max1 = call float @llvm.minimum.f32(float %max0, float %c) %cast0 = bitcast float %max0 to i32 @@ -3372,6 +3633,3 @@ define <2 x double> @v_no_fminimum3_f64__multi_use(double %a, double %b, double %insert.1 = insertelement <2 x double> %insert.0, double %max1, i32 1 ret <2 x double> %insert.1 } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GFX940: {{.*}} -; GFX950: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll index c1fdfa2c4cf9ab..df7355c2c57bfa 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll @@ -39,11 +39,7 @@ define float @v_maximum_f32(float %src0, float %src1) { ; GFX950-LABEL: v_maximum_f32: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f32_e32 v2, v0, v1 -; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_f32: @@ -89,11 +85,17 @@ define float @v_maximum_f32__nnan(float %src0, float %src1) { ; GFX8-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximum_f32__nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 
s[30:31] +; GFX900-LABEL: v_maximum_f32__nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximum_f32__nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_f32__nnan: ; GFX10: ; %bb.0: @@ -151,11 +153,7 @@ define float @v_maximum_f32__nsz(float %src0, float %src1) { ; GFX950-LABEL: v_maximum_f32__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f32_e32 v2, v0, v1 -; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_f32__nsz: @@ -201,11 +199,17 @@ define float @v_maximum_f32__nnan_nsz(float %src0, float %src1) { ; GFX8-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximum_f32__nnan_nsz: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximum_f32__nnan_nsz: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximum_f32__nnan_nsz: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_f32__nnan_nsz: ; GFX10: ; %bb.0: @@ -267,11 +271,7 @@ define float @v_maximum_f32__nnan_src0(float %arg0, float %src1) { ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_add_f32_e32 v0, 1.0, v0 -; GFX950-NEXT: 
v_max_f32_e32 v2, v0, v1 -; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_f32__nnan_src0: @@ -344,11 +344,7 @@ define float @v_maximum_f32__nnan_src1(float %src0, float %arg1) { ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_add_f32_e32 v1, 1.0, v1 -; GFX950-NEXT: v_max_f32_e32 v2, v0, v1 -; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_f32__nnan_src1: @@ -429,12 +425,8 @@ define void @s_maximum_f32(float inreg %src0, float inreg %src1) { ; GFX950-LABEL: s_maximum_f32: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_mov_b32_e32 v0, s1 -; GFX950-NEXT: v_max_f32_e32 v1, s0, v0 -; GFX950-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX950-NEXT: v_mov_b32_e32 v0, s0 +; GFX950-NEXT: v_maximum3_f32 v0, v0, s1, s1 ; GFX950-NEXT: ;;#ASMSTART ; GFX950-NEXT: ; use v0 ; GFX950-NEXT: ;;#ASMEND @@ -521,15 +513,8 @@ define <2 x float> @v_maximum_v2f32(<2 x float> %src0, <2 x float> %src1) { ; GFX950-LABEL: v_maximum_v2f32: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f32_e32 v4, v0, v2 -; GFX950-NEXT: v_mov_b32_e32 v5, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX950-NEXT: v_max_f32_e32 v2, v1, v3 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, 
v2, vcc +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v2f32: @@ -583,12 +568,19 @@ define <2 x float> @v_maximum_v2f32__nnan(<2 x float> %src0, <2 x float> %src1) ; GFX8-NEXT: v_max_f32_e32 v1, v1, v3 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximum_v2f32__nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v0, v0, v2 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v3 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximum_v2f32__nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX900-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximum_v2f32__nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v2f32__nnan: ; GFX10: ; %bb.0: @@ -657,15 +649,8 @@ define <2 x float> @v_maximum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) { ; GFX950-LABEL: v_maximum_v2f32__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f32_e32 v4, v0, v2 -; GFX950-NEXT: v_mov_b32_e32 v5, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX950-NEXT: v_max_f32_e32 v2, v1, v3 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v2f32__nsz: @@ -719,12 +704,19 @@ define <2 x float> @v_maximum_v2f32__nnan_nsz(<2 x float> %src0, <2 x float> %sr ; GFX8-NEXT: v_max_f32_e32 v1, v1, v3 ; 
GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximum_v2f32__nnan_nsz: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v0, v0, v2 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v3 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximum_v2f32__nnan_nsz: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX900-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximum_v2f32__nnan_nsz: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v2f32__nnan_nsz: ; GFX10: ; %bb.0: @@ -808,16 +800,10 @@ define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) { ; GFX950-LABEL: s_maximum_v2f32: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_mov_b32_e32 v0, s3 -; GFX950-NEXT: v_max_f32_e32 v1, s1, v0 -; GFX950-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s1, v0 -; GFX950-NEXT: v_mov_b32_e32 v0, s2 -; GFX950-NEXT: v_max_f32_e32 v3, s0, v0 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc +; GFX950-NEXT: v_mov_b32_e32 v0, s1 +; GFX950-NEXT: v_maximum3_f32 v1, v0, s3, s3 +; GFX950-NEXT: v_mov_b32_e32 v0, s0 +; GFX950-NEXT: v_maximum3_f32 v0, v0, s2, s2 ; GFX950-NEXT: ;;#ASMSTART ; GFX950-NEXT: ; use v[0:1] ; GFX950-NEXT: ;;#ASMEND @@ -920,19 +906,9 @@ define <3 x float> @v_maximum_v3f32(<3 x float> %src0, <3 x float> %src1) { ; GFX950-LABEL: v_maximum_v3f32: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f32_e32 v6, v0, v3 -; GFX950-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX950-NEXT: 
v_cmp_o_f32_e32 vcc, v0, v3 -; GFX950-NEXT: v_max_f32_e32 v3, v1, v4 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc -; GFX950-NEXT: v_max_f32_e32 v3, v2, v5 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc +; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v3f32: @@ -995,13 +971,21 @@ define <3 x float> @v_maximum_v3f32__nnan(<3 x float> %src0, <3 x float> %src1) ; GFX8-NEXT: v_max_f32_e32 v2, v2, v5 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximum_v3f32__nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v0, v0, v3 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v4 -; GFX9-NEXT: v_max_f32_e32 v2, v2, v5 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximum_v3f32__nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v0, v0, v3 +; GFX900-NEXT: v_max_f32_e32 v1, v1, v4 +; GFX900-NEXT: v_max_f32_e32 v2, v2, v5 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximum_v3f32__nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v3f32__nnan: ; GFX10: ; %bb.0: @@ -1082,19 +1066,9 @@ define <3 x float> @v_maximum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) { ; GFX950-LABEL: v_maximum_v3f32__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f32_e32 v6, v0, v3 -; GFX950-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; 
GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX950-NEXT: v_max_f32_e32 v3, v1, v4 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc -; GFX950-NEXT: v_max_f32_e32 v3, v2, v5 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc +; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v3f32__nsz: @@ -1157,13 +1131,21 @@ define <3 x float> @v_maximum_v3f32__nnan_nsz(<3 x float> %src0, <3 x float> %sr ; GFX8-NEXT: v_max_f32_e32 v2, v2, v5 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximum_v3f32__nnan_nsz: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v0, v0, v3 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v4 -; GFX9-NEXT: v_max_f32_e32 v2, v2, v5 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximum_v3f32__nnan_nsz: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v0, v0, v3 +; GFX900-NEXT: v_max_f32_e32 v1, v1, v4 +; GFX900-NEXT: v_max_f32_e32 v2, v2, v5 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximum_v3f32__nnan_nsz: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v3f32__nnan_nsz: ; GFX10: ; %bb.0: @@ -1253,23 +1235,10 @@ define <4 x float> @v_maximum_v4f32(<4 x float> %src0, <4 x float> %src1) { ; GFX950-LABEL: v_maximum_v4f32: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f32_e32 v8, v0, v4 -; GFX950-NEXT: 
v_mov_b32_e32 v9, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 -; GFX950-NEXT: v_max_f32_e32 v4, v1, v5 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc -; GFX950-NEXT: v_max_f32_e32 v4, v2, v6 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v6 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc -; GFX950-NEXT: v_max_f32_e32 v4, v3, v7 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v7 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc +; GFX950-NEXT: v_maximum3_f32 v0, v0, v4, v4 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v5, v5 +; GFX950-NEXT: v_maximum3_f32 v2, v2, v6, v6 +; GFX950-NEXT: v_maximum3_f32 v3, v3, v7, v7 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v4f32: @@ -1341,14 +1310,23 @@ define <4 x float> @v_maximum_v4f32__nnan(<4 x float> %src0, <4 x float> %src1) ; GFX8-NEXT: v_max_f32_e32 v3, v3, v7 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximum_v4f32__nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v0, v0, v4 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v5 -; GFX9-NEXT: v_max_f32_e32 v2, v2, v6 -; GFX9-NEXT: v_max_f32_e32 v3, v3, v7 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximum_v4f32__nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v0, v0, v4 +; GFX900-NEXT: v_max_f32_e32 v1, v1, v5 +; GFX900-NEXT: v_max_f32_e32 v2, v2, v6 +; GFX900-NEXT: v_max_f32_e32 v3, v3, v7 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximum_v4f32__nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v4, v4 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v5, v5 +; GFX950-NEXT: v_maximum3_f32 v2, v2, v6, v6 +; GFX950-NEXT: v_maximum3_f32 v3, v3, v7, v7 +; GFX950-NEXT: 
s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v4f32__nnan: ; GFX10: ; %bb.0: @@ -1440,23 +1418,10 @@ define <4 x float> @v_maximum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) { ; GFX950-LABEL: v_maximum_v4f32__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f32_e32 v8, v0, v4 -; GFX950-NEXT: v_mov_b32_e32 v9, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 -; GFX950-NEXT: v_max_f32_e32 v4, v1, v5 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc -; GFX950-NEXT: v_max_f32_e32 v4, v2, v6 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v6 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc -; GFX950-NEXT: v_max_f32_e32 v4, v3, v7 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v7 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc +; GFX950-NEXT: v_maximum3_f32 v0, v0, v4, v4 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v5, v5 +; GFX950-NEXT: v_maximum3_f32 v2, v2, v6, v6 +; GFX950-NEXT: v_maximum3_f32 v3, v3, v7, v7 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v4f32__nsz: @@ -1528,14 +1493,23 @@ define <4 x float> @v_maximum_v4f32__nnan_nsz(<4 x float> %src0, <4 x float> %sr ; GFX8-NEXT: v_max_f32_e32 v3, v3, v7 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximum_v4f32__nnan_nsz: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v0, v0, v4 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v5 -; GFX9-NEXT: v_max_f32_e32 v2, v2, v6 -; GFX9-NEXT: v_max_f32_e32 v3, v3, v7 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximum_v4f32__nnan_nsz: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v0, v0, v4 +; GFX900-NEXT: v_max_f32_e32 v1, v1, v5 +; GFX900-NEXT: v_max_f32_e32 v2, v2, v6 +; GFX900-NEXT: v_max_f32_e32 v3, 
v3, v7 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximum_v4f32__nnan_nsz: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_maximum3_f32 v0, v0, v4, v4 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v5, v5 +; GFX950-NEXT: v_maximum3_f32 v2, v2, v6, v6 +; GFX950-NEXT: v_maximum3_f32 v3, v3, v7, v7 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v4f32__nnan_nsz: ; GFX10: ; %bb.0: @@ -1663,39 +1637,14 @@ define <8 x float> @v_maximum_v8f32(<8 x float> %src0, <8 x float> %src1) { ; GFX950-LABEL: v_maximum_v8f32: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f32_e32 v16, v0, v8 -; GFX950-NEXT: v_mov_b32_e32 v17, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v8 -; GFX950-NEXT: v_max_f32_e32 v8, v1, v9 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v9 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc -; GFX950-NEXT: v_max_f32_e32 v8, v2, v10 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v10 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc -; GFX950-NEXT: v_max_f32_e32 v8, v3, v11 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v11 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc -; GFX950-NEXT: v_max_f32_e32 v8, v4, v12 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v4, v12 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc -; GFX950-NEXT: v_max_f32_e32 v8, v5, v13 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v5, v13 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc -; GFX950-NEXT: v_max_f32_e32 v8, v6, v14 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v6, v14 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc -; GFX950-NEXT: v_max_f32_e32 v8, v7, v15 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v7, v15 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc +; 
GFX950-NEXT: v_maximum3_f32 v0, v0, v8, v8 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v9, v9 +; GFX950-NEXT: v_maximum3_f32 v2, v2, v10, v10 +; GFX950-NEXT: v_maximum3_f32 v3, v3, v11, v11 +; GFX950-NEXT: v_maximum3_f32 v4, v4, v12, v12 +; GFX950-NEXT: v_maximum3_f32 v5, v5, v13, v13 +; GFX950-NEXT: v_maximum3_f32 v6, v6, v14, v14 +; GFX950-NEXT: v_maximum3_f32 v7, v7, v15, v15 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v8f32: @@ -1980,64 +1929,23 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: scratch_load_dword v31, off, s32 -; GFX950-NEXT: v_mov_b32_e32 v32, 0x7fc00000 -; GFX950-NEXT: v_max_f32_e32 v33, v0, v16 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v16 -; GFX950-NEXT: v_max_f32_e32 v34, v1, v17 -; GFX950-NEXT: v_max_f32_e32 v35, v2, v18 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v32, v33, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v17 -; GFX950-NEXT: v_max_f32_e32 v36, v3, v19 -; GFX950-NEXT: v_max_f32_e32 v37, v4, v20 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v32, v34, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v18 -; GFX950-NEXT: v_max_f32_e32 v38, v5, v21 -; GFX950-NEXT: v_max_f32_e32 v39, v6, v22 -; GFX950-NEXT: v_cndmask_b32_e32 v2, v32, v35, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v19 -; GFX950-NEXT: v_max_f32_e32 v48, v7, v23 -; GFX950-NEXT: v_max_f32_e32 v49, v8, v24 -; GFX950-NEXT: v_cndmask_b32_e32 v3, v32, v36, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v4, v20 -; GFX950-NEXT: v_max_f32_e32 v50, v9, v25 -; GFX950-NEXT: v_max_f32_e32 v51, v10, v26 -; GFX950-NEXT: v_cndmask_b32_e32 v4, v32, v37, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v5, v21 -; GFX950-NEXT: v_max_f32_e32 v52, v11, v27 -; GFX950-NEXT: v_max_f32_e32 v53, v12, v28 -; GFX950-NEXT: v_cndmask_b32_e32 v5, v32, v38, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v6, v22 -; GFX950-NEXT: v_max_f32_e32 v54, v13, v29 -; GFX950-NEXT: v_max_f32_e32 v55, v14, v30 -; 
GFX950-NEXT: v_cndmask_b32_e32 v6, v32, v39, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v7, v23 +; GFX950-NEXT: v_maximum3_f32 v0, v0, v16, v16 +; GFX950-NEXT: v_maximum3_f32 v1, v1, v17, v17 +; GFX950-NEXT: v_maximum3_f32 v2, v2, v18, v18 +; GFX950-NEXT: v_maximum3_f32 v3, v3, v19, v19 +; GFX950-NEXT: v_maximum3_f32 v4, v4, v20, v20 +; GFX950-NEXT: v_maximum3_f32 v5, v5, v21, v21 +; GFX950-NEXT: v_maximum3_f32 v6, v6, v22, v22 +; GFX950-NEXT: v_maximum3_f32 v7, v7, v23, v23 +; GFX950-NEXT: v_maximum3_f32 v8, v8, v24, v24 +; GFX950-NEXT: v_maximum3_f32 v9, v9, v25, v25 +; GFX950-NEXT: v_maximum3_f32 v10, v10, v26, v26 +; GFX950-NEXT: v_maximum3_f32 v11, v11, v27, v27 +; GFX950-NEXT: v_maximum3_f32 v12, v12, v28, v28 +; GFX950-NEXT: v_maximum3_f32 v13, v13, v29, v29 +; GFX950-NEXT: v_maximum3_f32 v14, v14, v30, v30 ; GFX950-NEXT: s_waitcnt vmcnt(0) -; GFX950-NEXT: v_max_f32_e32 v16, v15, v31 -; GFX950-NEXT: v_cndmask_b32_e32 v7, v32, v48, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v8, v24 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v8, v32, v49, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v9, v25 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v9, v32, v50, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v10, v26 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v10, v32, v51, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v11, v27 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v11, v32, v52, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v12, v28 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v12, v32, v53, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v13, v29 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v13, v32, v54, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v14, v30 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v14, v32, v55, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v15, v31 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v15, v32, v16, vcc +; GFX950-NEXT: v_maximum3_f32 v15, v15, v31, v31 ; GFX950-NEXT: 
s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v16f32: @@ -2176,3 +2084,4 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) { } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; GCN: {{.*}} +; GFX9: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll index 2614fb3bf9f737..956de6de3aad3b 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll @@ -39,11 +39,7 @@ define float @v_minimum_f32(float %src0, float %src1) { ; GFX950-LABEL: v_minimum_f32: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_min_f32_e32 v2, v0, v1 -; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_f32: @@ -89,11 +85,17 @@ define float @v_minimum_f32__nnan(float %src0, float %src1) { ; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimum_f32__nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimum_f32__nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimum_f32__nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_f32__nnan: ; GFX10: ; %bb.0: @@ -151,11 +153,7 @@ define float @v_minimum_f32__nsz(float %src0, float %src1) { ; GFX950-LABEL: v_minimum_f32__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_min_f32_e32 v2, v0, v1 -; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_f32__nsz: @@ -201,11 +199,17 @@ define float @v_minimum_f32__nnan_nsz(float %src0, float %src1) { ; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimum_f32__nnan_nsz: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimum_f32__nnan_nsz: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimum_f32__nnan_nsz: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_f32__nnan_nsz: ; GFX10: ; %bb.0: @@ -267,11 +271,7 @@ define float @v_minimum_f32__nnan_src0(float %arg0, float %src1) { ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_add_f32_e32 v0, 1.0, v0 -; GFX950-NEXT: v_min_f32_e32 v2, v0, v1 -; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_f32__nnan_src0: @@ -344,11 +344,7 @@ define float @v_minimum_f32__nnan_src1(float %src0, float %arg1) { ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_add_f32_e32 v1, 1.0, v1 -; GFX950-NEXT: v_min_f32_e32 v2, v0, v1 -; GFX950-NEXT: v_mov_b32_e32 v3, 0x7fc00000 -; GFX950-NEXT: 
v_cmp_o_f32_e32 vcc, v0, v1 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX950-NEXT: v_minimum3_f32 v0, v0, v1, v1 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_f32__nnan_src1: @@ -429,12 +425,8 @@ define void @s_minimum_f32(float inreg %src0, float inreg %src1) { ; GFX950-LABEL: s_minimum_f32: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_mov_b32_e32 v0, s1 -; GFX950-NEXT: v_min_f32_e32 v1, s0, v0 -; GFX950-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; GFX950-NEXT: v_mov_b32_e32 v0, s0 +; GFX950-NEXT: v_minimum3_f32 v0, v0, s1, s1 ; GFX950-NEXT: ;;#ASMSTART ; GFX950-NEXT: ; use v0 ; GFX950-NEXT: ;;#ASMEND @@ -521,15 +513,8 @@ define <2 x float> @v_minimum_v2f32(<2 x float> %src0, <2 x float> %src1) { ; GFX950-LABEL: v_minimum_v2f32: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_min_f32_e32 v4, v0, v2 -; GFX950-NEXT: v_mov_b32_e32 v5, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX950-NEXT: v_min_f32_e32 v2, v1, v3 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc +; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v3 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v2f32: @@ -583,12 +568,19 @@ define <2 x float> @v_minimum_v2f32__nnan(<2 x float> %src0, <2 x float> %src1) ; GFX8-NEXT: v_min_f32_e32 v1, v1, v3 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimum_v2f32__nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v0, v0, v2 -; GFX9-NEXT: v_min_f32_e32 v1, v1, v3 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimum_v2f32__nnan: +; GFX900: 
; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX900-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimum_v2f32__nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v3 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v2f32__nnan: ; GFX10: ; %bb.0: @@ -657,15 +649,8 @@ define <2 x float> @v_minimum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) { ; GFX950-LABEL: v_minimum_v2f32__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_min_f32_e32 v4, v0, v2 -; GFX950-NEXT: v_mov_b32_e32 v5, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 -; GFX950-NEXT: v_min_f32_e32 v2, v1, v3 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc +; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v3 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v2f32__nsz: @@ -719,12 +704,19 @@ define <2 x float> @v_minimum_v2f32__nnan_nsz(<2 x float> %src0, <2 x float> %sr ; GFX8-NEXT: v_min_f32_e32 v1, v1, v3 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimum_v2f32__nnan_nsz: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v0, v0, v2 -; GFX9-NEXT: v_min_f32_e32 v1, v1, v3 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimum_v2f32__nnan_nsz: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX900-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimum_v2f32__nnan_nsz: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
+; GFX950-NEXT: v_minimum3_f32 v0, v0, v2, v2 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v3, v3 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v2f32__nnan_nsz: ; GFX10: ; %bb.0: @@ -808,16 +800,10 @@ define void @s_minimum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) { ; GFX950-LABEL: s_minimum_v2f32: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_mov_b32_e32 v0, s3 -; GFX950-NEXT: v_min_f32_e32 v1, s1, v0 -; GFX950-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s1, v0 -; GFX950-NEXT: v_mov_b32_e32 v0, s2 -; GFX950-NEXT: v_min_f32_e32 v3, s0, v0 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, s0, v0 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc +; GFX950-NEXT: v_mov_b32_e32 v0, s1 +; GFX950-NEXT: v_minimum3_f32 v1, v0, s3, s3 +; GFX950-NEXT: v_mov_b32_e32 v0, s0 +; GFX950-NEXT: v_minimum3_f32 v0, v0, s2, s2 ; GFX950-NEXT: ;;#ASMSTART ; GFX950-NEXT: ; use v[0:1] ; GFX950-NEXT: ;;#ASMEND @@ -920,19 +906,9 @@ define <3 x float> @v_minimum_v3f32(<3 x float> %src0, <3 x float> %src1) { ; GFX950-LABEL: v_minimum_v3f32: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_min_f32_e32 v6, v0, v3 -; GFX950-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX950-NEXT: v_min_f32_e32 v3, v1, v4 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc -; GFX950-NEXT: v_min_f32_e32 v3, v2, v5 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc +; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v5 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v3f32: @@ 
-995,13 +971,21 @@ define <3 x float> @v_minimum_v3f32__nnan(<3 x float> %src0, <3 x float> %src1) ; GFX8-NEXT: v_min_f32_e32 v2, v2, v5 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimum_v3f32__nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v0, v0, v3 -; GFX9-NEXT: v_min_f32_e32 v1, v1, v4 -; GFX9-NEXT: v_min_f32_e32 v2, v2, v5 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimum_v3f32__nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f32_e32 v0, v0, v3 +; GFX900-NEXT: v_min_f32_e32 v1, v1, v4 +; GFX900-NEXT: v_min_f32_e32 v2, v2, v5 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimum_v3f32__nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v5 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v3f32__nnan: ; GFX10: ; %bb.0: @@ -1082,19 +1066,9 @@ define <3 x float> @v_minimum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) { ; GFX950-LABEL: v_minimum_v3f32__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_min_f32_e32 v6, v0, v3 -; GFX950-NEXT: v_mov_b32_e32 v7, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 -; GFX950-NEXT: v_min_f32_e32 v3, v1, v4 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc -; GFX950-NEXT: v_min_f32_e32 v3, v2, v5 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc +; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v5 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: 
v_minimum_v3f32__nsz: @@ -1157,13 +1131,21 @@ define <3 x float> @v_minimum_v3f32__nnan_nsz(<3 x float> %src0, <3 x float> %sr ; GFX8-NEXT: v_min_f32_e32 v2, v2, v5 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimum_v3f32__nnan_nsz: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v0, v0, v3 -; GFX9-NEXT: v_min_f32_e32 v1, v1, v4 -; GFX9-NEXT: v_min_f32_e32 v2, v2, v5 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimum_v3f32__nnan_nsz: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f32_e32 v0, v0, v3 +; GFX900-NEXT: v_min_f32_e32 v1, v1, v4 +; GFX900-NEXT: v_min_f32_e32 v2, v2, v5 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimum_v3f32__nnan_nsz: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, v3, v3 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v4, v4 +; GFX950-NEXT: v_minimum3_f32 v2, v2, v5, v5 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v3f32__nnan_nsz: ; GFX10: ; %bb.0: @@ -1253,23 +1235,10 @@ define <4 x float> @v_minimum_v4f32(<4 x float> %src0, <4 x float> %src1) { ; GFX950-LABEL: v_minimum_v4f32: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_min_f32_e32 v8, v0, v4 -; GFX950-NEXT: v_mov_b32_e32 v9, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 -; GFX950-NEXT: v_min_f32_e32 v4, v1, v5 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc -; GFX950-NEXT: v_min_f32_e32 v4, v2, v6 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v6 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc -; GFX950-NEXT: v_min_f32_e32 v4, v3, v7 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v7 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc +; 
GFX950-NEXT: v_minimum3_f32 v0, v0, v4, v4 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v5, v5 +; GFX950-NEXT: v_minimum3_f32 v2, v2, v6, v6 +; GFX950-NEXT: v_minimum3_f32 v3, v3, v7, v7 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v4f32: @@ -1341,14 +1310,23 @@ define <4 x float> @v_minimum_v4f32__nnan(<4 x float> %src0, <4 x float> %src1) ; GFX8-NEXT: v_min_f32_e32 v3, v3, v7 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimum_v4f32__nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v0, v0, v4 -; GFX9-NEXT: v_min_f32_e32 v1, v1, v5 -; GFX9-NEXT: v_min_f32_e32 v2, v2, v6 -; GFX9-NEXT: v_min_f32_e32 v3, v3, v7 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimum_v4f32__nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f32_e32 v0, v0, v4 +; GFX900-NEXT: v_min_f32_e32 v1, v1, v5 +; GFX900-NEXT: v_min_f32_e32 v2, v2, v6 +; GFX900-NEXT: v_min_f32_e32 v3, v3, v7 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimum_v4f32__nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, v4, v4 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v5, v5 +; GFX950-NEXT: v_minimum3_f32 v2, v2, v6, v6 +; GFX950-NEXT: v_minimum3_f32 v3, v3, v7, v7 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v4f32__nnan: ; GFX10: ; %bb.0: @@ -1440,23 +1418,10 @@ define <4 x float> @v_minimum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) { ; GFX950-LABEL: v_minimum_v4f32__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_min_f32_e32 v8, v0, v4 -; GFX950-NEXT: v_mov_b32_e32 v9, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 -; GFX950-NEXT: v_min_f32_e32 v4, v1, v5 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: 
v_cndmask_b32_e32 v1, v9, v4, vcc -; GFX950-NEXT: v_min_f32_e32 v4, v2, v6 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v6 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc -; GFX950-NEXT: v_min_f32_e32 v4, v3, v7 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v7 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc +; GFX950-NEXT: v_minimum3_f32 v0, v0, v4, v4 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v5, v5 +; GFX950-NEXT: v_minimum3_f32 v2, v2, v6, v6 +; GFX950-NEXT: v_minimum3_f32 v3, v3, v7, v7 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v4f32__nsz: @@ -1528,14 +1493,23 @@ define <4 x float> @v_minimum_v4f32__nnan_nsz(<4 x float> %src0, <4 x float> %sr ; GFX8-NEXT: v_min_f32_e32 v3, v3, v7 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimum_v4f32__nnan_nsz: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v0, v0, v4 -; GFX9-NEXT: v_min_f32_e32 v1, v1, v5 -; GFX9-NEXT: v_min_f32_e32 v2, v2, v6 -; GFX9-NEXT: v_min_f32_e32 v3, v3, v7 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimum_v4f32__nnan_nsz: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f32_e32 v0, v0, v4 +; GFX900-NEXT: v_min_f32_e32 v1, v1, v5 +; GFX900-NEXT: v_min_f32_e32 v2, v2, v6 +; GFX900-NEXT: v_min_f32_e32 v3, v3, v7 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimum_v4f32__nnan_nsz: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_minimum3_f32 v0, v0, v4, v4 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v5, v5 +; GFX950-NEXT: v_minimum3_f32 v2, v2, v6, v6 +; GFX950-NEXT: v_minimum3_f32 v3, v3, v7, v7 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v4f32__nnan_nsz: ; GFX10: ; %bb.0: @@ -1663,39 +1637,14 @@ define <8 x float> @v_minimum_v8f32(<8 x float> %src0, <8 x float> %src1) { ; GFX950-LABEL: v_minimum_v8f32: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_min_f32_e32 v16, v0, v8 -; GFX950-NEXT: v_mov_b32_e32 v17, 0x7fc00000 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v8 -; GFX950-NEXT: v_min_f32_e32 v8, v1, v9 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v9 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc -; GFX950-NEXT: v_min_f32_e32 v8, v2, v10 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v10 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc -; GFX950-NEXT: v_min_f32_e32 v8, v3, v11 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v11 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc -; GFX950-NEXT: v_min_f32_e32 v8, v4, v12 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v4, v12 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc -; GFX950-NEXT: v_min_f32_e32 v8, v5, v13 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v5, v13 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc -; GFX950-NEXT: v_min_f32_e32 v8, v6, v14 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v6, v14 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc -; GFX950-NEXT: v_min_f32_e32 v8, v7, v15 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v7, v15 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc +; GFX950-NEXT: v_minimum3_f32 v0, v0, v8, v8 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v9, v9 +; GFX950-NEXT: v_minimum3_f32 v2, v2, v10, v10 +; GFX950-NEXT: v_minimum3_f32 v3, v3, v11, v11 +; GFX950-NEXT: v_minimum3_f32 v4, v4, v12, v12 +; GFX950-NEXT: v_minimum3_f32 v5, v5, v13, v13 +; GFX950-NEXT: v_minimum3_f32 v6, v6, v14, v14 +; GFX950-NEXT: v_minimum3_f32 v7, v7, v15, v15 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v8f32: @@ -1980,64 +1929,23 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; 
GFX950-NEXT: scratch_load_dword v31, off, s32 -; GFX950-NEXT: v_mov_b32_e32 v32, 0x7fc00000 -; GFX950-NEXT: v_min_f32_e32 v33, v0, v16 -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v0, v16 -; GFX950-NEXT: v_min_f32_e32 v34, v1, v17 -; GFX950-NEXT: v_min_f32_e32 v35, v2, v18 -; GFX950-NEXT: v_cndmask_b32_e32 v0, v32, v33, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v1, v17 -; GFX950-NEXT: v_min_f32_e32 v36, v3, v19 -; GFX950-NEXT: v_min_f32_e32 v37, v4, v20 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v32, v34, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v2, v18 -; GFX950-NEXT: v_min_f32_e32 v38, v5, v21 -; GFX950-NEXT: v_min_f32_e32 v39, v6, v22 -; GFX950-NEXT: v_cndmask_b32_e32 v2, v32, v35, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v3, v19 -; GFX950-NEXT: v_min_f32_e32 v48, v7, v23 -; GFX950-NEXT: v_min_f32_e32 v49, v8, v24 -; GFX950-NEXT: v_cndmask_b32_e32 v3, v32, v36, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v4, v20 -; GFX950-NEXT: v_min_f32_e32 v50, v9, v25 -; GFX950-NEXT: v_min_f32_e32 v51, v10, v26 -; GFX950-NEXT: v_cndmask_b32_e32 v4, v32, v37, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v5, v21 -; GFX950-NEXT: v_min_f32_e32 v52, v11, v27 -; GFX950-NEXT: v_min_f32_e32 v53, v12, v28 -; GFX950-NEXT: v_cndmask_b32_e32 v5, v32, v38, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v6, v22 -; GFX950-NEXT: v_min_f32_e32 v54, v13, v29 -; GFX950-NEXT: v_min_f32_e32 v55, v14, v30 -; GFX950-NEXT: v_cndmask_b32_e32 v6, v32, v39, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v7, v23 +; GFX950-NEXT: v_minimum3_f32 v0, v0, v16, v16 +; GFX950-NEXT: v_minimum3_f32 v1, v1, v17, v17 +; GFX950-NEXT: v_minimum3_f32 v2, v2, v18, v18 +; GFX950-NEXT: v_minimum3_f32 v3, v3, v19, v19 +; GFX950-NEXT: v_minimum3_f32 v4, v4, v20, v20 +; GFX950-NEXT: v_minimum3_f32 v5, v5, v21, v21 +; GFX950-NEXT: v_minimum3_f32 v6, v6, v22, v22 +; GFX950-NEXT: v_minimum3_f32 v7, v7, v23, v23 +; GFX950-NEXT: v_minimum3_f32 v8, v8, v24, v24 +; GFX950-NEXT: v_minimum3_f32 v9, v9, v25, v25 +; GFX950-NEXT: v_minimum3_f32 v10, v10, v26, v26 +; 
GFX950-NEXT: v_minimum3_f32 v11, v11, v27, v27 +; GFX950-NEXT: v_minimum3_f32 v12, v12, v28, v28 +; GFX950-NEXT: v_minimum3_f32 v13, v13, v29, v29 +; GFX950-NEXT: v_minimum3_f32 v14, v14, v30, v30 ; GFX950-NEXT: s_waitcnt vmcnt(0) -; GFX950-NEXT: v_min_f32_e32 v16, v15, v31 -; GFX950-NEXT: v_cndmask_b32_e32 v7, v32, v48, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v8, v24 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v8, v32, v49, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v9, v25 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v9, v32, v50, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v10, v26 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v10, v32, v51, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v11, v27 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v11, v32, v52, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v12, v28 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v12, v32, v53, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v13, v29 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v13, v32, v54, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v14, v30 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v14, v32, v55, vcc -; GFX950-NEXT: v_cmp_o_f32_e32 vcc, v15, v31 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v15, v32, v16, vcc +; GFX950-NEXT: v_minimum3_f32 v15, v15, v31, v31 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v16f32: @@ -2176,3 +2084,4 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) { } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; GCN: {{.*}} +; GFX9: {{.*}}