[SDAG] Reverse the canonicalization of isInf/isNanOrInf #81404
Conversation
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-backend-x86

Author: Yingwei Zheng (dtcxzyw)

Changes: In commit 2b58244, we canonicalized the isInf/isNanOrInf idiom into fabs+fcmp for better analysis/codegen (see also the discussion in #76338). This patch reverses the fabs+fcmp back to is.fpclass; if is.fpclass is not supported by the target, it is expanded by TLI. Fixes the regression introduced by 2b58244 and #80414 (comment).

Patch is 71.66 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/81404.diff

9 Files Affected:
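For reference, the two forms involved look roughly like this minimal IR sketch (illustrative only; the function names are made up, and 516 = 0x204 is the fcInf mask from FPClassTest):

; The canonical fabs+fcmp idiom for an isinf(x) test:
define i1 @is_inf_fcmp(double %x) {
  %abs = call double @llvm.fabs.f64(double %x)
  %ret = fcmp oeq double %abs, 0x7FF0000000000000 ; compare against +inf
  ret i1 %ret
}

; The class-test form this patch recovers during lowering:
define i1 @is_inf_class(double %x) {
  %ret = call i1 @llvm.is.fpclass.f64(double %x, i32 516) ; 0x204 = fcInf
  ret i1 %ret
}

declare double @llvm.fabs.f64(double)
declare i1 @llvm.is.fpclass.f64(double, i32)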
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 5ce1013f30fd1b..22ceca33195cab 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3467,12 +3467,50 @@ void SelectionDAGBuilder::visitICmp(const User &I) {
setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
}
+SDValue SelectionDAGBuilder::lowerIsFpClass(Value *ClassVal,
+ FPClassTest ClassTest) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const DataLayout &DL = DAG.getDataLayout();
+ SDLoc sdl = getCurSDLoc();
+
+ EVT DestVT =
+ TLI.getValueType(DL, CmpInst::makeCmpResultType(ClassVal->getType()));
+ EVT ArgVT = TLI.getValueType(DL, ClassVal->getType());
+ MachineFunction &MF = DAG.getMachineFunction();
+ const Function &F = MF.getFunction();
+ SDValue Op = getValue(ClassVal);
+ SDNodeFlags Flags;
+ Flags.setNoFPExcept(!F.getAttributes().hasFnAttr(llvm::Attribute::StrictFP));
+ // If ISD::IS_FPCLASS should be expanded, do it right now, because the
+ // expansion can use illegal types. Making expansion early allows
+ // legalizing these types prior to selection.
+ if (!TLI.isOperationLegalOrCustom(ISD::IS_FPCLASS, ArgVT))
+ return TLI.expandIS_FPCLASS(DestVT, Op, ClassTest, Flags, sdl, DAG);
+
+ SDValue Check = DAG.getTargetConstant(ClassTest, sdl, MVT::i32);
+ return DAG.getNode(ISD::IS_FPCLASS, sdl, DestVT, {Op, Check}, Flags);
+}
+
void SelectionDAGBuilder::visitFCmp(const User &I) {
FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
- if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
+ if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I)) {
predicate = FC->getPredicate();
- else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
+
+ // Reverse the canonicalization if it is a FP class test
+ auto ShouldReverseTransform = [](FPClassTest ClassTest) {
+ return ClassTest == fcInf || ClassTest == (fcInf | fcNan);
+ };
+ auto [ClassVal, ClassTest] =
+ fcmpToClassTest(predicate, *FC->getParent()->getParent(),
+ FC->getOperand(0), FC->getOperand(1));
+ if (ClassVal && (ShouldReverseTransform(ClassTest) ||
+ ShouldReverseTransform(~ClassTest))) {
+ setValue(&I, lowerIsFpClass(ClassVal, ClassTest));
+ return;
+ }
+ } else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
predicate = FCmpInst::Predicate(FC->getPredicate());
+
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
@@ -6666,29 +6704,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DAG.setRoot(Res.getValue(0));
return;
case Intrinsic::is_fpclass: {
- const DataLayout DLayout = DAG.getDataLayout();
- EVT DestVT = TLI.getValueType(DLayout, I.getType());
- EVT ArgVT = TLI.getValueType(DLayout, I.getArgOperand(0)->getType());
- FPClassTest Test = static_cast<FPClassTest>(
- cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
- MachineFunction &MF = DAG.getMachineFunction();
- const Function &F = MF.getFunction();
- SDValue Op = getValue(I.getArgOperand(0));
- SDNodeFlags Flags;
- Flags.setNoFPExcept(
- !F.getAttributes().hasFnAttr(llvm::Attribute::StrictFP));
- // If ISD::IS_FPCLASS should be expanded, do it right now, because the
- // expansion can use illegal types. Making expansion early allows
- // legalizing these types prior to selection.
- if (!TLI.isOperationLegalOrCustom(ISD::IS_FPCLASS, ArgVT)) {
- SDValue Result = TLI.expandIS_FPCLASS(DestVT, Op, Test, Flags, sdl, DAG);
- setValue(&I, Result);
- return;
- }
-
- SDValue Check = DAG.getTargetConstant(Test, sdl, MVT::i32);
- SDValue V = DAG.getNode(ISD::IS_FPCLASS, sdl, DestVT, {Op, Check}, Flags);
- setValue(&I, V);
+ setValue(&I,
+ lowerIsFpClass(
+ I.getArgOperand(0),
+ static_cast<FPClassTest>(
+ cast<ConstantInt>(I.getArgOperand(1))->getZExtValue())));
return;
}
case Intrinsic::get_fpenv: {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 47657313cb6a3b..dfc9369117c79d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -700,6 +700,7 @@ class SelectionDAGBuilder {
MCSymbol *&BeginLabel);
SDValue lowerEndEH(SDValue Chain, const InvokeInst *II,
const BasicBlock *EHPadBB, MCSymbol *BeginLabel);
+ SDValue lowerIsFpClass(Value *ClassVal, FPClassTest ClassTest);
};
/// This struct represents the registers (physical or virtual)
diff --git a/llvm/test/CodeGen/AArch64/fpclass-test.ll b/llvm/test/CodeGen/AArch64/fpclass-test.ll
new file mode 100644
index 00000000000000..b549722690afdd
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fpclass-test.ll
@@ -0,0 +1,126 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+define i1 @test_is_inf_or_nan(double %arg) {
+; CHECK-LABEL: test_is_inf_or_nan:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: mov x8, #9218868437227405311 // =0x7fefffffffffffff
+; CHECK-NEXT: and x9, x9, #0x7fffffffffffffff
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: ret
+ %abs = tail call double @llvm.fabs.f64(double %arg)
+ %ret = fcmp ueq double %abs, 0x7FF0000000000000
+ ret i1 %ret
+}
+
+define i1 @test_is_not_inf_or_nan(double %arg) {
+; CHECK-LABEL: test_is_not_inf_or_nan:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000
+; CHECK-NEXT: and x9, x9, #0x7fffffffffffffff
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: cset w0, lt
+; CHECK-NEXT: ret
+ %abs = tail call double @llvm.fabs.f64(double %arg)
+ %ret = fcmp one double %abs, 0x7FF0000000000000
+ ret i1 %ret
+}
+
+define i1 @test_is_inf(double %arg) {
+; CHECK-LABEL: test_is_inf:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs d0, d0
+; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000
+; CHECK-NEXT: fmov d1, x8
+; CHECK-NEXT: fcmp d0, d1
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %abs = tail call double @llvm.fabs.f64(double %arg)
+ %ret = fcmp oeq double %abs, 0x7FF0000000000000
+ ret i1 %ret
+}
+
+define i1 @test_is_not_inf(double %arg) {
+; CHECK-LABEL: test_is_not_inf:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs d0, d0
+; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000
+; CHECK-NEXT: fmov d1, x8
+; CHECK-NEXT: fcmp d0, d1
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %abs = tail call double @llvm.fabs.f64(double %arg)
+ %ret = fcmp une double %abs, 0x7FF0000000000000
+ ret i1 %ret
+}
+
+define i1 @test_fp128_is_inf_or_nan(fp128 %arg) {
+; CHECK-LABEL: test_fp128_is_inf_or_nan:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #9223090561878065151 // =0x7ffeffffffffffff
+; CHECK-NEXT: str q0, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: ldr x9, [sp, #8]
+; CHECK-NEXT: and x9, x9, #0x7fffffffffffffff
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ret
+ %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+ %ret = fcmp ueq fp128 %abs, 0xL00000000000000007FFF000000000000
+ ret i1 %ret
+}
+
+define i1 @test_fp128_is_not_inf_or_nan(fp128 %arg) {
+; CHECK-LABEL: test_fp128_is_not_inf_or_nan:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #9223090561878065152 // =0x7fff000000000000
+; CHECK-NEXT: str q0, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: ldr x9, [sp, #8]
+; CHECK-NEXT: and x9, x9, #0x7fffffffffffffff
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: cset w0, lt
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ret
+ %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+ %ret = fcmp one fp128 %abs, 0xL00000000000000007FFF000000000000
+ ret i1 %ret
+}
+
+define i1 @test_fp128_is_inf(fp128 %arg) {
+; CHECK-LABEL: test_fp128_is_inf:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str q0, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: ldp x9, x8, [sp], #16
+; CHECK-NEXT: and x8, x8, #0x7fffffffffffffff
+; CHECK-NEXT: eor x8, x8, #0x7fff000000000000
+; CHECK-NEXT: orr x8, x9, x8
+; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+ %ret = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000
+ ret i1 %ret
+}
+
+define i1 @test_fp128_is_not_inf(fp128 %arg) {
+; CHECK-LABEL: test_fp128_is_not_inf:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str q0, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: ldp x9, x8, [sp], #16
+; CHECK-NEXT: and x8, x8, #0x7fffffffffffffff
+; CHECK-NEXT: eor x8, x8, #0x7fff000000000000
+; CHECK-NEXT: orr x8, x9, x8
+; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %abs = tail call fp128 @llvm.fabs.f128(fp128 %arg)
+ %ret = fcmp une fp128 %abs, 0xL00000000000000007FFF000000000000
+ ret i1 %ret
+}
diff --git a/llvm/test/CodeGen/AArch64/isinf.ll b/llvm/test/CodeGen/AArch64/isinf.ll
index 458bd7eeba16cf..834417b98743a8 100644
--- a/llvm/test/CodeGen/AArch64/isinf.ll
+++ b/llvm/test/CodeGen/AArch64/isinf.ll
@@ -58,22 +58,14 @@ define i32 @replace_isinf_call_f64(double %x) {
define i32 @replace_isinf_call_f128(fp128 %x) {
; CHECK-LABEL: replace_isinf_call_f128:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #32
-; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: str q0, [sp]
-; CHECK-NEXT: ldrb w8, [sp, #15]
-; CHECK-NEXT: and w8, w8, #0x7f
-; CHECK-NEXT: strb w8, [sp, #15]
-; CHECK-NEXT: adrp x8, .LCPI3_0
-; CHECK-NEXT: ldr q0, [sp]
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
-; CHECK-NEXT: bl __eqtf2
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: str q0, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: ldp x9, x8, [sp], #16
+; CHECK-NEXT: and x8, x8, #0x7fffffffffffffff
+; CHECK-NEXT: eor x8, x8, #0x7fff000000000000
+; CHECK-NEXT: orr x8, x9, x8
+; CHECK-NEXT: cmp x8, #0
; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
%abs = tail call fp128 @llvm.fabs.f128(fp128 %x)
%cmpinf = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000
diff --git a/llvm/test/CodeGen/AMDGPU/fp-classify.ll b/llvm/test/CodeGen/AMDGPU/fp-classify.ll
index 6fa7df913812a3..ed9ce4d62383b1 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-classify.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-classify.ll
@@ -61,10 +61,10 @@ define amdgpu_kernel void @test_not_isinf_pattern_0(ptr addrspace(1) nocapture %
; SI-NEXT: s_load_dword s0, s[0:1], 0xb
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
-; SI-NEXT: v_mov_b32_e32 v0, 0x7f800000
+; SI-NEXT: v_mov_b32_e32 v0, 0x207
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_cmp_nlg_f32_e64 s[0:1], |s0|, v0
-; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; SI-NEXT: v_cmp_class_f32_e32 vcc, s0, v0
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
@@ -72,11 +72,11 @@ define amdgpu_kernel void @test_not_isinf_pattern_0(ptr addrspace(1) nocapture %
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x7f800000
+; VI-NEXT: v_mov_b32_e32 v0, 0x207
; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_nlg_f32_e64 s[2:3], |s2|, v0
+; VI-NEXT: v_cmp_class_f32_e32 vcc, s2, v0
; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[2:3]
+; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
@@ -88,7 +88,7 @@ define amdgpu_kernel void @test_not_isinf_pattern_0(ptr addrspace(1) nocapture %
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_cmp_nlg_f32_e64 s2, 0x7f800000, |s2|
+; GFX11-NEXT: v_cmp_class_f32_e64 s2, s2, 0x207
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -143,25 +143,29 @@ define amdgpu_kernel void @test_isfinite_pattern_0(ptr addrspace(1) nocapture %o
; SI-LABEL: test_isfinite_pattern_0:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT: s_load_dword s0, s[0:1], 0xb
+; SI-NEXT: s_load_dword s2, s[0:1], 0xb
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
-; SI-NEXT: v_mov_b32_e32 v0, 0x1f8
+; SI-NEXT: v_mov_b32_e32 v0, 0x1fb
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_cmp_class_f32_e32 vcc, s0, v0
-; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-NEXT: v_cmp_o_f32_e64 s[0:1], s2, s2
+; SI-NEXT: v_cmp_class_f32_e32 vcc, s2, v0
+; SI-NEXT: s_and_b64 s[0:1], s[0:1], vcc
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: test_isfinite_pattern_0:
; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x1f8
+; VI-NEXT: v_mov_b32_e32 v0, 0x1fb
; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_class_f32_e32 vcc, s2, v0
+; VI-NEXT: v_cmp_o_f32_e64 s[2:3], s4, s4
+; VI-NEXT: v_cmp_class_f32_e32 vcc, s4, v0
+; VI-NEXT: s_and_b64 s[2:3], s[2:3], vcc
; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[2:3]
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
@@ -173,8 +177,10 @@ define amdgpu_kernel void @test_isfinite_pattern_0(ptr addrspace(1) nocapture %o
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_cmp_class_f32_e64 s2, s2, 0x1f8
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cmp_o_f32_e64 s3, s2, s2
+; GFX11-NEXT: v_cmp_class_f32_e64 s2, s2, 0x1fb
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: s_and_b32 s2, s3, s2
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_nop 0
@@ -349,13 +355,13 @@ define amdgpu_kernel void @test_isfinite_not_pattern_2(ptr addrspace(1) nocaptur
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
-; SI-NEXT: v_mov_b32_e32 v0, 0x7f800000
+; SI-NEXT: v_mov_b32_e32 v0, 0x1fb
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s4, s0
; SI-NEXT: s_mov_b32 s5, s1
; SI-NEXT: v_cmp_o_f32_e64 s[0:1], s2, s2
-; SI-NEXT: v_cmp_neq_f32_e64 s[2:3], |s3|, v0
-; SI-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
+; SI-NEXT: v_cmp_class_f32_e32 vcc, s3, v0
+; SI-NEXT: s_and_b64 s[0:1], s[0:1], vcc
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
@@ -363,11 +369,11 @@ define amdgpu_kernel void @test_isfinite_not_pattern_2(ptr addrspace(1) nocaptur
; VI-LABEL: test_isfinite_not_pattern_2:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x7f800000
+; VI-NEXT: v_mov_b32_e32 v0, 0x1fb
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cmp_o_f32_e64 s[4:5], s2, s2
-; VI-NEXT: v_cmp_neq_f32_e64 s[2:3], |s3|, v0
-; VI-NEXT: s_and_b64 s[2:3], s[4:5], s[2:3]
+; VI-NEXT: v_cmp_class_f32_e32 vcc, s3, v0
+; VI-NEXT: s_and_b64 s[2:3], s[4:5], vcc
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[2:3]
; VI-NEXT: v_mov_b32_e32 v1, s1
@@ -380,7 +386,7 @@ define amdgpu_kernel void @test_isfinite_not_pattern_2(ptr addrspace(1) nocaptur
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_cmp_o_f32_e64 s2, s2, s2
-; GFX11-NEXT: v_cmp_neq_f32_e64 s3, 0x7f800000, |s3|
+; GFX11-NEXT: v_cmp_class_f32_e64 s3, s3, 0x1fb
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_and_b32 s2, s2, s3
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
@@ -405,11 +411,11 @@ define amdgpu_kernel void @test_isfinite_not_pattern_3(ptr addrspace(1) nocaptur
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
-; SI-NEXT: v_mov_b32_e32 v0, 0x7f800000
+; SI-NEXT: v_mov_b32_e32 v0, 0x1fb
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cmp_u_f32_e64 s[0:1], s2, s2
-; SI-NEXT: v_cmp_neq_f32_e64 s[2:3], |s2|, v0
-; SI-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
+; SI-NEXT: v_cmp_class_f32_e32 vcc, s2, v0
+; SI-NEXT: s_and_b64 s[0:1], s[0:1], vcc
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
@@ -418,11 +424,11 @@ define amdgpu_kernel void @test_isfinite_not_pattern_3(ptr addrspace(1) nocaptur
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: v_mov_b32_e32 v0, 0x7f800000
+; VI-NEXT: v_mov_b32_e32 v0, 0x1fb
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cmp_u_f32_e64 s[2:3], s4, s4
-; VI-NEXT: v_cmp_neq_f32_e64 s[4:5], |s4|, v0
-; VI-NEXT: s_and_b64 s[2:3], s[2:3], s[4:5]
+; VI-NEXT: v_cmp_class_f32_e32 vcc, s4, v0
+; VI-NEXT: s_and_b64 s[2:3], s[2:3], vcc
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[2:3]
; VI-NEXT: v_mov_b32_e32 v1, s1
@@ -437,7 +443,7 @@ define amdgpu_kernel void @test_isfinite_not_pattern_3(ptr addrspace(1) nocaptur
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_cmp_u_f32_e64 s3, s2, s2
-; GFX11-NEXT: v_cmp_neq_f32_e64 s2, 0x7f800000, |s2|
+; GFX11-NEXT: v_cmp_class_f32_e64 s2, s2, 0x1fb
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_and_b32 s2, s3, s2
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
@@ -458,25 +464,29 @@ define amdgpu_kernel void @test_isfinite_pattern_4(ptr addrspace(1) nocapture %o
; SI-LABEL: test_isfinite_pattern_4:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT: s_load_dword s0, s[0:1], 0xb
+; SI-NEXT: s_load_dword s2, s[0:1], 0xb
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: v_mov_b32_e32 v0, 0x1f8
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_cmp_class_f32_e32 vcc, s0, v0
-; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-NEXT: v_cmp_o_f32_e64 s[0:1], s2, s2
+; SI-NEXT: v_cmp_class_f32_e32 vcc, s2, v0
+; SI-NEXT: s_and_b64 s[0:1], s[0:1], vcc
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: test_isfinite_pattern_4:
; VI: ; %bb.0:
-; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: v_mov_b32_e32 v0, 0x1f8
; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_class_f32_e32 vcc, s2, v0
+; VI-NEXT: v_cmp_o_f32_e64 s[2:3], s4, s4
+; VI-NEXT: v_cmp_class_f32_e32 vcc, s4, v0
+; VI-NEXT: s_and_b64 s[2:3], s[2:3], vcc
; VI-NEXT: v_mov_b32_e32 v0, s0
-; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[2:3]
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NE...
[truncated]
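For readers decoding the v_cmp_class immediates in the AMDGPU checks above, here is a quick reference sketch (assuming the masks follow the FPClassTest bits in llvm/ADT/FloatingPointMode.h):

#include "llvm/ADT/FloatingPointMode.h"
// Class masks appearing in the checks above, decoded via FPClassTest:
constexpr unsigned IsInf         = unsigned(llvm::fcInf);                            // 0x204
constexpr unsigned IsInfOrNan    = unsigned(llvm::fcInf) | unsigned(llvm::fcNan);    // 0x207
constexpr unsigned IsFinite      = unsigned(llvm::fcFinite);                         // 0x1f8
constexpr unsigned IsFiniteOrNan = unsigned(llvm::fcFinite) | unsigned(llvm::fcNan); // 0x1fb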
Please can you add vector test coverage?
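For instance, vector coverage along these lines (a hypothetical sketch; the tests actually added in the update may differ):

define <2 x i1> @test_vec_is_inf_or_nan(<2 x double> %arg) {
  %abs = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %arg)
  %ret = fcmp ueq <2 x double> %abs, <double 0x7FF0000000000000, double 0x7FF0000000000000>
  ret <2 x i1> %ret
}

declare <2 x double> @llvm.fabs.v2f64(<2 x double>)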
Force-pushed from 25e1a77 to 4558a9d.
Done.
One minor nit about braces, but otherwise this looks good.
const Function &F = MF.getFunction();
SDValue Op = getValue(ClassVal);
SDNodeFlags Flags;
Flags.setNoFPExcept(!F.getAttributes().hasFnAttr(llvm::Attribute::StrictFP));
This can just be true; is.fpclass can never raise exceptions (do we even need the flag?).
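A minimal sketch of the suggested change (assuming the flag is kept at all):

SDNodeFlags Flags;
// is.fpclass never raises FP exceptions, so the flag need not depend on
// the strictfp attribute.
Flags.setNoFPExcept(true);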
@@ -3467,12 +3467,51 @@ void SelectionDAGBuilder::visitICmp(const User &I) {
  setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
}

SDValue SelectionDAGBuilder::lowerIsFpClass(Value *ClassVal,
I don't feel great about having SelectionDAGBuilder perform random optimizations. I can see the appeal of reusing the IR implementation of fcmpToClassTest. You would also need to reimplement the same thing in GlobalISel.
Maybe it would be better to do this in CodeGenPrepare?
I will post an alternative later.
Looks like we may still get a libcall with this PR: https://godbolt.org/z/W1Pq8G3ar
You need an additional fabs. This patch only reverses the transforms in 2b58244 to avoid breaking AMDGPU tests.
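An illustration of the distinction (assuming the linked case compares the raw value rather than its fabs):

; Matched by this patch: the fcInf test goes through fabs.
define i1 @handled(fp128 %x) {
  %abs = call fp128 @llvm.fabs.f128(fp128 %x)
  %r = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000
  ret i1 %r
}

; Not matched: without the fabs this only tests fcPosInf, so the fp128
; compare still lowers to a libcall.
define i1 @not_handled(fp128 %x) {
  %r = fcmp oeq fp128 %x, 0xL00000000000000007FFF000000000000
  ret i1 %r
}

declare fp128 @llvm.fabs.f128(fp128)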
Closed, as #81572 will be landed instead.