From 7b542cdb6ff8d90534ff4efc5acb9b885a769876 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 9 Dec 2022 11:43:03 -0800 Subject: [PATCH] Fix optimization rules for narrow types: wrap i8 results to 8 bits. This fixes #5405. In the egraph mid-end's optimization rules, we were rewriting e.g. imuls of two iconsts to an iconst of the result, but without masking off the high bits (beyond the result type's width). This was producing iconsts with set high bits beyond their types' width, which is not legal. In addition, this PR adds some optimizations to the algebraic rules to recognize e.g. `x == x` (and all other integer comparison operators) and resolve to 1 or 0 as appropriate. --- cranelift/codegen/src/isle_prelude.rs | 6 +++ cranelift/codegen/src/opts/algebraic.isle | 33 ++++++++++++++ cranelift/codegen/src/opts/cprop.isle | 29 ++++++------- cranelift/codegen/src/prelude.isle | 4 ++ .../filetests/filetests/egraph/cprop.clif | 43 +++++++++++++++++++ .../filetests/egraph/issue-5405.clif | 16 +++++++ 6 files changed, 116 insertions(+), 15 deletions(-) create mode 100644 cranelift/filetests/filetests/egraph/cprop.clif create mode 100644 cranelift/filetests/filetests/egraph/issue-5405.clif diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs index 23270f0c49e3..580794c867c2 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -509,6 +509,12 @@ macro_rules! isle_common_prelude_methods { Imm64::new(x as i64) } + #[inline] + fn imm64_masked(&mut self, ty: Type, x: u64) -> Imm64 { + debug_assert!(ty.bits() <= 64); + Imm64::new((x & ((1u64 << ty.bits()) - 1)) as i64) + } + #[inline] fn simm32(&mut self, x: Imm64) -> Option { let x64: i64 = x.into(); diff --git a/cranelift/codegen/src/opts/algebraic.isle b/cranelift/codegen/src/opts/algebraic.isle index e4638222fe21..d407474131ef 100644 --- a/cranelift/codegen/src/opts/algebraic.isle +++ b/cranelift/codegen/src/opts/algebraic.isle @@ -207,3 +207,36 @@ (rule (simplify (select ty (uextend _ c @ (icmp _ _ _ _)) x y)) (select ty c x y)) + +;; `x == x` is always true for integers; `x != x` is false. Strict +;; inequalities are false, and loose inequalities are true. +(rule (simplify + (icmp ty (IntCC.Equal) x x)) + (iconst ty (imm64 1))) +(rule (simplify + (icmp ty (IntCC.NotEqual) x x)) + (iconst ty (imm64 0))) +(rule (simplify + (icmp ty (IntCC.UnsignedGreaterThan) x x)) + (iconst ty (imm64 0))) +(rule (simplify + (icmp ty (IntCC.UnsignedGreaterThanOrEqual) x x)) + (iconst ty (imm64 1))) +(rule (simplify + (icmp ty (IntCC.SignedGreaterThan) x x)) + (iconst ty (imm64 0))) +(rule (simplify + (icmp ty (IntCC.SignedGreaterThanOrEqual) x x)) + (iconst ty (imm64 1))) +(rule (simplify + (icmp ty (IntCC.UnsignedLessThan) x x)) + (iconst ty (imm64 0))) +(rule (simplify + (icmp ty (IntCC.UnsignedLessThanOrEqual) x x)) + (iconst ty (imm64 1))) +(rule (simplify + (icmp ty (IntCC.SignedLessThan) x x)) + (iconst ty (imm64 0))) +(rule (simplify + (icmp ty (IntCC.SignedLessThanOrEqual) x x)) + (iconst ty (imm64 1))) diff --git a/cranelift/codegen/src/opts/cprop.isle b/cranelift/codegen/src/opts/cprop.isle index ac6ce778e006..d7ba06dcf634 100644 --- a/cranelift/codegen/src/opts/cprop.isle +++ b/cranelift/codegen/src/opts/cprop.isle @@ -4,56 +4,56 @@ (iadd (fits_in_64 ty) (iconst ty (u64_from_imm64 k1)) (iconst ty (u64_from_imm64 k2)))) - (subsume (iconst ty (imm64 (u64_add k1 k2))))) + (subsume (iconst ty (imm64_masked ty (u64_add k1 k2))))) (rule (simplify (isub (fits_in_64 ty) (iconst ty (u64_from_imm64 k1)) (iconst ty (u64_from_imm64 k2)))) - (subsume (iconst ty (imm64 (u64_sub k1 k2))))) + (subsume (iconst ty (imm64_masked ty (u64_sub k1 k2))))) (rule (simplify (imul (fits_in_64 ty) (iconst ty (u64_from_imm64 k1)) (iconst ty (u64_from_imm64 k2)))) - (subsume (iconst ty (imm64 (u64_mul k1 k2))))) + (subsume (iconst ty (imm64_masked ty (u64_mul k1 k2))))) (rule (simplify (sdiv (fits_in_64 ty) (iconst ty (u64_from_imm64 k1)) (iconst ty (u64_from_imm64 k2)))) (if-let d (u64_sdiv k1 k2)) - (subsume (iconst ty (imm64 d)))) + (subsume (iconst ty (imm64_masked ty d)))) (rule (simplify (udiv (fits_in_64 ty) (iconst ty (u64_from_imm64 k1)) (iconst ty (u64_from_imm64 k2)))) (if-let d (u64_udiv k1 k2)) - (subsume (iconst ty (imm64 d)))) + (subsume (iconst ty (imm64_masked ty d)))) (rule (simplify (bor (fits_in_64 ty) (iconst ty (u64_from_imm64 k1)) (iconst ty (u64_from_imm64 k2)))) - (subsume (iconst ty (imm64 (u64_or k1 k2))))) + (subsume (iconst ty (imm64_masked ty (u64_or k1 k2))))) (rule (simplify (band (fits_in_64 ty) (iconst ty (u64_from_imm64 k1)) (iconst ty (u64_from_imm64 k2)))) - (subsume (iconst ty (imm64 (u64_and k1 k2))))) + (subsume (iconst ty (imm64_masked ty (u64_and k1 k2))))) (rule (simplify (bxor (fits_in_64 ty) (iconst ty (u64_from_imm64 k1)) (iconst ty (u64_from_imm64 k2)))) - (subsume (iconst ty (imm64 (u64_xor k1 k2))))) + (subsume (iconst ty (imm64_masked ty (u64_xor k1 k2))))) (rule (simplify (bnot (fits_in_64 ty) (iconst ty (u64_from_imm64 k)))) - (subsume (iconst ty (imm64 (u64_not k))))) + (subsume (iconst ty (imm64_masked ty (u64_not k))))) ;; Canonicalize via commutativity: push immediates to the right. ;; @@ -99,23 +99,23 @@ (rule (simplify (isub ty (isub ty x (iconst ty (u64_from_imm64 k1))) (iconst ty (u64_from_imm64 k2)))) - (isub ty x (iconst ty (imm64 (u64_add k1 k2))))) + (isub ty x (iconst ty (imm64_masked ty (u64_add k1 k2))))) (rule (simplify (isub ty (isub ty (iconst ty (u64_from_imm64 k1)) x) (iconst ty (u64_from_imm64 k2)))) - (isub ty (iconst ty (imm64 (u64_sub k1 k2))) x)) + (isub ty (iconst ty (imm64_masked ty (u64_sub k1 k2))) x)) (rule (simplify (isub ty (iadd ty x (iconst ty (u64_from_imm64 k1))) (iconst ty (u64_from_imm64 k2)))) - (isub ty x (iconst ty (imm64 (u64_sub k2 k1))))) + (isub ty x (iconst ty (imm64_masked ty (u64_sub k2 k1))))) (rule (simplify (iadd ty (isub ty x (iconst ty (u64_from_imm64 k1))) (iconst ty (u64_from_imm64 k2)))) - (iadd ty x (iconst ty (imm64 (u64_sub k2 k1))))) + (iadd ty x (iconst ty (imm64_masked ty (u64_sub k2 k1))))) (rule (simplify (iadd ty (isub ty (iconst ty (u64_from_imm64 k1)) x) (iconst ty (u64_from_imm64 k2)))) - (isub ty (iconst ty (imm64 (u64_add k1 k2))) x)) + (isub ty (iconst ty (imm64_masked ty (u64_add k1 k2))) x)) (rule (simplify (imul ty (imul ty x k1 @ (iconst ty _)) k2 @ (iconst ty _))) @@ -138,4 +138,3 @@ y) ;; TODO: fadd, fsub, fmul, fdiv, fneg, fabs - diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index 0f17b6b442cc..5a0aa3376cbf 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -349,6 +349,10 @@ (decl pure imm64 (u64) Imm64) (extern constructor imm64 imm64) +;; Create a new Imm64, masked to the width of the given type. +(decl pure imm64_masked (Type u64) Imm64) +(extern constructor imm64_masked imm64_masked) + ;; Extract a `u64` from an `Ieee32`. (decl u64_from_ieee32 (u64) Ieee32) (extern extractor infallible u64_from_ieee32 u64_from_ieee32) diff --git a/cranelift/filetests/filetests/egraph/cprop.clif b/cranelift/filetests/filetests/egraph/cprop.clif new file mode 100644 index 000000000000..bc76e1d546e7 --- /dev/null +++ b/cranelift/filetests/filetests/egraph/cprop.clif @@ -0,0 +1,43 @@ +test optimize +set opt_level=none +set use_egraphs=true +target x86_64 + +function %f0() -> i8 { +block0: + v1 = iconst.i8 51 + v2 = imul.i8 v1, v1 + return v2 +} + +; check: v9 = iconst.i8 41 +; nextln: return v9 + +function %f1() -> i16 { +block0: + v1 = iconst.i16 1 + v2 = bnot.i16 v1 + return v2 +} + +; check: v3 = iconst.i16 0xfffe +; nextln: return v3 + +function %f2(i8) -> i8 { +block0(v1: i8): + v2 = icmp eq v1, v1 + return v2 +} + +; check: v3 = iconst.i8 1 +; check: return v3 + +function %f3(i8) -> i8 { +block0(v1: i8): + v2 = icmp ne v1, v1 + return v2 +} + +; check: v3 = iconst.i8 0 +; check: return v3 + diff --git a/cranelift/filetests/filetests/egraph/issue-5405.clif b/cranelift/filetests/filetests/egraph/issue-5405.clif new file mode 100644 index 000000000000..c453875de5cf --- /dev/null +++ b/cranelift/filetests/filetests/egraph/issue-5405.clif @@ -0,0 +1,16 @@ +test interpret +test run +set opt_level=speed_and_size +set use_egraphs=true +target aarch64 + +function %a(i64) -> i8 system_v { +block0(v0: i64): + v6 = iconst.i8 51 + v17 = imul v6, v6 ; v6 = 51, v6 = 51 + v18 = icmp eq v17, v17 + v52 = imul v18, v18 + return v52 +} + +; run: %a(129) == 1