From 8a2d9bc74693d2ba08cc7f05eb349ca11429677b Mon Sep 17 00:00:00 2001
From: Alex Crichton <alex@alexcrichton.com>
Date: Mon, 12 Feb 2024 11:23:55 -0600
Subject: [PATCH] Constant propagate int-to-float conversions (#7915)

This commit is born out of a fuzz bug on x64 that was discovered recently.
Today, on `main`, and in the 17.0.1 release Wasmtime will panic when compiling
this wasm module for x64:

    (module
      (func (result v128)
        i32.const 0
        i32x4.splat
        f64x2.convert_low_i32x4_u))

panicking with:

    thread '<unnamed>' panicked at /home/alex/.cargo/registry/src/index.crates.io-6f17d22bba15001f/cranelift-codegen-0.104.1/src/machinst/lower.rs:766:21:
    should be implemented in ISLE: inst = `v6 = fcvt_from_uint.f64x2 v13  ; v13 = const0`, type = `Some(types::F64X2)`
    note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace

Bisections points to the "cause" of this regression as #7859 which
more-or-less means that this has always been an issue and that PR just
happened to expose the issue. What's happening here is that egraph
optimizations are turning the IR into a form that the x64 backend can't
codegen. Namely there's no general purpose lowering of i64x2 being
converted to f64x2. The Wasm frontend never produces this but the
optimizations internally end up producing this.

Notably here the result of this function is constant and what's
happening is that a convert-of-a-splat is happening. In lieu of adding
the full general lowering to x64 (which is perhaps overdue since this is
the second or third time this panic has been triggered) I've opted to
add constant propagation optimizations for int-to-float conversions.
These are all based on the Rust `as` operator which has the same
semantics as Cranelift. This is enough to fix the issue here for the
time being.
---
 cranelift/codegen/src/opts.rs                 |  19 ++
 cranelift/codegen/src/opts/cprop.isle         |  20 ++
 cranelift/codegen/src/opts/vector.isle        |   5 +-
 .../filetests/egraph/fcvt-from-int.clif       | 222 ++++++++++++++++++
 tests/misc_testsuite/int-to-float-splat.wast  |  15 ++
 5 files changed, 279 insertions(+), 2 deletions(-)
 create mode 100644 cranelift/filetests/filetests/egraph/fcvt-from-int.clif
 create mode 100644 tests/misc_testsuite/int-to-float-splat.wast
diff --git a/cranelift/codegen/src/opts.rs b/cranelift/codegen/src/opts.rs
index c24e6190eeea..e897b2dc4437 100644
--- a/cranelift/codegen/src/opts.rs
+++ b/cranelift/codegen/src/opts.rs
@@ -262,4 +262,23 @@ impl<'a, 'b, 'c> generated_code::Context for IsleContext<'a, 'b, 'c> {
     fn uextend_maybe_etor(&mut self, value: Value, returns: &mut Self::uextend_maybe_etor_returns) {
         *returns = MaybeUnaryEtorIter::new(Opcode::Uextend, value);
     }
+
+    // NB: Cranelift's defined semantics for `fcvt_from_{s,u}int` match Rust's
+    // own semantics for converting an integer to a float, so these are all
+    // implemented with `as` conversions in Rust.
+    fn f32_from_uint(&mut self, n: u64) -> Ieee32 {
+        Ieee32::with_float(n as f32)
+    }
+
+    fn f64_from_uint(&mut self, n: u64) -> Ieee64 {
+        Ieee64::with_float(n as f64)
+    }
+
+    fn f32_from_sint(&mut self, n: i64) -> Ieee32 {
+        Ieee32::with_float(n as f32)
+    }
+
+    fn f64_from_sint(&mut self, n: i64) -> Ieee64 {
+        Ieee64::with_float(n as f64)
+    }
 }
diff --git a/cranelift/codegen/src/opts/cprop.isle b/cranelift/codegen/src/opts/cprop.isle
index f84b300f62bd..77b76a508232 100644
--- a/cranelift/codegen/src/opts/cprop.isle
+++ b/cranelift/codegen/src/opts/cprop.isle
@@ -244,3 +244,23 @@
                       (bxor ty a b@(iconst _ _))
                       (bxor ty c d@(iconst _ _))))
       (bxor ty (bxor ty a c) (bxor ty b d)))
+
+
+;; Constant fold int-to-float conversions.
+(rule (simplify (fcvt_from_uint $F32 (iconst_u _ n)))
+      (f32const $F32 (f32_from_uint n)))
+(rule (simplify (fcvt_from_uint $F64 (iconst_u _ n)))
+      (f64const $F64 (f64_from_uint n)))
+(rule (simplify (fcvt_from_sint $F32 (iconst_s _ n)))
+      (f32const $F32 (f32_from_sint n)))
+(rule (simplify (fcvt_from_sint $F64 (iconst_s _ n)))
+      (f64const $F64 (f64_from_sint n)))
+
+(decl f32_from_uint (u64) Ieee32)
+(extern constructor f32_from_uint f32_from_uint)
+(decl f64_from_uint (u64) Ieee64)
+(extern constructor f64_from_uint f64_from_uint)
+(decl f32_from_sint (i64) Ieee32)
+(extern constructor f32_from_sint f32_from_sint)
+(decl f64_from_sint (i64) Ieee64)
+(extern constructor f64_from_sint f64_from_sint)
diff --git a/cranelift/codegen/src/opts/vector.isle b/cranelift/codegen/src/opts/vector.isle
index fd47fe69c41b..5ad57d8dc604 100644
--- a/cranelift/codegen/src/opts/vector.isle
+++ b/cranelift/codegen/src/opts/vector.isle
@@ -1,8 +1,9 @@
 ;; For various ops lift a splat outside of the op to try to open up
 ;; optimization opportunities with scalars.
-;;
+
 ;; NB: for int-to-float conversion op this simplification is also
-;; required for correctness at this time due to #6562
+;; required for the x64 backend because it doesn't fully implement int-to-float
+;; conversions for 64x2 vectors, for more information see #6562
 (rule (simplify (fcvt_from_uint float_vector_ty (splat _ x)))
       (splat float_vector_ty (fcvt_from_uint (lane_type float_vector_ty) x)))
 (rule (simplify (fcvt_from_sint float_vector_ty (splat _ x)))
diff --git a/cranelift/filetests/filetests/egraph/fcvt-from-int.clif b/cranelift/filetests/filetests/egraph/fcvt-from-int.clif
new file mode 100644
index 000000000000..9d98b01412b0
--- /dev/null
+++ b/cranelift/filetests/filetests/egraph/fcvt-from-int.clif
@@ -0,0 +1,222 @@
+test optimize
+set opt_level=speed
+target x86_64
+target aarch64
+target s390x
+target riscv64
+
+function %i32_0_to_f32() -> f32 {
+block0:
+  v0 = iconst.i32 0
+  v1 = fcvt_from_sint.f32 v0
+  return v1
+  ; check: v2 = f32const 0.0
+  ; check: return v2
+}
+
+function %i32_neg1_to_f32() -> f32 {
+block0:
+  v0 = iconst.i32 -1
+  v1 = fcvt_from_sint.f32 v0
+  return v1
+  ; check: v2 = f32const -0x1.000000p0
+  ; check: return v2
+}
+
+function %i32_1_to_f32() -> f32 {
+block0:
+  v0 = iconst.i32 1
+  v1 = fcvt_from_sint.f32 v0
+  return v1
+  ; check: v2 = f32const 0x1.000000p0
+  ; check: return v2
+}
+
+function %u32_0_to_f32() -> f32 {
+block0:
+  v0 = iconst.i32 0
+  v1 = fcvt_from_uint.f32 v0
+  return v1
+  ; check: v2 = f32const 0.0
+  ; check: return v2
+}
+
+function %u32_neg1_to_f32() -> f32 {
+block0:
+  v0 = iconst.i32 -1
+  v1 = fcvt_from_uint.f32 v0
+  return v1
+  ; check: v2 = f32const 0x1.000000p32
+  ; check: return v2
+}
+
+function %u32_1_to_f32() -> f32 {
+block0:
+  v0 = iconst.i32 1
+  v1 = fcvt_from_uint.f32 v0
+  return v1
+  ; check: v2 = f32const 0x1.000000p0
+  ; check: return v2
+}
+
+function %i32_0_to_f64() -> f64 {
+block0:
+  v0 = iconst.i32 0
+  v1 = fcvt_from_sint.f64 v0
+  return v1
+  ; check: v2 = f64const 0.0
+  ; check: return v2
+}
+
+function %i32_neg1_to_f64() -> f64 {
+block0:
+  v0 = iconst.i32 -1
+  v1 = fcvt_from_sint.f64 v0
+  return v1
+  ; check: v2 = f64const -0x1.0000000000000p0
+  ; check: return v2
+}
+
+function %i32_1_to_f64() -> f64 {
+block0:
+  v0 = iconst.i32 1
+  v1 = fcvt_from_sint.f64 v0
+  return v1
+  ; check: v2 = f64const 0x1.0000000000000p0
+  ; check: return v2
+}
+
+function %u32_0_to_f64() -> f64 {
+block0:
+  v0 = iconst.i32 0
+  v1 = fcvt_from_uint.f64 v0
+  return v1
+  ; check: v2 = f64const 0.0
+  ; check: return v2
+}
+
+function %u32_neg1_to_f64() -> f64 {
+block0:
+  v0 = iconst.i32 -1
+  v1 = fcvt_from_uint.f64 v0
+  return v1
+  ; check: v2 = f64const 0x1.fffffffe00000p31
+  ; check: return v2
+}
+
+function %u32_1_to_f64() -> f64 {
+block0:
+  v0 = iconst.i32 1
+  v1 = fcvt_from_uint.f64 v0
+  return v1
+  ; check: v2 = f64const 0x1.0000000000000p0
+  ; check: return v2
+}
+
+function %i64_0_to_f32() -> f32 {
+block0:
+  v0 = iconst.i64 0
+  v1 = fcvt_from_sint.f32 v0
+  return v1
+  ; check: v2 = f32const 0.0
+  ; check: return v2
+}
+
+function %i64_neg1_to_f32() -> f32 {
+block0:
+  v0 = iconst.i64 -1
+  v1 = fcvt_from_sint.f32 v0
+  return v1
+  ; check: v2 = f32const -0x1.000000p0
+  ; check: return v2
+}
+
+function %i64_1_to_f32() -> f32 {
+block0:
+  v0 = iconst.i64 1
+  v1 = fcvt_from_sint.f32 v0
+  return v1
+  ; check: v2 = f32const 0x1.000000p0
+  ; check: return v2
+}
+
+function %u64_0_to_f32() -> f32 {
+block0:
+  v0 = iconst.i64 0
+  v1 = fcvt_from_uint.f32 v0
+  return v1
+  ; check: v2 = f32const 0.0
+  ; check: return v2
+}
+
+function %u64_neg1_to_f32() -> f32 {
+block0:
+  v0 = iconst.i64 -1
+  v1 = fcvt_from_uint.f32 v0
+  return v1
+  ; check: v2 = f32const 0x1.000000p64
+  ; check: return v2
+}
+
+function %u64_1_to_f32() -> f32 {
+block0:
+  v0 = iconst.i64 1
+  v1 = fcvt_from_uint.f32 v0
+  return v1
+  ; check: v2 = f32const 0x1.000000p0
+  ; check: return v2
+}
+
+function %i64_0_to_f64() -> f64 {
+block0:
+  v0 = iconst.i64 0
+  v1 = fcvt_from_sint.f64 v0
+  return v1
+  ; check: v2 = f64const 0.0
+  ; check: return v2
+}
+
+function %i64_neg1_to_f64() -> f64 {
+block0:
+  v0 = iconst.i64 -1
+  v1 = fcvt_from_sint.f64 v0
+  return v1
+  ; check: v2 = f64const -0x1.0000000000000p0
+  ; check: return v2
+}
+
+function %i64_1_to_f64() -> f64 {
+block0:
+  v0 = iconst.i64 1
+  v1 = fcvt_from_sint.f64 v0
+  return v1
+  ; check: v2 = f64const 0x1.0000000000000p0
+  ; check: return v2
+}
+
+function %u64_0_to_f64() -> f64 {
+block0:
+  v0 = iconst.i64 0
+  v1 = fcvt_from_uint.f64 v0
+  return v1
+  ; check: v2 = f64const 0.0
+  ; check: return v2
+}
+
+function %u64_neg1_to_f64() -> f64 {
+block0:
+  v0 = iconst.i64 -1
+  v1 = fcvt_from_uint.f64 v0
+  return v1
+  ; check: v2 = f64const 0x1.0000000000000p64
+  ; check: return v2
+}
+
+function %u64_1_to_f64() -> f64 {
+block0:
+  v0 = iconst.i64 1
+  v1 = fcvt_from_uint.f64 v0
+  return v1
+  ; check: v2 = f64const 0x1.0000000000000p0
+  ; check: return v2
+}
diff --git a/tests/misc_testsuite/int-to-float-splat.wast b/tests/misc_testsuite/int-to-float-splat.wast
new file mode 100644
index 000000000000..2a6110200293
--- /dev/null
+++ b/tests/misc_testsuite/int-to-float-splat.wast
@@ -0,0 +1,15 @@
+(module
+  (func (param i32) (result v128)
+    local.get 0
+    i32x4.splat
+    f64x2.convert_low_i32x4_u
+  )
+)
+
+(module
+  (func (result v128)
+    i32.const 0
+    i32x4.splat
+    f64x2.convert_low_i32x4_u
+  )
+)