pulley: Implement float arithmetic operations

Fill out enough of the float operations to get the `f32.wast` and `f64.wast` spec tests working. A minor ABI issue was discovered along the way, and fixing it is also required to get a new test working on both 32-bit and 64-bit platforms. cc bytecodealliance#9783
alexcrichton · Dec 11, 2024 · 25d7740 · 25d7740
1 parent b5cad7c
commit 25d7740
Show file tree

Hide file tree

Showing 8 changed files with 314 additions and 78 deletions.
diff --git a/cranelift/codegen/meta/src/pulley.rs b/cranelift/codegen/meta/src/pulley.rs
@@ -39,10 +39,7 @@ impl Inst<'_> {
             .iter()
             .map(|(name, ty)| match (*name, *ty) {
                 ("operands", "BinaryOperands < XReg >") => Operand::Binop { reg: "XReg" },
-                (name, "RegSet < XReg >") => Operand::Normal {
-                    name,
-                    ty: "VecXReg",
-                },
+                ("operands", "BinaryOperands < FReg >") => Operand::Binop { reg: "FReg" },
                 ("dst", ty) => Operand::Writable { name, ty },
                 (name, ty) => Operand::Normal { name, ty },
             })

diff --git a/cranelift/codegen/src/isa/pulley_shared/abi.rs b/cranelift/codegen/src/isa/pulley_shared/abi.rs
@@ -585,15 +585,15 @@ where
             || clobber_size > 0
             || fixed_frame_storage_size > 0
         {
-            16 // FP, LR
+            P::pointer_width().bytes() * 2 // FP, LR
         } else {
             0
         };
 
         FrameLayout {
             incoming_args_size,
             tail_args_size,
-            setup_area_size,
+            setup_area_size: setup_area_size.into(),
             clobber_size,
             fixed_frame_storage_size,
             outgoing_args_size,

diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle
@@ -590,3 +590,58 @@
 
 (rule (lower (has_type $F64 (fpromote val @ (value_type $F32))))
   (pulley_f64_from_f32 val))
+
+;;;; Rules for `fadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $F32 (fadd a b))) (pulley_fadd32 a b))
+(rule (lower (has_type $F64 (fadd a b))) (pulley_fadd64 a b))
+
+;;;; Rules for `fsub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $F32 (fsub a b))) (pulley_fsub32 a b))
+(rule (lower (has_type $F64 (fsub a b))) (pulley_fsub64 a b))
+
+;;;; Rules for `fmul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $F32 (fmul a b))) (pulley_fmul32 a b))
+(rule (lower (has_type $F64 (fmul a b))) (pulley_fmul64 a b))
+
+;;;; Rules for `fdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $F32 (fdiv a b))) (pulley_fdiv32 a b))
+(rule (lower (has_type $F64 (fdiv a b))) (pulley_fdiv64 a b))
+
+;;;; Rules for `fmax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $F32 (fmax a b))) (pulley_fmaximum32 a b))
+(rule (lower (has_type $F64 (fmax a b))) (pulley_fmaximum64 a b))
+
+;;;; Rules for `fmin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $F32 (fmin a b))) (pulley_fminimum32 a b))
+(rule (lower (has_type $F64 (fmin a b))) (pulley_fminimum64 a b))
+
+;;;; Rules for `trunc` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $F32 (trunc a))) (pulley_ftrunc32 a))
+(rule (lower (has_type $F64 (trunc a))) (pulley_ftrunc64 a))
+
+;;;; Rules for `floor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $F32 (floor a))) (pulley_ffloor32 a))
+(rule (lower (has_type $F64 (floor a))) (pulley_ffloor64 a))
+
+;;;; Rules for `ceil` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $F32 (ceil a))) (pulley_fceil32 a))
+(rule (lower (has_type $F64 (ceil a))) (pulley_fceil64 a))
+
+;;;; Rules for `nearest` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $F32 (nearest a))) (pulley_fnearest32 a))
+(rule (lower (has_type $F64 (nearest a))) (pulley_fnearest64 a))
+
+;;;; Rules for `sqrt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $F32 (sqrt a))) (pulley_fsqrt32 a))
+(rule (lower (has_type $F64 (sqrt a))) (pulley_fsqrt64 a))
diff --git a/cranelift/codegen/src/isa/pulley_shared/mod.rs b/cranelift/codegen/src/isa/pulley_shared/mod.rs
@@ -54,6 +54,10 @@ impl PointerWidth {
             PointerWidth::PointerWidth64 => 64,
         }
     }
+
+    pub fn bytes(self) -> u8 {
+        self.bits() / 8
+    }
 }
 
 /// A Pulley backend.

diff --git a/crates/wasmtime/src/runtime/vm/libcalls.rs b/crates/wasmtime/src/runtime/vm/libcalls.rs
@@ -1313,69 +1313,12 @@ pub mod relocs {
         func(f)
     }
 
-    const TOINT_32: f32 = 1.0 / f32::EPSILON;
-    const TOINT_64: f64 = 1.0 / f64::EPSILON;
-
-    // NB: replace with `round_ties_even` from libstd when it's stable as
-    // tracked by rust-lang/rust#96710
     pub extern "C" fn nearestf32(x: f32) -> f32 {
-        // Rust doesn't have a nearest function; there's nearbyint, but it's not
-        // stabilized, so do it manually.
-        // Nearest is either ceil or floor depending on which is nearest or even.
-        // This approach exploited round half to even default mode.
-        let i = x.to_bits();
-        let e = i >> 23 & 0xff;
-        if e >= 0x7f_u32 + 23 {
-            // Check for NaNs.
-            if e == 0xff {
-                // Read the 23-bits significand.
-                if i & 0x7fffff != 0 {
-                    // Ensure it's arithmetic by setting the significand's most
-                    // significant bit to 1; it also works for canonical NaNs.
-                    return f32::from_bits(i | (1 << 22));
-                }
-            }
-            x
-        } else {
-            let abs = float_function! {
-                std: f32::abs,
-                core: libm::fabsf,
-            };
-            let copysign = float_function! {
-                std: f32::copysign,
-                core: libm::copysignf,
-            };
-
-            copysign(abs(x) + TOINT_32 - TOINT_32, x)
-        }
+        x.round_ties_even()
     }
 
     pub extern "C" fn nearestf64(x: f64) -> f64 {
-        let i = x.to_bits();
-        let e = i >> 52 & 0x7ff;
-        if e >= 0x3ff_u64 + 52 {
-            // Check for NaNs.
-            if e == 0x7ff {
-                // Read the 52-bits significand.
-                if i & 0xfffffffffffff != 0 {
-                    // Ensure it's arithmetic by setting the significand's most
-                    // significant bit to 1; it also works for canonical NaNs.
-                    return f64::from_bits(i | (1 << 51));
-                }
-            }
-            x
-        } else {
-            let abs = float_function! {
-                std: f64::abs,
-                core: libm::fabs,
-            };
-            let copysign = float_function! {
-                std: f64::copysign,
-                core: libm::copysign,
-            };
-
-            copysign(abs(x) + TOINT_64 - TOINT_64, x)
-        }
+        x.round_ties_even()
     }
 
     pub extern "C" fn fmaf32(a: f32, b: f32, c: f32) -> f32 {

diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs
@@ -399,7 +399,6 @@ impl WastTest {
                 "misc_testsuite/embenchen_fasta.wast",
                 "misc_testsuite/embenchen_ifs.wast",
                 "misc_testsuite/embenchen_primes.wast",
-                "misc_testsuite/float-round-doesnt-load-too-much.wast",
                 "misc_testsuite/int-to-float-splat.wast",
                 "misc_testsuite/issue4890.wast",
                 "misc_testsuite/issue6562.wast",
@@ -430,25 +429,14 @@ impl WastTest {
                 "misc_testsuite/winch/_simd_load.wast",
                 "misc_testsuite/winch/_simd_multivalue.wast",
                 "misc_testsuite/winch/_simd_store.wast",
-                "spec_testsuite/call.wast",
                 "spec_testsuite/call_indirect.wast",
-                "spec_testsuite/f32.wast",
                 "spec_testsuite/f32_bitwise.wast",
                 "spec_testsuite/f32_cmp.wast",
-                "spec_testsuite/f64.wast",
                 "spec_testsuite/f64_bitwise.wast",
                 "spec_testsuite/f64_cmp.wast",
                 "spec_testsuite/float_exprs.wast",
                 "spec_testsuite/float_misc.wast",
-                "spec_testsuite/imports.wast",
-                "spec_testsuite/local_get.wast",
-                "spec_testsuite/local_set.wast",
-                "spec_testsuite/local_tee.wast",
-                "spec_testsuite/loop.wast",
                 "spec_testsuite/proposals/annotations/simd_lane.wast",
-                "spec_testsuite/proposals/multi-memory/float_exprs0.wast",
-                "spec_testsuite/proposals/multi-memory/float_exprs1.wast",
-                "spec_testsuite/proposals/multi-memory/imports.wast",
                 "spec_testsuite/proposals/multi-memory/simd_memory-multi.wast",
                 "spec_testsuite/proposals/relaxed-simd/i16x8_relaxed_q15mulr_s.wast",
                 "spec_testsuite/proposals/relaxed-simd/i32x4_relaxed_trunc.wast",
@@ -458,7 +446,6 @@ impl WastTest {
                 "spec_testsuite/proposals/relaxed-simd/relaxed_madd_nmadd.wast",
                 "spec_testsuite/proposals/relaxed-simd/relaxed_min_max.wast",
                 "spec_testsuite/proposals/threads/atomic.wast",
-                "spec_testsuite/proposals/threads/imports.wast",
                 "spec_testsuite/simd_address.wast",
                 "spec_testsuite/simd_align.wast",
                 "spec_testsuite/simd_bit_shift.wast",