WebAssembly · kripken · Feb 21, 2024 · Feb 15, 2024 · Feb 15, 2024 · Feb 15, 2024
diff --git a/src/passes/DeNaN.cpp b/src/passes/DeNaN.cpp
@@ -35,7 +35,7 @@ struct DeNaN : public WalkerPass<
   // Adds calls.
   bool addsEffects() override { return true; }
 
-  Name deNan32, deNan64;
+  Name deNan32, deNan64, deNan128;
 
   void visitExpression(Expression* expr) {
     // If the expression returns a floating-point value, ensure it is not a
@@ -67,6 +67,13 @@ struct DeNaN : public WalkerPass<
       } else {
         replacement = builder.makeCall(deNan64, {expr}, Type::f64);
       }
+    } else if (expr->type == Type::v128) {
+      if (c && hasNaNLane(c)) {
+        uint8_t zero[16] = {};
+        replacement = builder.makeConst(Literal(zero));
+      } else {
+        replacement = builder.makeCall(deNan128, {expr}, Type::v128);
+      }
     }
     if (replacement) {
       // We can't do this outside of a function, like in a global initializer,
@@ -98,6 +105,11 @@ struct DeNaN : public WalkerPass<
           i,
           builder.makeCall(
             deNan64, {builder.makeLocalGet(i, Type::f64)}, Type::f64)));
+      } else if (func->getLocalType(i) == Type::v128) {
+        fixes.push_back(builder.makeLocalSet(
+          i,
+          builder.makeCall(
+            deNan128, {builder.makeLocalGet(i, Type::v128)}, Type::v128)));
       }
     }
     if (!fixes.empty()) {
@@ -115,34 +127,90 @@ struct DeNaN : public WalkerPass<
     // Pick names for the helper functions.
     deNan32 = Names::getValidFunctionName(*module, "deNan32");
     deNan64 = Names::getValidFunctionName(*module, "deNan64");
+    deNan128 = Names::getValidFunctionName(*module, "deNan128");
 
     ControlFlowWalker<DeNaN, UnifiedExpressionVisitor<DeNaN>>::doWalkModule(
       module);
 
     // Add helper functions after the walk, so they are not instrumented.
+    addFunc(module, deNan32, Type::f32, Literal(float(0)), EqFloat32);
+    addFunc(module, deNan64, Type::f64, Literal(double(0)), EqFloat64);
+
+    if (module->features.hasSIMD()) {
+      uint8_t zero128[16] = {};
+      addFunc(module, deNan128, Type::v128, Literal(zero128));
+    }
+  }
+
+  // Add a de-NaN-ing helper function.
+  void addFunc(Module* module,
+               Name name,
+               Type type,
+               Literal literal,
+               std::optional<BinaryOp> op = {}) {
     Builder builder(*module);
-    auto add = [&](Name name, Type type, Literal literal, BinaryOp op) {
-      auto func = Builder::makeFunction(name, Signature(type, type), {});
-      // Compare the value to itself to check if it is a NaN, and return 0 if
-      // so:
+    auto func = Builder::makeFunction(name, Signature(type, type), {});
+    // Compare the value to itself to check if it is a NaN, and return 0 if
+    // so:
+    //
+    //   (if (result f*)
+    //     (f*.eq
+    //       (local.get $0)
+    //       (local.get $0)
+    //     )
+    //     (local.get $0)
+    //     (f*.const 0)
+    //   )
+    Expression* condition;
+    if (type != Type::v128) {
+      // Generate a simple condition.
+      assert(op);
+      condition = builder.makeBinary(
+        *op, builder.makeLocalGet(0, type), builder.makeLocalGet(0, type));
+    } else {
+      assert(!op);
+      // v128 is trickier as the 128 bits may contain f32s or f64s, and we
+      // need to check for NaNs both ways in principle. However, the f32 NaN
+      // pattern is a superset of f64, since it checks fewer bits (8 bit
+      // exponent vs 11), and it is checked in more places (4 32-bit values vs
+      // 2 64-bit ones), so we can just check that. That is, this reduces to 4
+      // checks of f32s, but is otherwise the same as a check of a single f32.
       //
-      //   (if (result f*)
-      //     (f*.eq
-      //       (local.get $0)
-      //       (local.get $0)
-      //     )
-      //     (local.get $0)
-      //     (f*.const 0)
-      //   )
-      func->body = builder.makeIf(
-        builder.makeBinary(
-          op, builder.makeLocalGet(0, type), builder.makeLocalGet(0, type)),
-        builder.makeLocalGet(0, type),
-        builder.makeConst(literal));
-      module->addFunction(std::move(func));
-    };
-    add(deNan32, Type::f32, Literal(float(0)), EqFloat32);
-    add(deNan64, Type::f64, Literal(double(0)), EqFloat64);
+      // However there is additional complexity, which is that if we do
+      // EqVecF32x4 then we get all-1s for each case where we compare equal.
+      // That itself is a NaN pattern, which means that running this pass
+      // twice would interfere with itself. To avoid that we'd need a way to
+      // detect our previous instrumentation and not instrument it, but that
+      // is tricky (we can't depend on function names etc. while fuzzing).
+      // Instead, extract the lanes and use f32 checks.
+      auto getLane = [&](Index index) {
+        return builder.makeSIMDExtract(
+          ExtractLaneVecF32x4, builder.makeLocalGet(0, type), index);
+      };
+      auto getLaneCheck = [&](Index index) {
+        return builder.makeBinary(EqFloat32, getLane(index), getLane(index));
+      };
+      auto* firstTwo =
+        builder.makeBinary(AndInt32, getLaneCheck(0), getLaneCheck(1));
+      auto* lastTwo =
+        builder.makeBinary(AndInt32, getLaneCheck(2), getLaneCheck(3));
+      condition = builder.makeBinary(AndInt32, firstTwo, lastTwo);
+    }
+    func->body = builder.makeIf(
+      condition, builder.makeLocalGet(0, type), builder.makeConst(literal));
+    module->addFunction(std::move(func));
+  };
+
+  // Check if a constant v128 may contain f32 or f64 NaNs.
+  bool hasNaNLane(Const* c) {
+    assert(c->type == Type::v128);
+    auto value = c->value;
+
+    // Compute if all f32s are equal to themselves.
+    auto test32 = value.eqF32x4(value);
+    test32 = test32.allTrueI32x4();
+
+    return !test32.getInteger();
   }
 };
 

diff --git a/test/lit/passes/denan-simd.wast b/test/lit/passes/denan-simd.wast
@@ -0,0 +1,131 @@
+;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
+
+;; RUN: foreach %s %t wasm-opt --denan -all -S -o - | filecheck %s
+
+(module
+  ;; CHECK:      (type $0 (func (param v128) (result v128)))
+
+  ;; CHECK:      (type $1 (func (param f32) (result f32)))
+
+  ;; CHECK:      (type $2 (func (param f64) (result f64)))
+
+  ;; CHECK:      (func $foo128 (type $0) (param $x v128) (result v128)
+  ;; CHECK-NEXT:  (local.set $x
+  ;; CHECK-NEXT:   (call $deNan128
+  ;; CHECK-NEXT:    (local.get $x)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (call $deNan128
+  ;; CHECK-NEXT:    (v128.const i32x4 0x00000001 0x00000002 0x00000003 0x00000004)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (call $deNan128
+  ;; CHECK-NEXT:   (call $foo128
+  ;; CHECK-NEXT:    (local.get $x)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $foo128 (param $x v128) (result v128)
+    ;; The incoming param will be de-naned.
+
+    ;; This is not a NaN. (We do still emit a call for it atm, FIXME)
+    (drop
+      (v128.const i32x4 0x00000001 0x00000002 0x00000003 0x00000004)
+    )
+    ;; This is an f32 NaN (lane 0) but not an f64 NaN. It will become 0's.
+    (drop
+      (v128.const i32x4 0xffffffff 0x00000002 0x00000003 0x00000004)
+    )
+    ;; This is an f32 NaN (lane 1) and also an f64 NaN. It will also become 0's.
+    (drop
+      (v128.const i32x4 0x00000001 0xffffffff 0x00000003 0x00000004)
+    )
+
+    (call $foo128 (local.get $x))
+  )
+)
+;; CHECK:      (func $deNan32 (type $1) (param $0 f32) (result f32)
+;; CHECK-NEXT:  (if (result f32)
+;; CHECK-NEXT:   (f32.eq
+;; CHECK-NEXT:    (local.get $0)
+;; CHECK-NEXT:    (local.get $0)
+;; CHECK-NEXT:   )
+;; CHECK-NEXT:   (then
+;; CHECK-NEXT:    (local.get $0)
+;; CHECK-NEXT:   )
+;; CHECK-NEXT:   (else
+;; CHECK-NEXT:    (f32.const 0)
+;; CHECK-NEXT:   )
+;; CHECK-NEXT:  )
+;; CHECK-NEXT: )
+
+;; CHECK:      (func $deNan64 (type $2) (param $0 f64) (result f64)
+;; CHECK-NEXT:  (if (result f64)
+;; CHECK-NEXT:   (f64.eq
+;; CHECK-NEXT:    (local.get $0)
+;; CHECK-NEXT:    (local.get $0)
+;; CHECK-NEXT:   )
+;; CHECK-NEXT:   (then
+;; CHECK-NEXT:    (local.get $0)
+;; CHECK-NEXT:   )
+;; CHECK-NEXT:   (else
+;; CHECK-NEXT:    (f64.const 0)
+;; CHECK-NEXT:   )
+;; CHECK-NEXT:  )
+;; CHECK-NEXT: )
+
+;; CHECK:      (func $deNan128 (type $0) (param $0 v128) (result v128)
+;; CHECK-NEXT:  (if (result v128)
+;; CHECK-NEXT:   (i32.and
+;; CHECK-NEXT:    (i32.and
+;; CHECK-NEXT:     (f32.eq
+;; CHECK-NEXT:      (f32x4.extract_lane 0
+;; CHECK-NEXT:       (local.get $0)
+;; CHECK-NEXT:      )
+;; CHECK-NEXT:      (f32x4.extract_lane 0
+;; CHECK-NEXT:       (local.get $0)
+;; CHECK-NEXT:      )
+;; CHECK-NEXT:     )
+;; CHECK-NEXT:     (f32.eq
+;; CHECK-NEXT:      (f32x4.extract_lane 1
+;; CHECK-NEXT:       (local.get $0)
+;; CHECK-NEXT:      )
+;; CHECK-NEXT:      (f32x4.extract_lane 1
+;; CHECK-NEXT:       (local.get $0)
+;; CHECK-NEXT:      )
+;; CHECK-NEXT:     )
+;; CHECK-NEXT:    )
+;; CHECK-NEXT:    (i32.and
+;; CHECK-NEXT:     (f32.eq
+;; CHECK-NEXT:      (f32x4.extract_lane 2
+;; CHECK-NEXT:       (local.get $0)
+;; CHECK-NEXT:      )
+;; CHECK-NEXT:      (f32x4.extract_lane 2
+;; CHECK-NEXT:       (local.get $0)
+;; CHECK-NEXT:      )
+;; CHECK-NEXT:     )
+;; CHECK-NEXT:     (f32.eq
+;; CHECK-NEXT:      (f32x4.extract_lane 3
+;; CHECK-NEXT:       (local.get $0)
+;; CHECK-NEXT:      )
+;; CHECK-NEXT:      (f32x4.extract_lane 3
+;; CHECK-NEXT:       (local.get $0)
+;; CHECK-NEXT:      )
+;; CHECK-NEXT:     )
+;; CHECK-NEXT:    )
+;; CHECK-NEXT:   )
+;; CHECK-NEXT:   (then
+;; CHECK-NEXT:    (local.get $0)
+;; CHECK-NEXT:   )
+;; CHECK-NEXT:   (else
+;; CHECK-NEXT:    (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000)
+;; CHECK-NEXT:   )
+;; CHECK-NEXT:  )
+;; CHECK-NEXT: )