From aefdf037466f36d5fade8a00bcde9b226ac8c783 Mon Sep 17 00:00:00 2001
From: Alon Zakai <azakai@google.com>
Date: Thu, 15 Feb 2024 11:31:37 -0800
Subject: [PATCH 01/14] start

---
 src/passes/DeNaN.cpp | 42 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)
diff --git a/src/passes/DeNaN.cpp b/src/passes/DeNaN.cpp
index 97f71c39fb9..070a102e444 100644
--- a/src/passes/DeNaN.cpp
+++ b/src/passes/DeNaN.cpp
@@ -35,7 +35,7 @@ struct DeNaN : public WalkerPass<
   // Adds calls.
   bool addsEffects() override { return true; }
 
-  Name deNan32, deNan64;
+  Name deNan32, deNan64, deNan128;
 
   void visitExpression(Expression* expr) {
     // If the expression returns a floating-point value, ensure it is not a
@@ -67,6 +67,9 @@ struct DeNaN : public WalkerPass<
       } else {
         replacement = builder.makeCall(deNan64, {expr}, Type::f64);
       }
+    } else if (expr->type == Type::v128) {
+      // Assume anything can be a nan TODO: optimize
+      replacement = builder.makeCall(deNan128, {expr}, Type::v128);
     }
     if (replacement) {
       // We can't do this outside of a function, like in a global initializer,
@@ -98,6 +101,11 @@ struct DeNaN : public WalkerPass<
           i,
           builder.makeCall(
             deNan64, {builder.makeLocalGet(i, Type::f64)}, Type::f64)));
+      } else if (func->getLocalType(i) == Type::v128) {
+        fixes.push_back(builder.makeLocalSet(
+          i,
+          builder.makeCall(
+            deNan128, {builder.makeLocalGet(i, Type::v128)}, Type::v128)));
       }
     }
     if (!fixes.empty()) {
@@ -115,6 +123,7 @@ struct DeNaN : public WalkerPass<
     // Pick names for the helper functions.
     deNan32 = Names::getValidFunctionName(*module, "deNan32");
     deNan64 = Names::getValidFunctionName(*module, "deNan64");
+    deNan128 = Names::getValidFunctionName(*module, "deNan128");
 
     ControlFlowWalker<DeNaN, UnifiedExpressionVisitor<DeNaN>>::doWalkModule(
       module);
@@ -143,6 +152,37 @@ struct DeNaN : public WalkerPass<
     };
     add(deNan32, Type::f32, Literal(float(0)), EqFloat32);
     add(deNan64, Type::f64, Literal(double(0)), EqFloat64);
+
+    // v128 is trickier as the 128 bits may contain f32s or f64s, and we need to
+    // check for nans both ways.
+    {
+      auto func = Builder::makeFunction(deNan128, Signature(Type::v128, Type::v128), {});
+
+      // Compare f32s to themselves, giving all 1's where equal and all 0's for
+      // a nan.
+      Expression* test32 =
+        builder.makeBinary(
+          EqVecF32x4, builder.makeLocalGet(0, Type::v128), builder.makeLocalGet(0, Type::v128));
+      // Flip the bits, so that all 1's mean a nan.
+      test32 = builder.makeUnary(NotVec128, test32);
+      // Any 1 means we have a nan.
+      test32 = builder.makeUnary(AnyTrueVec128, test32);
+
+      // Ditto for f64.
+      Expression* test64 =
+        builder.makeBinary(
+          EqVecF64x2, builder.makeLocalGet(0, Type::v128), builder.makeLocalGet(0, Type::v128));
+      test64 = builder.makeUnary(NotVec128, test64);
+      test64 = builder.makeUnary(AnyTrueVec128, test64);
+
+      // If either is a nan, we have a nan situation.
+      auto* testBoth = builder.makeBinary(OrInt32, test32, test64);
+
+      func->body = builder.makeIf(
+        testBoth,
+        builder.makeConst(Literal({0, 0, 0, 0})));
+      module->addFunction(std::move(func));
+    }
   }
 };
 

From 43bb800e5a0264c49b0735015cd2a379efcc181b Mon Sep 17 00:00:00 2001
From: Alon Zakai <azakai@google.com>
Date: Thu, 15 Feb 2024 11:36:20 -0800
Subject: [PATCH 02/14] sad

---
 src/passes/DeNaN.cpp            |  2 +-
 test/lit/passes/denan-simd.wast | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)
 create mode 100644 test/lit/passes/denan-simd.wast

diff --git a/src/passes/DeNaN.cpp b/src/passes/DeNaN.cpp
index 070a102e444..4493e1a74bf 100644
--- a/src/passes/DeNaN.cpp
+++ b/src/passes/DeNaN.cpp
@@ -155,7 +155,7 @@ struct DeNaN : public WalkerPass<
 
     // v128 is trickier as the 128 bits may contain f32s or f64s, and we need to
     // check for nans both ways.
-    {
+    if (module->features.hasSIMD()) {
       auto func = Builder::makeFunction(deNan128, Signature(Type::v128, Type::v128), {});
 
       // Compare f32s to themselves, giving all 1's where equal and all 0's for
diff --git a/test/lit/passes/denan-simd.wast b/test/lit/passes/denan-simd.wast
new file mode 100644
index 00000000000..2424f0463b6
--- /dev/null
+++ b/test/lit/passes/denan-simd.wast
@@ -0,0 +1,20 @@
+;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
+
+;; RUN: foreach %s %t wasm-opt --denan -all -S -o - | filecheck %s
+
+(module
+  (func $foo128 (param $x v128) (result v128)
+    ;; The incoming param will be de-naned.
+
+    ;; The first constant is not a nan as either f32 or f64, but we will still
+    ;; de-nan them atm.
+    (drop
+      (v128.const i32x4 0x00000001 0x00000002 0x00000003 0x00000004)
+    )
+    (drop
+      (v128.const i32x4 0xffffffff 0x00000002 0x00000003 0x00000004)
+    )
+
+    (call $foo128 (local.get $x))
+  )
+)

From 2a75520f0c7c445fdec074d59a4ecc06fec25f2d Mon Sep 17 00:00:00 2001
From: Alon Zakai <azakai@google.com>
Date: Thu, 15 Feb 2024 12:58:43 -0800
Subject: [PATCH 03/14] work

---
 src/passes/DeNaN.cpp            | 36 +++++++++++----
 test/lit/passes/denan-simd.wast | 78 +++++++++++++++++++++++++++++++++
 2 files changed, 105 insertions(+), 9 deletions(-)

diff --git a/src/passes/DeNaN.cpp b/src/passes/DeNaN.cpp
index 4493e1a74bf..1185ce7f6e3 100644
--- a/src/passes/DeNaN.cpp
+++ b/src/passes/DeNaN.cpp
@@ -68,8 +68,12 @@ struct DeNaN : public WalkerPass<
         replacement = builder.makeCall(deNan64, {expr}, Type::f64);
       }
     } else if (expr->type == Type::v128) {
-      // Assume anything can be a nan TODO: optimize
-      replacement = builder.makeCall(deNan128, {expr}, Type::v128);
+      if (c && maybeNaN(c)) {
+        uint8_t zero[16] = {};
+        replacement = builder.makeConst(Literal(zero));
+      } else {
+        replacement = builder.makeCall(deNan128, {expr}, Type::v128);
+      }
     }
     if (replacement) {
       // We can't do this outside of a function, like in a global initializer,
@@ -163,27 +167,41 @@ struct DeNaN : public WalkerPass<
       Expression* test32 =
         builder.makeBinary(
           EqVecF32x4, builder.makeLocalGet(0, Type::v128), builder.makeLocalGet(0, Type::v128));
-      // Flip the bits, so that all 1's mean a nan.
-      test32 = builder.makeUnary(NotVec128, test32);
-      // Any 1 means we have a nan.
-      test32 = builder.makeUnary(AnyTrueVec128, test32);
+      // Check if all were 1.
+      test32 = builder.makeUnary(AllTrueVecI32x4, test32);
 
       // Ditto for f64.
       Expression* test64 =
         builder.makeBinary(
           EqVecF64x2, builder.makeLocalGet(0, Type::v128), builder.makeLocalGet(0, Type::v128));
-      test64 = builder.makeUnary(NotVec128, test64);
-      test64 = builder.makeUnary(AnyTrueVec128, test64);
+      test64 = builder.makeUnary(AllTrueVecI64x2, test64);
 
       // If either is a nan, we have a nan situation.
       auto* testBoth = builder.makeBinary(OrInt32, test32, test64);
 
+      uint8_t zero[16] = {};
       func->body = builder.makeIf(
         testBoth,
-        builder.makeConst(Literal({0, 0, 0, 0})));
+        builder.makeLocalGet(0, Type::v128),
+        builder.makeConst(Literal(zero)));
       module->addFunction(std::move(func));
     }
   }
+
+  // Check if a contant v128 may contain f32 or f64 NaNs. This does a sequence
+  // of operations much like deNan128() that was constructed above.
+  bool maybeNaN(Const* c) {
+    assert(c->type == Type::v128);
+    auto value = c->value;
+
+    auto test32 = value.eqF32x4(value);
+    test32 = test32.allTrueI32x4();
+
+    auto test64 = value.eqF64x2(value);
+    test64 = test64.allTrueI64x2();
+
+    return test32.getInteger() | test64.getInteger();
+  }
 };
 
 Pass* createDeNaNPass() { return new DeNaN(); }
diff --git a/test/lit/passes/denan-simd.wast b/test/lit/passes/denan-simd.wast
index 2424f0463b6..d0be0e666f6 100644
--- a/test/lit/passes/denan-simd.wast
+++ b/test/lit/passes/denan-simd.wast
@@ -3,6 +3,30 @@
 ;; RUN: foreach %s %t wasm-opt --denan -all -S -o - | filecheck %s
 
 (module
+  ;; CHECK:      (type $0 (func (param v128) (result v128)))
+
+  ;; CHECK:      (type $1 (func (param f32) (result f32)))
+
+  ;; CHECK:      (type $2 (func (param f64) (result f64)))
+
+  ;; CHECK:      (func $foo128 (type $0) (param $x v128) (result v128)
+  ;; CHECK-NEXT:  (local.set $x
+  ;; CHECK-NEXT:   (call $deNan128
+  ;; CHECK-NEXT:    (local.get $x)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (call $deNan128
+  ;; CHECK-NEXT:   (call $foo128
+  ;; CHECK-NEXT:    (local.get $x)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
   (func $foo128 (param $x v128) (result v128)
     ;; The incoming param will be de-naned.
 
@@ -18,3 +42,57 @@
     (call $foo128 (local.get $x))
   )
 )
+;; CHECK:      (func $deNan32 (type $1) (param $0 f32) (result f32)
+;; CHECK-NEXT:  (if (result f32)
+;; CHECK-NEXT:   (f32.eq
+;; CHECK-NEXT:    (local.get $0)
+;; CHECK-NEXT:    (local.get $0)
+;; CHECK-NEXT:   )
+;; CHECK-NEXT:   (then
+;; CHECK-NEXT:    (local.get $0)
+;; CHECK-NEXT:   )
+;; CHECK-NEXT:   (else
+;; CHECK-NEXT:    (f32.const 0)
+;; CHECK-NEXT:   )
+;; CHECK-NEXT:  )
+;; CHECK-NEXT: )
+
+;; CHECK:      (func $deNan64 (type $2) (param $0 f64) (result f64)
+;; CHECK-NEXT:  (if (result f64)
+;; CHECK-NEXT:   (f64.eq
+;; CHECK-NEXT:    (local.get $0)
+;; CHECK-NEXT:    (local.get $0)
+;; CHECK-NEXT:   )
+;; CHECK-NEXT:   (then
+;; CHECK-NEXT:    (local.get $0)
+;; CHECK-NEXT:   )
+;; CHECK-NEXT:   (else
+;; CHECK-NEXT:    (f64.const 0)
+;; CHECK-NEXT:   )
+;; CHECK-NEXT:  )
+;; CHECK-NEXT: )
+
+;; CHECK:      (func $deNan128 (type $0) (param $0 v128) (result v128)
+;; CHECK-NEXT:  (if (result v128)
+;; CHECK-NEXT:   (i32.or
+;; CHECK-NEXT:    (i32x4.all_true
+;; CHECK-NEXT:     (f32x4.eq
+;; CHECK-NEXT:      (local.get $0)
+;; CHECK-NEXT:      (local.get $0)
+;; CHECK-NEXT:     )
+;; CHECK-NEXT:    )
+;; CHECK-NEXT:    (i64x2.all_true
+;; CHECK-NEXT:     (f64x2.eq
+;; CHECK-NEXT:      (local.get $0)
+;; CHECK-NEXT:      (local.get $0)
+;; CHECK-NEXT:     )
+;; CHECK-NEXT:    )
+;; CHECK-NEXT:   )
+;; CHECK-NEXT:   (then
+;; CHECK-NEXT:    (local.get $0)
+;; CHECK-NEXT:   )
+;; CHECK-NEXT:   (else
+;; CHECK-NEXT:    (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000)
+;; CHECK-NEXT:   )
+;; CHECK-NEXT:  )
+;; CHECK-NEXT: )

From 7c7ff8152abf8dca94aeb4726e336c8264c20d14 Mon Sep 17 00:00:00 2001
From: Alon Zakai <azakai@google.com>
Date: Thu, 15 Feb 2024 13:02:40 -0800
Subject: [PATCH 04/14] fix

---
 src/passes/DeNaN.cpp            | 5 ++++-
 test/lit/passes/denan-simd.wast | 4 +++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/passes/DeNaN.cpp b/src/passes/DeNaN.cpp
index 1185ce7f6e3..0ffdb9f5397 100644
--- a/src/passes/DeNaN.cpp
+++ b/src/passes/DeNaN.cpp
@@ -194,13 +194,16 @@ struct DeNaN : public WalkerPass<
     assert(c->type == Type::v128);
     auto value = c->value;
 
+    // Compute if all f32s are equal to themselves.
     auto test32 = value.eqF32x4(value);
     test32 = test32.allTrueI32x4();
 
+    // Compute if all f64s are equal to themselves.
     auto test64 = value.eqF64x2(value);
     test64 = test64.allTrueI64x2();
 
-    return test32.getInteger() | test64.getInteger();
+    // If any was not equal, this might be a NaN.
+    return !(test32.getInteger() && test64.getInteger());
   }
 };
 
diff --git a/test/lit/passes/denan-simd.wast b/test/lit/passes/denan-simd.wast
index d0be0e666f6..864ac5058f4 100644
--- a/test/lit/passes/denan-simd.wast
+++ b/test/lit/passes/denan-simd.wast
@@ -16,7 +16,9 @@
   ;; CHECK-NEXT:   )
   ;; CHECK-NEXT:  )
   ;; CHECK-NEXT:  (drop
-  ;; CHECK-NEXT:   (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000)
+  ;; CHECK-NEXT:   (call $deNan128
+  ;; CHECK-NEXT:    (v128.const i32x4 0x00000001 0x00000002 0x00000003 0x00000004)
+  ;; CHECK-NEXT:   )
   ;; CHECK-NEXT:  )
   ;; CHECK-NEXT:  (drop
   ;; CHECK-NEXT:   (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000)

From 44e6c863bc030247304b823db5e6c4bc6d46a6f8 Mon Sep 17 00:00:00 2001
From: Alon Zakai <azakai@google.com>
Date: Thu, 15 Feb 2024 13:10:48 -0800
Subject: [PATCH 05/14] fix

---
 src/passes/DeNaN.cpp            |  6 +++++-
 test/lit/passes/denan-simd.wast | 11 +++++++++--
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/passes/DeNaN.cpp b/src/passes/DeNaN.cpp
index 0ffdb9f5397..4dde80b6968 100644
--- a/src/passes/DeNaN.cpp
+++ b/src/passes/DeNaN.cpp
@@ -158,7 +158,11 @@ struct DeNaN : public WalkerPass<
     add(deNan64, Type::f64, Literal(double(0)), EqFloat64);
 
     // v128 is trickier as the 128 bits may contain f32s or f64s, and we need to
-    // check for nans both ways.
+    // check for nans both ways. Note that the f32 NaN pattern is a subset of
+    // f64, since f64 NaNs fill with 1 the 11 bits of their exponent, which
+    // encompasses the 8 bits of the f32 exponent, but the position of those
+    // bits matters (a v128 has 4 possible places for f32 exponents, and only 2
+    // for f64), so we must test both.
     if (module->features.hasSIMD()) {
       auto func = Builder::makeFunction(deNan128, Signature(Type::v128, Type::v128), {});
 
diff --git a/test/lit/passes/denan-simd.wast b/test/lit/passes/denan-simd.wast
index 864ac5058f4..003b60ee616 100644
--- a/test/lit/passes/denan-simd.wast
+++ b/test/lit/passes/denan-simd.wast
@@ -23,6 +23,9 @@
   ;; CHECK-NEXT:  (drop
   ;; CHECK-NEXT:   (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000)
   ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000)
+  ;; CHECK-NEXT:  )
   ;; CHECK-NEXT:  (call $deNan128
   ;; CHECK-NEXT:   (call $foo128
   ;; CHECK-NEXT:    (local.get $x)
@@ -32,14 +35,18 @@
   (func $foo128 (param $x v128) (result v128)
     ;; The incoming param will be de-naned.
 
-    ;; The first constant is not a nan as either f32 or f64, but we will still
-    ;; de-nan them atm.
+    ;; This is not a NaN. (We do still emit a call for it atm, FIXME)
     (drop
       (v128.const i32x4 0x00000001 0x00000002 0x00000003 0x00000004)
     )
+    ;; This is an f64 NaN and also an f32.
     (drop
       (v128.const i32x4 0xffffffff 0x00000002 0x00000003 0x00000004)
     )
+    ;; This is an f32 NaN and not an f64.
+    (drop
+      (v128.const i32x4 0x00000001 0xffffffff 0x00000003 0x00000004)
+    )
 
     (call $foo128 (local.get $x))
   )

From 96469aa00dc71e69c9bf84496df48d338da9e181 Mon Sep 17 00:00:00 2001
From: Alon Zakai <azakai@google.com>
Date: Thu, 15 Feb 2024 13:11:04 -0800
Subject: [PATCH 06/14] fix

---
 test/lit/passes/denan-simd.wast | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/lit/passes/denan-simd.wast b/test/lit/passes/denan-simd.wast
index 003b60ee616..3a90c711645 100644
--- a/test/lit/passes/denan-simd.wast
+++ b/test/lit/passes/denan-simd.wast
@@ -39,11 +39,11 @@
     (drop
       (v128.const i32x4 0x00000001 0x00000002 0x00000003 0x00000004)
     )
-    ;; This is an f64 NaN and also an f32.
+    ;; This is an f64 NaN and also an f32. It will become 0's.
     (drop
       (v128.const i32x4 0xffffffff 0x00000002 0x00000003 0x00000004)
     )
-    ;; This is an f32 NaN and not an f64.
+    ;; This is an f32 NaN and not an f64. It will also become 0's.
     (drop
       (v128.const i32x4 0x00000001 0xffffffff 0x00000003 0x00000004)
     )

From 0cea4f4b1251f28d8c6eed714397090925f869da Mon Sep 17 00:00:00 2001
From: Alon Zakai <azakai@google.com>
Date: Thu, 15 Feb 2024 13:19:28 -0800
Subject: [PATCH 07/14] work

---
 src/passes/DeNaN.cpp            | 57 ++++++++++-----------------------
 test/lit/passes/denan-simd.wast | 16 +++------
 2 files changed, 21 insertions(+), 52 deletions(-)

diff --git a/src/passes/DeNaN.cpp b/src/passes/DeNaN.cpp
index 4dde80b6968..0f9aa6afe33 100644
--- a/src/passes/DeNaN.cpp
+++ b/src/passes/DeNaN.cpp
@@ -147,48 +147,30 @@ struct DeNaN : public WalkerPass<
       //     (local.get $0)
       //     (f*.const 0)
       //   )
+      Expression* condition = builder.makeBinary(
+          op, builder.makeLocalGet(0, type), builder.makeLocalGet(0, type));
+      if (type == Type::v128) {
+        // v128 is trickier as the 128 bits may contain f32s or f64s, and we need to
+        // check for nans both ways in principle. However, the f32 NaN pattern is a
+        // superset of f64, since it checks less bits (8 bit exponent vs 11), and it
+        // is checked in more places (4 32-bit values vs 2 64-bit ones), so we can
+        // just check that. We do so as follows: first, compare f32s to themselves,
+        // giving all 1's where not NaN. That has already been done by the
+        // Binary above that uses EqVecF32x4, so all we have left is to check
+        // them all.
+        condition = builder.makeUnary(AllTrueVecI32x4, condition);
+      }
       func->body = builder.makeIf(
-        builder.makeBinary(
-          op, builder.makeLocalGet(0, type), builder.makeLocalGet(0, type)),
+        condition,
         builder.makeLocalGet(0, type),
         builder.makeConst(literal));
       module->addFunction(std::move(func));
     };
     add(deNan32, Type::f32, Literal(float(0)), EqFloat32);
     add(deNan64, Type::f64, Literal(double(0)), EqFloat64);
-
-    // v128 is trickier as the 128 bits may contain f32s or f64s, and we need to
-    // check for nans both ways. Note that the f32 NaN pattern is a subset of
-    // f64, since f64 NaNs fill with 1 the 11 bits of their exponent, which
-    // encompasses the 8 bits of the f32 exponent, but the position of those
-    // bits matters (a v128 has 4 possible places for f32 exponents, and only 2
-    // for f64), so we must test both.
     if (module->features.hasSIMD()) {
-      auto func = Builder::makeFunction(deNan128, Signature(Type::v128, Type::v128), {});
-
-      // Compare f32s to themselves, giving all 1's where equal and all 0's for
-      // a nan.
-      Expression* test32 =
-        builder.makeBinary(
-          EqVecF32x4, builder.makeLocalGet(0, Type::v128), builder.makeLocalGet(0, Type::v128));
-      // Check if all were 1.
-      test32 = builder.makeUnary(AllTrueVecI32x4, test32);
-
-      // Ditto for f64.
-      Expression* test64 =
-        builder.makeBinary(
-          EqVecF64x2, builder.makeLocalGet(0, Type::v128), builder.makeLocalGet(0, Type::v128));
-      test64 = builder.makeUnary(AllTrueVecI64x2, test64);
-
-      // If either is a nan, we have a nan situation.
-      auto* testBoth = builder.makeBinary(OrInt32, test32, test64);
-
-      uint8_t zero[16] = {};
-      func->body = builder.makeIf(
-        testBoth,
-        builder.makeLocalGet(0, Type::v128),
-        builder.makeConst(Literal(zero)));
-      module->addFunction(std::move(func));
+      uint8_t zero128[16] = {};
+      add(deNan128, Type::v128, Literal(zero128), EqVecF32x4);
     }
   }
 
@@ -202,12 +184,7 @@ struct DeNaN : public WalkerPass<
     auto test32 = value.eqF32x4(value);
     test32 = test32.allTrueI32x4();
 
-    // Compute if all f64s are equal to themselves.
-    auto test64 = value.eqF64x2(value);
-    test64 = test64.allTrueI64x2();
-
-    // If any was not equal, this might be a NaN.
-    return !(test32.getInteger() && test64.getInteger());
+    return !test32.getInteger();
   }
 };
 
diff --git a/test/lit/passes/denan-simd.wast b/test/lit/passes/denan-simd.wast
index 3a90c711645..5a800ad1a9b 100644
--- a/test/lit/passes/denan-simd.wast
+++ b/test/lit/passes/denan-simd.wast
@@ -83,18 +83,10 @@
 
 ;; CHECK:      (func $deNan128 (type $0) (param $0 v128) (result v128)
 ;; CHECK-NEXT:  (if (result v128)
-;; CHECK-NEXT:   (i32.or
-;; CHECK-NEXT:    (i32x4.all_true
-;; CHECK-NEXT:     (f32x4.eq
-;; CHECK-NEXT:      (local.get $0)
-;; CHECK-NEXT:      (local.get $0)
-;; CHECK-NEXT:     )
-;; CHECK-NEXT:    )
-;; CHECK-NEXT:    (i64x2.all_true
-;; CHECK-NEXT:     (f64x2.eq
-;; CHECK-NEXT:      (local.get $0)
-;; CHECK-NEXT:      (local.get $0)
-;; CHECK-NEXT:     )
+;; CHECK-NEXT:   (i32x4.all_true
+;; CHECK-NEXT:    (f32x4.eq
+;; CHECK-NEXT:     (local.get $0)
+;; CHECK-NEXT:     (local.get $0)
 ;; CHECK-NEXT:    )
 ;; CHECK-NEXT:   )
 ;; CHECK-NEXT:   (then

From d195c00a418965de80521aced07e8ea3dc7ca468 Mon Sep 17 00:00:00 2001
From: Alon Zakai <azakai@google.com>
Date: Thu, 15 Feb 2024 13:20:31 -0800
Subject: [PATCH 08/14] work

---
 src/passes/DeNaN.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/passes/DeNaN.cpp b/src/passes/DeNaN.cpp
index 0f9aa6afe33..4b737a02af3 100644
--- a/src/passes/DeNaN.cpp
+++ b/src/passes/DeNaN.cpp
@@ -154,10 +154,10 @@ struct DeNaN : public WalkerPass<
         // check for nans both ways in principle. However, the f32 NaN pattern is a
         // superset of f64, since it checks less bits (8 bit exponent vs 11), and it
         // is checked in more places (4 32-bit values vs 2 64-bit ones), so we can
-        // just check that. We do so as follows: first, compare f32s to themselves,
-        // giving all 1's where not NaN. That has already been done by the
-        // Binary above that uses EqVecF32x4, so all we have left is to check
-        // them all.
+        // just check that. That is, this reduces to 4 checks of f32s, but is
+        // otherwise the same as a check of a single f32. The Binary above
+        // already does the 4 checks, so all we have left is to process those
+        // four results.
         condition = builder.makeUnary(AllTrueVecI32x4, condition);
       }
       func->body = builder.makeIf(

From 42e81f2d31dd72e7e62ed0214cb3a316d8938274 Mon Sep 17 00:00:00 2001
From: Alon Zakai <azakai@google.com>
Date: Thu, 15 Feb 2024 13:20:57 -0800
Subject: [PATCH 09/14] finish

---
 src/passes/DeNaN.cpp | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/src/passes/DeNaN.cpp b/src/passes/DeNaN.cpp
index 4b737a02af3..510858d9de6 100644
--- a/src/passes/DeNaN.cpp
+++ b/src/passes/DeNaN.cpp
@@ -148,22 +148,20 @@ struct DeNaN : public WalkerPass<
       //     (f*.const 0)
       //   )
       Expression* condition = builder.makeBinary(
-          op, builder.makeLocalGet(0, type), builder.makeLocalGet(0, type));
+        op, builder.makeLocalGet(0, type), builder.makeLocalGet(0, type));
       if (type == Type::v128) {
-        // v128 is trickier as the 128 bits may contain f32s or f64s, and we need to
-        // check for nans both ways in principle. However, the f32 NaN pattern is a
-        // superset of f64, since it checks less bits (8 bit exponent vs 11), and it
-        // is checked in more places (4 32-bit values vs 2 64-bit ones), so we can
-        // just check that. That is, this reduces to 4 checks of f32s, but is
-        // otherwise the same as a check of a single f32. The Binary above
-        // already does the 4 checks, so all we have left is to process those
-        // four results.
+        // v128 is trickier as the 128 bits may contain f32s or f64s, and we
+        // need to check for nans both ways in principle. However, the f32 NaN
+        // pattern is a superset of f64, since it checks less bits (8 bit
+        // exponent vs 11), and it is checked in more places (4 32-bit values vs
+        // 2 64-bit ones), so we can just check that. That is, this reduces to 4
+        // checks of f32s, but is otherwise the same as a check of a single f32.
+        // The Binary above already does the 4 checks, so all we have left is to
+        // process those four results.
         condition = builder.makeUnary(AllTrueVecI32x4, condition);
       }
       func->body = builder.makeIf(
-        condition,
-        builder.makeLocalGet(0, type),
-        builder.makeConst(literal));
+        condition, builder.makeLocalGet(0, type), builder.makeConst(literal));
       module->addFunction(std::move(func));
     };
     add(deNan32, Type::f32, Literal(float(0)), EqFloat32);

From d970d6ab0bad12f9fa99406c5cf4f1c851160227 Mon Sep 17 00:00:00 2001
From: Alon Zakai <azakai@google.com>
Date: Thu, 15 Feb 2024 14:02:19 -0800
Subject: [PATCH 10/14] work

---
 src/passes/DeNaN.cpp            | 36 +++++++++++++++++++++++------
 test/lit/passes/denan-simd.wast | 40 +++++++++++++++++++++++++++++----
 2 files changed, 65 insertions(+), 11 deletions(-)

diff --git a/src/passes/DeNaN.cpp b/src/passes/DeNaN.cpp
index 510858d9de6..0e9c2850c10 100644
--- a/src/passes/DeNaN.cpp
+++ b/src/passes/DeNaN.cpp
@@ -147,9 +147,12 @@ struct DeNaN : public WalkerPass<
       //     (local.get $0)
       //     (f*.const 0)
       //   )
-      Expression* condition = builder.makeBinary(
-        op, builder.makeLocalGet(0, type), builder.makeLocalGet(0, type));
-      if (type == Type::v128) {
+      Expression* condition;
+      if (type != Type::v128) {
+        // Generate a simple condition.
+        condition = builder.makeBinary(
+          op, builder.makeLocalGet(0, type), builder.makeLocalGet(0, type));
+      } else {
         // v128 is trickier as the 128 bits may contain f32s or f64s, and we
         // need to check for nans both ways in principle. However, the f32 NaN
         // pattern is a superset of f64, since it checks less bits (8 bit
@@ -158,22 +161,41 @@ struct DeNaN : public WalkerPass<
         // checks of f32s, but is otherwise the same as a check of a single f32.
         // The Binary above already does the 4 checks, so all we have left is to
         // process those four results.
-        condition = builder.makeUnary(AllTrueVecI32x4, condition);
+        //
+        // However there is additional complexity, which is that if we do
+        // EqVecF32x4 then we get all-1s for each case where we compare equal.
+        // That itself is a NaN pattern, which means that running this pass
+        // twice would interfere with itself. To avoid that we'd need a way to
+        // detect our previous instrumentation and not instrument it, but that
+        // is tricky (we can't depend on function names etc. while fuzzing).
+        // Instead, extract the lanes and use f32 checks.
+        auto getLane = [&](Index index) {
+          return builder.makeSIMDExtract(ExtractLaneVecF32x4, builder.makeLocalGet(0, type), index);
+        };
+        auto getLaneCheck = [&](Index index) {
+          return builder.makeBinary(EqFloat32, getLane(index), getLane(index));
+        };
+        auto* firstTwo = builder.makeBinary(AndInt32, getLaneCheck(0), getLaneCheck(1));
+        auto* lastTwo = builder.makeBinary(AndInt32, getLaneCheck(2), getLaneCheck(3));
+        condition = builder.makeBinary(AndInt32, firstTwo, lastTwo);
       }
       func->body = builder.makeIf(
         condition, builder.makeLocalGet(0, type), builder.makeConst(literal));
       module->addFunction(std::move(func));
     };
+
     add(deNan32, Type::f32, Literal(float(0)), EqFloat32);
     add(deNan64, Type::f64, Literal(double(0)), EqFloat64);
+
     if (module->features.hasSIMD()) {
       uint8_t zero128[16] = {};
-      add(deNan128, Type::v128, Literal(zero128), EqVecF32x4);
+      // InvalidBinary is passed here as the condition in this case is more
+      // complex and computed above.
+      add(deNan128, Type::v128, Literal(zero128), InvalidBinary);
     }
   }
 
-  // Check if a contant v128 may contain f32 or f64 NaNs. This does a sequence
-  // of operations much like deNan128() that was constructed above.
+  // Check if a contant v128 may contain f32 or f64 NaNs.
   bool maybeNaN(Const* c) {
     assert(c->type == Type::v128);
     auto value = c->value;
diff --git a/test/lit/passes/denan-simd.wast b/test/lit/passes/denan-simd.wast
index 5a800ad1a9b..3ab4e9120e6 100644
--- a/test/lit/passes/denan-simd.wast
+++ b/test/lit/passes/denan-simd.wast
@@ -83,10 +83,42 @@
 
 ;; CHECK:      (func $deNan128 (type $0) (param $0 v128) (result v128)
 ;; CHECK-NEXT:  (if (result v128)
-;; CHECK-NEXT:   (i32x4.all_true
-;; CHECK-NEXT:    (f32x4.eq
-;; CHECK-NEXT:     (local.get $0)
-;; CHECK-NEXT:     (local.get $0)
+;; CHECK-NEXT:   (i32.and
+;; CHECK-NEXT:    (i32.and
+;; CHECK-NEXT:     (f32.eq
+;; CHECK-NEXT:      (f32x4.extract_lane 0
+;; CHECK-NEXT:       (local.get $0)
+;; CHECK-NEXT:      )
+;; CHECK-NEXT:      (f32x4.extract_lane 0
+;; CHECK-NEXT:       (local.get $0)
+;; CHECK-NEXT:      )
+;; CHECK-NEXT:     )
+;; CHECK-NEXT:     (f32.eq
+;; CHECK-NEXT:      (f32x4.extract_lane 1
+;; CHECK-NEXT:       (local.get $0)
+;; CHECK-NEXT:      )
+;; CHECK-NEXT:      (f32x4.extract_lane 1
+;; CHECK-NEXT:       (local.get $0)
+;; CHECK-NEXT:      )
+;; CHECK-NEXT:     )
+;; CHECK-NEXT:    )
+;; CHECK-NEXT:    (i32.and
+;; CHECK-NEXT:     (f32.eq
+;; CHECK-NEXT:      (f32x4.extract_lane 2
+;; CHECK-NEXT:       (local.get $0)
+;; CHECK-NEXT:      )
+;; CHECK-NEXT:      (f32x4.extract_lane 2
+;; CHECK-NEXT:       (local.get $0)
+;; CHECK-NEXT:      )
+;; CHECK-NEXT:     )
+;; CHECK-NEXT:     (f32.eq
+;; CHECK-NEXT:      (f32x4.extract_lane 3
+;; CHECK-NEXT:       (local.get $0)
+;; CHECK-NEXT:      )
+;; CHECK-NEXT:      (f32x4.extract_lane 3
+;; CHECK-NEXT:       (local.get $0)
+;; CHECK-NEXT:      )
+;; CHECK-NEXT:     )
 ;; CHECK-NEXT:    )
 ;; CHECK-NEXT:   )
 ;; CHECK-NEXT:   (then

From d4bfec9c5e39f3795fe72f4d0db5d07776f82e46 Mon Sep 17 00:00:00 2001
From: Alon Zakai <azakai@google.com>
Date: Thu, 15 Feb 2024 14:02:32 -0800
Subject: [PATCH 11/14] format

---
 src/passes/DeNaN.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/passes/DeNaN.cpp b/src/passes/DeNaN.cpp
index 0e9c2850c10..f3739a6759f 100644
--- a/src/passes/DeNaN.cpp
+++ b/src/passes/DeNaN.cpp
@@ -170,13 +170,16 @@ struct DeNaN : public WalkerPass<
         // is tricky (we can't depend on function names etc. while fuzzing).
         // Instead, extract the lanes and use f32 checks.
         auto getLane = [&](Index index) {
-          return builder.makeSIMDExtract(ExtractLaneVecF32x4, builder.makeLocalGet(0, type), index);
+          return builder.makeSIMDExtract(
+            ExtractLaneVecF32x4, builder.makeLocalGet(0, type), index);
         };
         auto getLaneCheck = [&](Index index) {
           return builder.makeBinary(EqFloat32, getLane(index), getLane(index));
         };
-        auto* firstTwo = builder.makeBinary(AndInt32, getLaneCheck(0), getLaneCheck(1));
-        auto* lastTwo = builder.makeBinary(AndInt32, getLaneCheck(2), getLaneCheck(3));
+        auto* firstTwo =
+          builder.makeBinary(AndInt32, getLaneCheck(0), getLaneCheck(1));
+        auto* lastTwo =
+          builder.makeBinary(AndInt32, getLaneCheck(2), getLaneCheck(3));
         condition = builder.makeBinary(AndInt32, firstTwo, lastTwo);
       }
       func->body = builder.makeIf(

From 66c2c50996d1b334453e0cb6ae70d4c22b028052 Mon Sep 17 00:00:00 2001
From: Alon Zakai <azakai@google.com>
Date: Thu, 15 Feb 2024 14:22:17 -0800
Subject: [PATCH 12/14] comment

---
 src/passes/DeNaN.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/passes/DeNaN.cpp b/src/passes/DeNaN.cpp
index f3739a6759f..0f825452a64 100644
--- a/src/passes/DeNaN.cpp
+++ b/src/passes/DeNaN.cpp
@@ -159,8 +159,6 @@ struct DeNaN : public WalkerPass<
         // exponent vs 11), and it is checked in more places (4 32-bit values vs
         // 2 64-bit ones), so we can just check that. That is, this reduces to 4
         // checks of f32s, but is otherwise the same as a check of a single f32.
-        // The Binary above already does the 4 checks, so all we have left is to
-        // process those four results.
         //
         // However there is additional complexity, which is that if we do
         // EqVecF32x4 then we get all-1s for each case where we compare equal.

From c08fa5d3b95b09c2d9f25032f9fcf929cdc45e23 Mon Sep 17 00:00:00 2001
From: Alon Zakai <azakai@google.com>
Date: Tue, 20 Feb 2024 14:14:53 -0800
Subject: [PATCH 13/14] feedback

---
 src/passes/DeNaN.cpp | 123 ++++++++++++++++++++++---------------------
 1 file changed, 64 insertions(+), 59 deletions(-)

diff --git a/src/passes/DeNaN.cpp b/src/passes/DeNaN.cpp
index 0f825452a64..4e9f2559661 100644
--- a/src/passes/DeNaN.cpp
+++ b/src/passes/DeNaN.cpp
@@ -68,7 +68,7 @@ struct DeNaN : public WalkerPass<
         replacement = builder.makeCall(deNan64, {expr}, Type::f64);
       }
     } else if (expr->type == Type::v128) {
-      if (c && maybeNaN(c)) {
+      if (c && hasNaNLane(c)) {
         uint8_t zero[16] = {};
         replacement = builder.makeConst(Literal(zero));
       } else {
@@ -133,71 +133,76 @@ struct DeNaN : public WalkerPass<
       module);
 
     // Add helper functions after the walk, so they are not instrumented.
-    Builder builder(*module);
-    auto add = [&](Name name, Type type, Literal literal, BinaryOp op) {
-      auto func = Builder::makeFunction(name, Signature(type, type), {});
-      // Compare the value to itself to check if it is a NaN, and return 0 if
-      // so:
-      //
-      //   (if (result f*)
-      //     (f*.eq
-      //       (local.get $0)
-      //       (local.get $0)
-      //     )
-      //     (local.get $0)
-      //     (f*.const 0)
-      //   )
-      Expression* condition;
-      if (type != Type::v128) {
-        // Generate a simple condition.
-        condition = builder.makeBinary(
-          op, builder.makeLocalGet(0, type), builder.makeLocalGet(0, type));
-      } else {
-        // v128 is trickier as the 128 bits may contain f32s or f64s, and we
-        // need to check for nans both ways in principle. However, the f32 NaN
-        // pattern is a superset of f64, since it checks less bits (8 bit
-        // exponent vs 11), and it is checked in more places (4 32-bit values vs
-        // 2 64-bit ones), so we can just check that. That is, this reduces to 4
-        // checks of f32s, but is otherwise the same as a check of a single f32.
-        //
-        // However there is additional complexity, which is that if we do
-        // EqVecF32x4 then we get all-1s for each case where we compare equal.
-        // That itself is a NaN pattern, which means that running this pass
-        // twice would interfere with itself. To avoid that we'd need a way to
-        // detect our previous instrumentation and not instrument it, but that
-        // is tricky (we can't depend on function names etc. while fuzzing).
-        // Instead, extract the lanes and use f32 checks.
-        auto getLane = [&](Index index) {
-          return builder.makeSIMDExtract(
-            ExtractLaneVecF32x4, builder.makeLocalGet(0, type), index);
-        };
-        auto getLaneCheck = [&](Index index) {
-          return builder.makeBinary(EqFloat32, getLane(index), getLane(index));
-        };
-        auto* firstTwo =
-          builder.makeBinary(AndInt32, getLaneCheck(0), getLaneCheck(1));
-        auto* lastTwo =
-          builder.makeBinary(AndInt32, getLaneCheck(2), getLaneCheck(3));
-        condition = builder.makeBinary(AndInt32, firstTwo, lastTwo);
-      }
-      func->body = builder.makeIf(
-        condition, builder.makeLocalGet(0, type), builder.makeConst(literal));
-      module->addFunction(std::move(func));
-    };
-
-    add(deNan32, Type::f32, Literal(float(0)), EqFloat32);
-    add(deNan64, Type::f64, Literal(double(0)), EqFloat64);
+    addFunc(module, deNan32, Type::f32, Literal(float(0)), EqFloat32);
+    addFunc(module, deNan64, Type::f64, Literal(double(0)), EqFloat64);
 
     if (module->features.hasSIMD()) {
       uint8_t zero128[16] = {};
-      // InvalidBinary is passed here as the condition in this case is more
-      // complex and computed above.
-      add(deNan128, Type::v128, Literal(zero128), InvalidBinary);
+      addFunc(module, deNan128, Type::v128, Literal(zero128));
     }
   }
 
+  // Add a de-NaN-ing helper function.
+  void addFunc(Module* module,
+               Name name,
+               Type type,
+               Literal literal,
+               std::optional<BinaryOp> op={}) {
+    Builder builder(*module);
+    auto func = Builder::makeFunction(name, Signature(type, type), {});
+    // Compare the value to itself to check if it is a NaN, and return 0 if
+    // so:
+    //
+    //   (if (result f*)
+    //     (f*.eq
+    //       (local.get $0)
+    //       (local.get $0)
+    //     )
+    //     (local.get $0)
+    //     (f*.const 0)
+    //   )
+    Expression* condition;
+    if (type != Type::v128) {
+      // Generate a simple condition.
+      assert(op);
+      condition = builder.makeBinary(
+        *op, builder.makeLocalGet(0, type), builder.makeLocalGet(0, type));
+    } else {
+      assert(!op);
+      // v128 is trickier as the 128 bits may contain f32s or f64s, and we
+      // need to check for nans both ways in principle. However, the f32 NaN
+      // pattern is a superset of f64, since it checks less bits (8 bit
+      // exponent vs 11), and it is checked in more places (4 32-bit values vs
+      // 2 64-bit ones), so we can just check that. That is, this reduces to 4
+      // checks of f32s, but is otherwise the same as a check of a single f32.
+      //
+      // However there is additional complexity, which is that if we do
+      // EqVecF32x4 then we get all-1s for each case where we compare equal.
+      // That itself is a NaN pattern, which means that running this pass
+      // twice would interfere with itself. To avoid that we'd need a way to
+      // detect our previous instrumentation and not instrument it, but that
+      // is tricky (we can't depend on function names etc. while fuzzing).
+      // Instead, extract the lanes and use f32 checks.
+      auto getLane = [&](Index index) {
+        return builder.makeSIMDExtract(
+          ExtractLaneVecF32x4, builder.makeLocalGet(0, type), index);
+      };
+      auto getLaneCheck = [&](Index index) {
+        return builder.makeBinary(EqFloat32, getLane(index), getLane(index));
+      };
+      auto* firstTwo =
+        builder.makeBinary(AndInt32, getLaneCheck(0), getLaneCheck(1));
+      auto* lastTwo =
+        builder.makeBinary(AndInt32, getLaneCheck(2), getLaneCheck(3));
+      condition = builder.makeBinary(AndInt32, firstTwo, lastTwo);
+    }
+    func->body = builder.makeIf(
+      condition, builder.makeLocalGet(0, type), builder.makeConst(literal));
+    module->addFunction(std::move(func));
+  };
+
   // Check if a contant v128 may contain f32 or f64 NaNs.
-  bool maybeNaN(Const* c) {
+  bool hasNaNLane(Const* c) {
     assert(c->type == Type::v128);
     auto value = c->value;
 

From cf07915fb4bc80f78d3ecfbb402a8dc2ceebcad3 Mon Sep 17 00:00:00 2001
From: Alon Zakai <azakai@google.com>
Date: Tue, 20 Feb 2024 15:42:39 -0800
Subject: [PATCH 14/14] format

---
 src/passes/DeNaN.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/passes/DeNaN.cpp b/src/passes/DeNaN.cpp
index 4e9f2559661..b0bd5fb60de 100644
--- a/src/passes/DeNaN.cpp
+++ b/src/passes/DeNaN.cpp
@@ -147,7 +147,7 @@ struct DeNaN : public WalkerPass<
                Name name,
                Type type,
                Literal literal,
-               std::optional<BinaryOp> op={}) {
+               std::optional<BinaryOp> op = {}) {
     Builder builder(*module);
     auto func = Builder::makeFunction(name, Signature(type, type), {});
     // Compare the value to itself to check if it is a NaN, and return 0 if