From aefdf037466f36d5fade8a00bcde9b226ac8c783 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 15 Feb 2024 11:31:37 -0800 Subject: [PATCH 01/14] start --- src/passes/DeNaN.cpp | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/src/passes/DeNaN.cpp b/src/passes/DeNaN.cpp index 97f71c39fb9..070a102e444 100644 --- a/src/passes/DeNaN.cpp +++ b/src/passes/DeNaN.cpp @@ -35,7 +35,7 @@ struct DeNaN : public WalkerPass< // Adds calls. bool addsEffects() override { return true; } - Name deNan32, deNan64; + Name deNan32, deNan64, deNan128; void visitExpression(Expression* expr) { // If the expression returns a floating-point value, ensure it is not a @@ -67,6 +67,9 @@ struct DeNaN : public WalkerPass< } else { replacement = builder.makeCall(deNan64, {expr}, Type::f64); } + } else if (expr->type == Type::v128) { + // Assume anything can be a nan TODO: optimize + replacement = builder.makeCall(deNan128, {expr}, Type::v128); } if (replacement) { // We can't do this outside of a function, like in a global initializer, @@ -98,6 +101,11 @@ struct DeNaN : public WalkerPass< i, builder.makeCall( deNan64, {builder.makeLocalGet(i, Type::f64)}, Type::f64))); + } else if (func->getLocalType(i) == Type::v128) { + fixes.push_back(builder.makeLocalSet( + i, + builder.makeCall( + deNan128, {builder.makeLocalGet(i, Type::v128)}, Type::v128))); } } if (!fixes.empty()) { @@ -115,6 +123,7 @@ struct DeNaN : public WalkerPass< // Pick names for the helper functions. deNan32 = Names::getValidFunctionName(*module, "deNan32"); deNan64 = Names::getValidFunctionName(*module, "deNan64"); + deNan128 = Names::getValidFunctionName(*module, "deNan128"); ControlFlowWalker>::doWalkModule( module); @@ -143,6 +152,37 @@ struct DeNaN : public WalkerPass< }; add(deNan32, Type::f32, Literal(float(0)), EqFloat32); add(deNan64, Type::f64, Literal(double(0)), EqFloat64); + + // v128 is trickier as the 128 bits may contain f32s or f64s, and we need to + // check for nans both ways. + { + auto func = Builder::makeFunction(deNan128, Signature(Type::v128, Type::v128), {}); + + // Compare f32s to themselves, giving all 1's where equal and all 0's for + // a nan. + Expression* test32 = + builder.makeBinary( + EqVecF32x4, builder.makeLocalGet(0, Type::v128), builder.makeLocalGet(0, Type::v128)); + // Flip the bits, so that all 1's mean a nan. + test32 = builder.makeUnary(NotVec128, test32); + // Any 1 means we have a nan. + test32 = builder.makeUnary(AnyTrueVec128, test32); + + // Ditto for f64. + Expression* test64 = + builder.makeBinary( + EqVecF64x2, builder.makeLocalGet(0, Type::v128), builder.makeLocalGet(0, Type::v128)); + test64 = builder.makeUnary(NotVec128, test64); + test64 = builder.makeUnary(AnyTrueVec128, test64); + + // If either is a nan, we have a nan situation. + auto* testBoth = builder.makeBinary(OrInt32, test32, test64); + + func->body = builder.makeIf( + testBoth, + builder.makeConst(Literal({0, 0, 0, 0}))); + module->addFunction(std::move(func)); + } } }; From 43bb800e5a0264c49b0735015cd2a379efcc181b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 15 Feb 2024 11:36:20 -0800 Subject: [PATCH 02/14] sad --- src/passes/DeNaN.cpp | 2 +- test/lit/passes/denan-simd.wast | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 test/lit/passes/denan-simd.wast diff --git a/src/passes/DeNaN.cpp b/src/passes/DeNaN.cpp index 070a102e444..4493e1a74bf 100644 --- a/src/passes/DeNaN.cpp +++ b/src/passes/DeNaN.cpp @@ -155,7 +155,7 @@ struct DeNaN : public WalkerPass< // v128 is trickier as the 128 bits may contain f32s or f64s, and we need to // check for nans both ways. - { + if (module->features.hasSIMD()) { auto func = Builder::makeFunction(deNan128, Signature(Type::v128, Type::v128), {}); // Compare f32s to themselves, giving all 1's where equal and all 0's for diff --git a/test/lit/passes/denan-simd.wast b/test/lit/passes/denan-simd.wast new file mode 100644 index 00000000000..2424f0463b6 --- /dev/null +++ b/test/lit/passes/denan-simd.wast @@ -0,0 +1,20 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. + +;; RUN: foreach %s %t wasm-opt --denan -all -S -o - | filecheck %s + +(module + (func $foo128 (param $x v128) (result v128) + ;; The incoming param will be de-naned. + + ;; The first constant is not a nan as either f32 or f64, but we will still + ;; de-nan them atm. + (drop + (v128.const i32x4 0x00000001 0x00000002 0x00000003 0x00000004) + ) + (drop + (v128.const i32x4 0xffffffff 0x00000002 0x00000003 0x00000004) + ) + + (call $foo128 (local.get $x)) + ) +) From 2a75520f0c7c445fdec074d59a4ecc06fec25f2d Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 15 Feb 2024 12:58:43 -0800 Subject: [PATCH 03/14] work --- src/passes/DeNaN.cpp | 36 +++++++++++---- test/lit/passes/denan-simd.wast | 78 +++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 9 deletions(-) diff --git a/src/passes/DeNaN.cpp b/src/passes/DeNaN.cpp index 4493e1a74bf..1185ce7f6e3 100644 --- a/src/passes/DeNaN.cpp +++ b/src/passes/DeNaN.cpp @@ -68,8 +68,12 @@ struct DeNaN : public WalkerPass< replacement = builder.makeCall(deNan64, {expr}, Type::f64); } } else if (expr->type == Type::v128) { - // Assume anything can be a nan TODO: optimize - replacement = builder.makeCall(deNan128, {expr}, Type::v128); + if (c && maybeNaN(c)) { + uint8_t zero[16] = {}; + replacement = builder.makeConst(Literal(zero)); + } else { + replacement = builder.makeCall(deNan128, {expr}, Type::v128); + } } if (replacement) { // We can't do this outside of a function, like in a global initializer, @@ -163,27 +167,41 @@ struct DeNaN : public WalkerPass< Expression* test32 = builder.makeBinary( EqVecF32x4, builder.makeLocalGet(0, Type::v128), builder.makeLocalGet(0, Type::v128)); - // Flip the bits, so that all 1's mean a nan. - test32 = builder.makeUnary(NotVec128, test32); - // Any 1 means we have a nan. - test32 = builder.makeUnary(AnyTrueVec128, test32); + // Check if all were 1. + test32 = builder.makeUnary(AllTrueVecI32x4, test32); // Ditto for f64. Expression* test64 = builder.makeBinary( EqVecF64x2, builder.makeLocalGet(0, Type::v128), builder.makeLocalGet(0, Type::v128)); - test64 = builder.makeUnary(NotVec128, test64); - test64 = builder.makeUnary(AnyTrueVec128, test64); + test64 = builder.makeUnary(AllTrueVecI64x2, test64); // If either is a nan, we have a nan situation. auto* testBoth = builder.makeBinary(OrInt32, test32, test64); + uint8_t zero[16] = {}; func->body = builder.makeIf( testBoth, - builder.makeConst(Literal({0, 0, 0, 0}))); + builder.makeLocalGet(0, Type::v128), + builder.makeConst(Literal(zero))); module->addFunction(std::move(func)); } } + + // Check if a contant v128 may contain f32 or f64 NaNs. This does a sequence + // of operations much like deNan128() that was constructed above. + bool maybeNaN(Const* c) { + assert(c->type == Type::v128); + auto value = c->value; + + auto test32 = value.eqF32x4(value); + test32 = test32.allTrueI32x4(); + + auto test64 = value.eqF64x2(value); + test64 = test64.allTrueI64x2(); + + return test32.getInteger() | test64.getInteger(); + } }; Pass* createDeNaNPass() { return new DeNaN(); } diff --git a/test/lit/passes/denan-simd.wast b/test/lit/passes/denan-simd.wast index 2424f0463b6..d0be0e666f6 100644 --- a/test/lit/passes/denan-simd.wast +++ b/test/lit/passes/denan-simd.wast @@ -3,6 +3,30 @@ ;; RUN: foreach %s %t wasm-opt --denan -all -S -o - | filecheck %s (module + ;; CHECK: (type $0 (func (param v128) (result v128))) + + ;; CHECK: (type $1 (func (param f32) (result f32))) + + ;; CHECK: (type $2 (func (param f64) (result f64))) + + ;; CHECK: (func $foo128 (type $0) (param $x v128) (result v128) + ;; CHECK-NEXT: (local.set $x + ;; CHECK-NEXT: (call $deNan128 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (call $deNan128 + ;; CHECK-NEXT: (call $foo128 + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) (func $foo128 (param $x v128) (result v128) ;; The incoming param will be de-naned. @@ -18,3 +42,57 @@ (call $foo128 (local.get $x)) ) ) +;; CHECK: (func $deNan32 (type $1) (param $0 f32) (result f32) +;; CHECK-NEXT: (if (result f32) +;; CHECK-NEXT: (f32.eq +;; CHECK-NEXT: (local.get $0) +;; CHECK-NEXT: (local.get $0) +;; CHECK-NEXT: ) +;; CHECK-NEXT: (then +;; CHECK-NEXT: (local.get $0) +;; CHECK-NEXT: ) +;; CHECK-NEXT: (else +;; CHECK-NEXT: (f32.const 0) +;; CHECK-NEXT: ) +;; CHECK-NEXT: ) +;; CHECK-NEXT: ) + +;; CHECK: (func $deNan64 (type $2) (param $0 f64) (result f64) +;; CHECK-NEXT: (if (result f64) +;; CHECK-NEXT: (f64.eq +;; CHECK-NEXT: (local.get $0) +;; CHECK-NEXT: (local.get $0) +;; CHECK-NEXT: ) +;; CHECK-NEXT: (then +;; CHECK-NEXT: (local.get $0) +;; CHECK-NEXT: ) +;; CHECK-NEXT: (else +;; CHECK-NEXT: (f64.const 0) +;; CHECK-NEXT: ) +;; CHECK-NEXT: ) +;; CHECK-NEXT: ) + +;; CHECK: (func $deNan128 (type $0) (param $0 v128) (result v128) +;; CHECK-NEXT: (if (result v128) +;; CHECK-NEXT: (i32.or +;; CHECK-NEXT: (i32x4.all_true +;; CHECK-NEXT: (f32x4.eq +;; CHECK-NEXT: (local.get $0) +;; CHECK-NEXT: (local.get $0) +;; CHECK-NEXT: ) +;; CHECK-NEXT: ) +;; CHECK-NEXT: (i64x2.all_true +;; CHECK-NEXT: (f64x2.eq +;; CHECK-NEXT: (local.get $0) +;; CHECK-NEXT: (local.get $0) +;; CHECK-NEXT: ) +;; CHECK-NEXT: ) +;; CHECK-NEXT: ) +;; CHECK-NEXT: (then +;; CHECK-NEXT: (local.get $0) +;; CHECK-NEXT: ) +;; CHECK-NEXT: (else +;; CHECK-NEXT: (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) +;; CHECK-NEXT: ) +;; CHECK-NEXT: ) +;; CHECK-NEXT: ) From 7c7ff8152abf8dca94aeb4726e336c8264c20d14 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 15 Feb 2024 13:02:40 -0800 Subject: [PATCH 04/14] fix --- src/passes/DeNaN.cpp | 5 ++++- test/lit/passes/denan-simd.wast | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/passes/DeNaN.cpp b/src/passes/DeNaN.cpp index 1185ce7f6e3..0ffdb9f5397 100644 --- a/src/passes/DeNaN.cpp +++ b/src/passes/DeNaN.cpp @@ -194,13 +194,16 @@ struct DeNaN : public WalkerPass< assert(c->type == Type::v128); auto value = c->value; + // Compute if all f32s are equal to themselves. auto test32 = value.eqF32x4(value); test32 = test32.allTrueI32x4(); + // Compute if all f64s are equal to themselves. auto test64 = value.eqF64x2(value); test64 = test64.allTrueI64x2(); - return test32.getInteger() | test64.getInteger(); + // If any was not equal, this might be a NaN. + return !(test32.getInteger() && test64.getInteger()); } }; diff --git a/test/lit/passes/denan-simd.wast b/test/lit/passes/denan-simd.wast index d0be0e666f6..864ac5058f4 100644 --- a/test/lit/passes/denan-simd.wast +++ b/test/lit/passes/denan-simd.wast @@ -16,7 +16,9 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + ;; CHECK-NEXT: (call $deNan128 + ;; CHECK-NEXT: (v128.const i32x4 0x00000001 0x00000002 0x00000003 0x00000004) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) From 44e6c863bc030247304b823db5e6c4bc6d46a6f8 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 15 Feb 2024 13:10:48 -0800 Subject: [PATCH 05/14] fix --- src/passes/DeNaN.cpp | 6 +++++- test/lit/passes/denan-simd.wast | 11 +++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/passes/DeNaN.cpp b/src/passes/DeNaN.cpp index 0ffdb9f5397..4dde80b6968 100644 --- a/src/passes/DeNaN.cpp +++ b/src/passes/DeNaN.cpp @@ -158,7 +158,11 @@ struct DeNaN : public WalkerPass< add(deNan64, Type::f64, Literal(double(0)), EqFloat64); // v128 is trickier as the 128 bits may contain f32s or f64s, and we need to - // check for nans both ways. + // check for nans both ways. Note that the f32 NaN pattern is a subset of + // f64, since f64 NaNs fill with 1 the 11 bits of their exponent, which + // encompasses the 8 bits of the f32 exponent, but the position of those + // bits matters (a v128 has 4 possible places for f32 exponents, and only 2 + // for f64), so we must test both. if (module->features.hasSIMD()) { auto func = Builder::makeFunction(deNan128, Signature(Type::v128, Type::v128), {}); diff --git a/test/lit/passes/denan-simd.wast b/test/lit/passes/denan-simd.wast index 864ac5058f4..003b60ee616 100644 --- a/test/lit/passes/denan-simd.wast +++ b/test/lit/passes/denan-simd.wast @@ -23,6 +23,9 @@ ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: (call $deNan128 ;; CHECK-NEXT: (call $foo128 ;; CHECK-NEXT: (local.get $x) @@ -32,14 +35,18 @@ (func $foo128 (param $x v128) (result v128) ;; The incoming param will be de-naned. - ;; The first constant is not a nan as either f32 or f64, but we will still - ;; de-nan them atm. + ;; This is not a NaN. (We do still emit a call for it atm, FIXME) (drop (v128.const i32x4 0x00000001 0x00000002 0x00000003 0x00000004) ) + ;; This is an f64 NaN and also an f32. (drop (v128.const i32x4 0xffffffff 0x00000002 0x00000003 0x00000004) ) + ;; This is an f32 NaN and not an f64. + (drop + (v128.const i32x4 0x00000001 0xffffffff 0x00000003 0x00000004) + ) (call $foo128 (local.get $x)) ) From 96469aa00dc71e69c9bf84496df48d338da9e181 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 15 Feb 2024 13:11:04 -0800 Subject: [PATCH 06/14] fix --- test/lit/passes/denan-simd.wast | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/lit/passes/denan-simd.wast b/test/lit/passes/denan-simd.wast index 003b60ee616..3a90c711645 100644 --- a/test/lit/passes/denan-simd.wast +++ b/test/lit/passes/denan-simd.wast @@ -39,11 +39,11 @@ (drop (v128.const i32x4 0x00000001 0x00000002 0x00000003 0x00000004) ) - ;; This is an f64 NaN and also an f32. + ;; This is an f64 NaN and also an f32. It will become 0's. (drop (v128.const i32x4 0xffffffff 0x00000002 0x00000003 0x00000004) ) - ;; This is an f32 NaN and not an f64. + ;; This is an f32 NaN and not an f64. It will also become 0's. (drop (v128.const i32x4 0x00000001 0xffffffff 0x00000003 0x00000004) ) From 0cea4f4b1251f28d8c6eed714397090925f869da Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 15 Feb 2024 13:19:28 -0800 Subject: [PATCH 07/14] work --- src/passes/DeNaN.cpp | 57 ++++++++++----------------------- test/lit/passes/denan-simd.wast | 16 +++------ 2 files changed, 21 insertions(+), 52 deletions(-) diff --git a/src/passes/DeNaN.cpp b/src/passes/DeNaN.cpp index 4dde80b6968..0f9aa6afe33 100644 --- a/src/passes/DeNaN.cpp +++ b/src/passes/DeNaN.cpp @@ -147,48 +147,30 @@ struct DeNaN : public WalkerPass< // (local.get $0) // (f*.const 0) // ) + Expression* condition = builder.makeBinary( + op, builder.makeLocalGet(0, type), builder.makeLocalGet(0, type)); + if (type == Type::v128) { + // v128 is trickier as the 128 bits may contain f32s or f64s, and we need to + // check for nans both ways in principle. However, the f32 NaN pattern is a + // superset of f64, since it checks less bits (8 bit exponent vs 11), and it + // is checked in more places (4 32-bit values vs 2 64-bit ones), so we can + // just check that. We do so as follows: first, compare f32s to themselves, + // giving all 1's where not NaN. That has already been done by the + // Binary above that uses EqVecF32x4, so all we have left is to check + // them all. + condition = builder.makeUnary(AllTrueVecI32x4, condition); + } func->body = builder.makeIf( - builder.makeBinary( - op, builder.makeLocalGet(0, type), builder.makeLocalGet(0, type)), + condition, builder.makeLocalGet(0, type), builder.makeConst(literal)); module->addFunction(std::move(func)); }; add(deNan32, Type::f32, Literal(float(0)), EqFloat32); add(deNan64, Type::f64, Literal(double(0)), EqFloat64); - - // v128 is trickier as the 128 bits may contain f32s or f64s, and we need to - // check for nans both ways. Note that the f32 NaN pattern is a subset of - // f64, since f64 NaNs fill with 1 the 11 bits of their exponent, which - // encompasses the 8 bits of the f32 exponent, but the position of those - // bits matters (a v128 has 4 possible places for f32 exponents, and only 2 - // for f64), so we must test both. if (module->features.hasSIMD()) { - auto func = Builder::makeFunction(deNan128, Signature(Type::v128, Type::v128), {}); - - // Compare f32s to themselves, giving all 1's where equal and all 0's for - // a nan. - Expression* test32 = - builder.makeBinary( - EqVecF32x4, builder.makeLocalGet(0, Type::v128), builder.makeLocalGet(0, Type::v128)); - // Check if all were 1. - test32 = builder.makeUnary(AllTrueVecI32x4, test32); - - // Ditto for f64. - Expression* test64 = - builder.makeBinary( - EqVecF64x2, builder.makeLocalGet(0, Type::v128), builder.makeLocalGet(0, Type::v128)); - test64 = builder.makeUnary(AllTrueVecI64x2, test64); - - // If either is a nan, we have a nan situation. - auto* testBoth = builder.makeBinary(OrInt32, test32, test64); - - uint8_t zero[16] = {}; - func->body = builder.makeIf( - testBoth, - builder.makeLocalGet(0, Type::v128), - builder.makeConst(Literal(zero))); - module->addFunction(std::move(func)); + uint8_t zero128[16] = {}; + add(deNan128, Type::v128, Literal(zero128), EqVecF32x4); } } @@ -202,12 +184,7 @@ struct DeNaN : public WalkerPass< auto test32 = value.eqF32x4(value); test32 = test32.allTrueI32x4(); - // Compute if all f64s are equal to themselves. - auto test64 = value.eqF64x2(value); - test64 = test64.allTrueI64x2(); - - // If any was not equal, this might be a NaN. - return !(test32.getInteger() && test64.getInteger()); + return !test32.getInteger(); } }; diff --git a/test/lit/passes/denan-simd.wast b/test/lit/passes/denan-simd.wast index 3a90c711645..5a800ad1a9b 100644 --- a/test/lit/passes/denan-simd.wast +++ b/test/lit/passes/denan-simd.wast @@ -83,18 +83,10 @@ ;; CHECK: (func $deNan128 (type $0) (param $0 v128) (result v128) ;; CHECK-NEXT: (if (result v128) -;; CHECK-NEXT: (i32.or -;; CHECK-NEXT: (i32x4.all_true -;; CHECK-NEXT: (f32x4.eq -;; CHECK-NEXT: (local.get $0) -;; CHECK-NEXT: (local.get $0) -;; CHECK-NEXT: ) -;; CHECK-NEXT: ) -;; CHECK-NEXT: (i64x2.all_true -;; CHECK-NEXT: (f64x2.eq -;; CHECK-NEXT: (local.get $0) -;; CHECK-NEXT: (local.get $0) -;; CHECK-NEXT: ) +;; CHECK-NEXT: (i32x4.all_true +;; CHECK-NEXT: (f32x4.eq +;; CHECK-NEXT: (local.get $0) +;; CHECK-NEXT: (local.get $0) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (then From d195c00a418965de80521aced07e8ea3dc7ca468 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 15 Feb 2024 13:20:31 -0800 Subject: [PATCH 08/14] work --- src/passes/DeNaN.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/passes/DeNaN.cpp b/src/passes/DeNaN.cpp index 0f9aa6afe33..4b737a02af3 100644 --- a/src/passes/DeNaN.cpp +++ b/src/passes/DeNaN.cpp @@ -154,10 +154,10 @@ struct DeNaN : public WalkerPass< // check for nans both ways in principle. However, the f32 NaN pattern is a // superset of f64, since it checks less bits (8 bit exponent vs 11), and it // is checked in more places (4 32-bit values vs 2 64-bit ones), so we can - // just check that. We do so as follows: first, compare f32s to themselves, - // giving all 1's where not NaN. That has already been done by the - // Binary above that uses EqVecF32x4, so all we have left is to check - // them all. + // just check that. That is, this reduces to 4 checks of f32s, but is + // otherwise the same as a check of a single f32. The Binary above + // already does the 4 checks, so all we have left is to process those + // four results. condition = builder.makeUnary(AllTrueVecI32x4, condition); } func->body = builder.makeIf( From 42e81f2d31dd72e7e62ed0214cb3a316d8938274 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 15 Feb 2024 13:20:57 -0800 Subject: [PATCH 09/14] finish --- src/passes/DeNaN.cpp | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/passes/DeNaN.cpp b/src/passes/DeNaN.cpp index 4b737a02af3..510858d9de6 100644 --- a/src/passes/DeNaN.cpp +++ b/src/passes/DeNaN.cpp @@ -148,22 +148,20 @@ struct DeNaN : public WalkerPass< // (f*.const 0) // ) Expression* condition = builder.makeBinary( - op, builder.makeLocalGet(0, type), builder.makeLocalGet(0, type)); + op, builder.makeLocalGet(0, type), builder.makeLocalGet(0, type)); if (type == Type::v128) { - // v128 is trickier as the 128 bits may contain f32s or f64s, and we need to - // check for nans both ways in principle. However, the f32 NaN pattern is a - // superset of f64, since it checks less bits (8 bit exponent vs 11), and it - // is checked in more places (4 32-bit values vs 2 64-bit ones), so we can - // just check that. That is, this reduces to 4 checks of f32s, but is - // otherwise the same as a check of a single f32. The Binary above - // already does the 4 checks, so all we have left is to process those - // four results. + // v128 is trickier as the 128 bits may contain f32s or f64s, and we + // need to check for nans both ways in principle. However, the f32 NaN + // pattern is a superset of f64, since it checks less bits (8 bit + // exponent vs 11), and it is checked in more places (4 32-bit values vs + // 2 64-bit ones), so we can just check that. That is, this reduces to 4 + // checks of f32s, but is otherwise the same as a check of a single f32. + // The Binary above already does the 4 checks, so all we have left is to + // process those four results. condition = builder.makeUnary(AllTrueVecI32x4, condition); } func->body = builder.makeIf( - condition, - builder.makeLocalGet(0, type), - builder.makeConst(literal)); + condition, builder.makeLocalGet(0, type), builder.makeConst(literal)); module->addFunction(std::move(func)); }; add(deNan32, Type::f32, Literal(float(0)), EqFloat32); From d970d6ab0bad12f9fa99406c5cf4f1c851160227 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 15 Feb 2024 14:02:19 -0800 Subject: [PATCH 10/14] work --- src/passes/DeNaN.cpp | 36 +++++++++++++++++++++++------ test/lit/passes/denan-simd.wast | 40 +++++++++++++++++++++++++++++---- 2 files changed, 65 insertions(+), 11 deletions(-) diff --git a/src/passes/DeNaN.cpp b/src/passes/DeNaN.cpp index 510858d9de6..0e9c2850c10 100644 --- a/src/passes/DeNaN.cpp +++ b/src/passes/DeNaN.cpp @@ -147,9 +147,12 @@ struct DeNaN : public WalkerPass< // (local.get $0) // (f*.const 0) // ) - Expression* condition = builder.makeBinary( - op, builder.makeLocalGet(0, type), builder.makeLocalGet(0, type)); - if (type == Type::v128) { + Expression* condition; + if (type != Type::v128) { + // Generate a simple condition. + condition = builder.makeBinary( + op, builder.makeLocalGet(0, type), builder.makeLocalGet(0, type)); + } else { // v128 is trickier as the 128 bits may contain f32s or f64s, and we // need to check for nans both ways in principle. However, the f32 NaN // pattern is a superset of f64, since it checks less bits (8 bit @@ -158,22 +161,41 @@ struct DeNaN : public WalkerPass< // checks of f32s, but is otherwise the same as a check of a single f32. // The Binary above already does the 4 checks, so all we have left is to // process those four results. - condition = builder.makeUnary(AllTrueVecI32x4, condition); + // + // However there is additional complexity, which is that if we do + // EqVecF32x4 then we get all-1s for each case where we compare equal. + // That itself is a NaN pattern, which means that running this pass + // twice would interfere with itself. To avoid that we'd need a way to + // detect our previous instrumentation and not instrument it, but that + // is tricky (we can't depend on function names etc. while fuzzing). + // Instead, extract the lanes and use f32 checks. + auto getLane = [&](Index index) { + return builder.makeSIMDExtract(ExtractLaneVecF32x4, builder.makeLocalGet(0, type), index); + }; + auto getLaneCheck = [&](Index index) { + return builder.makeBinary(EqFloat32, getLane(index), getLane(index)); + }; + auto* firstTwo = builder.makeBinary(AndInt32, getLaneCheck(0), getLaneCheck(1)); + auto* lastTwo = builder.makeBinary(AndInt32, getLaneCheck(2), getLaneCheck(3)); + condition = builder.makeBinary(AndInt32, firstTwo, lastTwo); } func->body = builder.makeIf( condition, builder.makeLocalGet(0, type), builder.makeConst(literal)); module->addFunction(std::move(func)); }; + add(deNan32, Type::f32, Literal(float(0)), EqFloat32); add(deNan64, Type::f64, Literal(double(0)), EqFloat64); + if (module->features.hasSIMD()) { uint8_t zero128[16] = {}; - add(deNan128, Type::v128, Literal(zero128), EqVecF32x4); + // InvalidBinary is passed here as the condition in this case is more + // complex and computed above. + add(deNan128, Type::v128, Literal(zero128), InvalidBinary); } } - // Check if a contant v128 may contain f32 or f64 NaNs. This does a sequence - // of operations much like deNan128() that was constructed above. + // Check if a contant v128 may contain f32 or f64 NaNs. bool maybeNaN(Const* c) { assert(c->type == Type::v128); auto value = c->value; diff --git a/test/lit/passes/denan-simd.wast b/test/lit/passes/denan-simd.wast index 5a800ad1a9b..3ab4e9120e6 100644 --- a/test/lit/passes/denan-simd.wast +++ b/test/lit/passes/denan-simd.wast @@ -83,10 +83,42 @@ ;; CHECK: (func $deNan128 (type $0) (param $0 v128) (result v128) ;; CHECK-NEXT: (if (result v128) -;; CHECK-NEXT: (i32x4.all_true -;; CHECK-NEXT: (f32x4.eq -;; CHECK-NEXT: (local.get $0) -;; CHECK-NEXT: (local.get $0) +;; CHECK-NEXT: (i32.and +;; CHECK-NEXT: (i32.and +;; CHECK-NEXT: (f32.eq +;; CHECK-NEXT: (f32x4.extract_lane 0 +;; CHECK-NEXT: (local.get $0) +;; CHECK-NEXT: ) +;; CHECK-NEXT: (f32x4.extract_lane 0 +;; CHECK-NEXT: (local.get $0) +;; CHECK-NEXT: ) +;; CHECK-NEXT: ) +;; CHECK-NEXT: (f32.eq +;; CHECK-NEXT: (f32x4.extract_lane 1 +;; CHECK-NEXT: (local.get $0) +;; CHECK-NEXT: ) +;; CHECK-NEXT: (f32x4.extract_lane 1 +;; CHECK-NEXT: (local.get $0) +;; CHECK-NEXT: ) +;; CHECK-NEXT: ) +;; CHECK-NEXT: ) +;; CHECK-NEXT: (i32.and +;; CHECK-NEXT: (f32.eq +;; CHECK-NEXT: (f32x4.extract_lane 2 +;; CHECK-NEXT: (local.get $0) +;; CHECK-NEXT: ) +;; CHECK-NEXT: (f32x4.extract_lane 2 +;; CHECK-NEXT: (local.get $0) +;; CHECK-NEXT: ) +;; CHECK-NEXT: ) +;; CHECK-NEXT: (f32.eq +;; CHECK-NEXT: (f32x4.extract_lane 3 +;; CHECK-NEXT: (local.get $0) +;; CHECK-NEXT: ) +;; CHECK-NEXT: (f32x4.extract_lane 3 +;; CHECK-NEXT: (local.get $0) +;; CHECK-NEXT: ) +;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (then From d4bfec9c5e39f3795fe72f4d0db5d07776f82e46 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 15 Feb 2024 14:02:32 -0800 Subject: [PATCH 11/14] format --- src/passes/DeNaN.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/passes/DeNaN.cpp b/src/passes/DeNaN.cpp index 0e9c2850c10..f3739a6759f 100644 --- a/src/passes/DeNaN.cpp +++ b/src/passes/DeNaN.cpp @@ -170,13 +170,16 @@ struct DeNaN : public WalkerPass< // is tricky (we can't depend on function names etc. while fuzzing). // Instead, extract the lanes and use f32 checks. auto getLane = [&](Index index) { - return builder.makeSIMDExtract(ExtractLaneVecF32x4, builder.makeLocalGet(0, type), index); + return builder.makeSIMDExtract( + ExtractLaneVecF32x4, builder.makeLocalGet(0, type), index); }; auto getLaneCheck = [&](Index index) { return builder.makeBinary(EqFloat32, getLane(index), getLane(index)); }; - auto* firstTwo = builder.makeBinary(AndInt32, getLaneCheck(0), getLaneCheck(1)); - auto* lastTwo = builder.makeBinary(AndInt32, getLaneCheck(2), getLaneCheck(3)); + auto* firstTwo = + builder.makeBinary(AndInt32, getLaneCheck(0), getLaneCheck(1)); + auto* lastTwo = + builder.makeBinary(AndInt32, getLaneCheck(2), getLaneCheck(3)); condition = builder.makeBinary(AndInt32, firstTwo, lastTwo); } func->body = builder.makeIf( From 66c2c50996d1b334453e0cb6ae70d4c22b028052 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 15 Feb 2024 14:22:17 -0800 Subject: [PATCH 12/14] comment --- src/passes/DeNaN.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/passes/DeNaN.cpp b/src/passes/DeNaN.cpp index f3739a6759f..0f825452a64 100644 --- a/src/passes/DeNaN.cpp +++ b/src/passes/DeNaN.cpp @@ -159,8 +159,6 @@ struct DeNaN : public WalkerPass< // exponent vs 11), and it is checked in more places (4 32-bit values vs // 2 64-bit ones), so we can just check that. That is, this reduces to 4 // checks of f32s, but is otherwise the same as a check of a single f32. - // The Binary above already does the 4 checks, so all we have left is to - // process those four results. // // However there is additional complexity, which is that if we do // EqVecF32x4 then we get all-1s for each case where we compare equal. From c08fa5d3b95b09c2d9f25032f9fcf929cdc45e23 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 20 Feb 2024 14:14:53 -0800 Subject: [PATCH 13/14] feedback --- src/passes/DeNaN.cpp | 123 ++++++++++++++++++++++--------------------- 1 file changed, 64 insertions(+), 59 deletions(-) diff --git a/src/passes/DeNaN.cpp b/src/passes/DeNaN.cpp index 0f825452a64..4e9f2559661 100644 --- a/src/passes/DeNaN.cpp +++ b/src/passes/DeNaN.cpp @@ -68,7 +68,7 @@ struct DeNaN : public WalkerPass< replacement = builder.makeCall(deNan64, {expr}, Type::f64); } } else if (expr->type == Type::v128) { - if (c && maybeNaN(c)) { + if (c && hasNaNLane(c)) { uint8_t zero[16] = {}; replacement = builder.makeConst(Literal(zero)); } else { @@ -133,71 +133,76 @@ struct DeNaN : public WalkerPass< module); // Add helper functions after the walk, so they are not instrumented. - Builder builder(*module); - auto add = [&](Name name, Type type, Literal literal, BinaryOp op) { - auto func = Builder::makeFunction(name, Signature(type, type), {}); - // Compare the value to itself to check if it is a NaN, and return 0 if - // so: - // - // (if (result f*) - // (f*.eq - // (local.get $0) - // (local.get $0) - // ) - // (local.get $0) - // (f*.const 0) - // ) - Expression* condition; - if (type != Type::v128) { - // Generate a simple condition. - condition = builder.makeBinary( - op, builder.makeLocalGet(0, type), builder.makeLocalGet(0, type)); - } else { - // v128 is trickier as the 128 bits may contain f32s or f64s, and we - // need to check for nans both ways in principle. However, the f32 NaN - // pattern is a superset of f64, since it checks less bits (8 bit - // exponent vs 11), and it is checked in more places (4 32-bit values vs - // 2 64-bit ones), so we can just check that. That is, this reduces to 4 - // checks of f32s, but is otherwise the same as a check of a single f32. - // - // However there is additional complexity, which is that if we do - // EqVecF32x4 then we get all-1s for each case where we compare equal. - // That itself is a NaN pattern, which means that running this pass - // twice would interfere with itself. To avoid that we'd need a way to - // detect our previous instrumentation and not instrument it, but that - // is tricky (we can't depend on function names etc. while fuzzing). - // Instead, extract the lanes and use f32 checks. - auto getLane = [&](Index index) { - return builder.makeSIMDExtract( - ExtractLaneVecF32x4, builder.makeLocalGet(0, type), index); - }; - auto getLaneCheck = [&](Index index) { - return builder.makeBinary(EqFloat32, getLane(index), getLane(index)); - }; - auto* firstTwo = - builder.makeBinary(AndInt32, getLaneCheck(0), getLaneCheck(1)); - auto* lastTwo = - builder.makeBinary(AndInt32, getLaneCheck(2), getLaneCheck(3)); - condition = builder.makeBinary(AndInt32, firstTwo, lastTwo); - } - func->body = builder.makeIf( - condition, builder.makeLocalGet(0, type), builder.makeConst(literal)); - module->addFunction(std::move(func)); - }; - - add(deNan32, Type::f32, Literal(float(0)), EqFloat32); - add(deNan64, Type::f64, Literal(double(0)), EqFloat64); + addFunc(module, deNan32, Type::f32, Literal(float(0)), EqFloat32); + addFunc(module, deNan64, Type::f64, Literal(double(0)), EqFloat64); if (module->features.hasSIMD()) { uint8_t zero128[16] = {}; - // InvalidBinary is passed here as the condition in this case is more - // complex and computed above. - add(deNan128, Type::v128, Literal(zero128), InvalidBinary); + addFunc(module, deNan128, Type::v128, Literal(zero128)); } } + // Add a de-NaN-ing helper function. + void addFunc(Module* module, + Name name, + Type type, + Literal literal, + std::optional op={}) { + Builder builder(*module); + auto func = Builder::makeFunction(name, Signature(type, type), {}); + // Compare the value to itself to check if it is a NaN, and return 0 if + // so: + // + // (if (result f*) + // (f*.eq + // (local.get $0) + // (local.get $0) + // ) + // (local.get $0) + // (f*.const 0) + // ) + Expression* condition; + if (type != Type::v128) { + // Generate a simple condition. + assert(op); + condition = builder.makeBinary( + *op, builder.makeLocalGet(0, type), builder.makeLocalGet(0, type)); + } else { + assert(!op); + // v128 is trickier as the 128 bits may contain f32s or f64s, and we + // need to check for nans both ways in principle. However, the f32 NaN + // pattern is a superset of f64, since it checks less bits (8 bit + // exponent vs 11), and it is checked in more places (4 32-bit values vs + // 2 64-bit ones), so we can just check that. That is, this reduces to 4 + // checks of f32s, but is otherwise the same as a check of a single f32. + // + // However there is additional complexity, which is that if we do + // EqVecF32x4 then we get all-1s for each case where we compare equal. + // That itself is a NaN pattern, which means that running this pass + // twice would interfere with itself. To avoid that we'd need a way to + // detect our previous instrumentation and not instrument it, but that + // is tricky (we can't depend on function names etc. while fuzzing). + // Instead, extract the lanes and use f32 checks. + auto getLane = [&](Index index) { + return builder.makeSIMDExtract( + ExtractLaneVecF32x4, builder.makeLocalGet(0, type), index); + }; + auto getLaneCheck = [&](Index index) { + return builder.makeBinary(EqFloat32, getLane(index), getLane(index)); + }; + auto* firstTwo = + builder.makeBinary(AndInt32, getLaneCheck(0), getLaneCheck(1)); + auto* lastTwo = + builder.makeBinary(AndInt32, getLaneCheck(2), getLaneCheck(3)); + condition = builder.makeBinary(AndInt32, firstTwo, lastTwo); + } + func->body = builder.makeIf( + condition, builder.makeLocalGet(0, type), builder.makeConst(literal)); + module->addFunction(std::move(func)); + }; + // Check if a contant v128 may contain f32 or f64 NaNs. - bool maybeNaN(Const* c) { + bool hasNaNLane(Const* c) { assert(c->type == Type::v128); auto value = c->value; From cf07915fb4bc80f78d3ecfbb402a8dc2ceebcad3 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 20 Feb 2024 15:42:39 -0800 Subject: [PATCH 14/14] format --- src/passes/DeNaN.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/passes/DeNaN.cpp b/src/passes/DeNaN.cpp index 4e9f2559661..b0bd5fb60de 100644 --- a/src/passes/DeNaN.cpp +++ b/src/passes/DeNaN.cpp @@ -147,7 +147,7 @@ struct DeNaN : public WalkerPass< Name name, Type type, Literal literal, - std::optional op={}) { + std::optional op = {}) { Builder builder(*module); auto func = Builder::makeFunction(name, Signature(type, type), {}); // Compare the value to itself to check if it is a NaN, and return 0 if