Optimize the implementation not to use any external function.

The special code path for zeros and NaNs is removed. They fall through as the general case. We avoid `$Number` by using the asm.js idiom `v = +x`. We avoid `abs` by using `s * v`. This causes `-0` to fall through as is, but it turns out that it has the desired effect in the end. We optimize the subnormal path to use only one multiplication and one division. We dispatch the overflow case from the normal and subnormal cases ahead of time, which allows to more easily justify the correctness of the algorithm. We hard-code the magical constants to avoid having to look up external identifiers, ensuring that immediate values are used in the generated code. Finally, and perhaps most importantly, we add significant documentation about the algorithms for the normal and subnormal paths, with---if not proof---at least an argument and references of why they are correct.
es-shims · Feb 11, 2022 · 34070bd · 34070bd
1 parent be55c4c
commit 34070bd
Show file tree

Hide file tree

Showing 3 changed files with 116 additions and 36 deletions.
diff --git a/implementation.js b/implementation.js
@@ -1,42 +1,107 @@
 'use strict';
 
-var isNaN = require('is-nan');
-
-var $isFinite = isFinite;
-var $Number = Number;
-var abs = Math.abs;
+module.exports = function fround(x) {
+	/*
+	 * We use magic numbers to guarantee immediate values.
+	 * We extensively document their value in accompanying comments.
+	 */
+	/* eslint-disable no-magic-numbers */
 
-var BINARY_32_EPSILON = 1.1920928955078125e-7; // 2 ** -23;
-var BINARY_32_MIN_VALUE = 1.1754943508222875e-38; // 2 ** -126;
-var BINARY_32_MAX_VALUE = 3.4028234663852886e+38; // 2 ** 128 - 2 ** 104
-var EPSILON = 2.220446049250313e-16; // Number.EPSILON
+	// Apply ToNumber(x) without relying on any function
+	var v = +x; /* eslint-disable-line no-implicit-coercion */
 
-var inverseEpsilon = 1 / EPSILON;
-var roundTiesToEven = function roundTiesToEven(n) {
-	// Even though this reduces down to `return n`, it takes advantage of built-in rounding.
-	return (n + inverseEpsilon) - inverseEpsilon;
-};
+	var s = v < 0 ? -1 : 1; // 1 for NaN, +0 and -0
+	var mod = s * v; // abs(v), or -0 if v is -0
 
-module.exports = function fround(x) {
-	var v = $Number(x);
-	if (v === 0 || isNaN(v) || !$isFinite(v)) {
-		return v;
-	}
-	var s = v < 0 ? -1 : 1;
-	var mod = abs(v);
-	if (mod < BINARY_32_MIN_VALUE) {
-		/* eslint operator-linebreak: [2, "before"] */
-		return (
-			s
-			* roundTiesToEven(mod / BINARY_32_MIN_VALUE / BINARY_32_EPSILON)
-			* BINARY_32_MIN_VALUE * BINARY_32_EPSILON
-		);
-	}
-	// Veltkamp's splitting (?)
-	var a = (1 + (BINARY_32_EPSILON / EPSILON)) * mod;
-	var result = a - (a - mod);
-	if (result > BINARY_32_MAX_VALUE || isNaN(result)) {
+	/*
+	 * Overflow case.
+	 *
+	 * The magic number 3.4028235677973366e38 is the threshold from which fround
+	 * causes an overflow, resulting in Infinity.
+	 * This code path is also used when the input is already an Infinity.
+	 */
+	if (mod >= 3.4028235677973366e38) { // overflow-threshold
 		return s * Infinity;
 	}
-	return s * result;
+
+	/*
+	 * Normal form case.
+	 *
+	 * The magic number 1.1754943508222875e-38 is the threshold from which the
+	 * result of fround is represented as a normal form in binary32.
+	 *
+	 * Here, we know that both the input and output are expressed in a normal
+	 * form as `number`s as well, so standard floating point algorithms from
+	 * papers can be used.
+	 *
+	 * We use Veltkamp's splitting, as described and studied in
+	 *   Sylvie Boldo.
+	 *   Pitfalls of a Full Floating-Point Proof:
+	 *   Example on the Formal Proof of the Veltkamp/Dekker Algorithms
+	 *   https://dx.doi.org/10.1007/11814771_6
+	 * Section 3, with β = 2, t = 53, s = 53 - 24 = 29, x = mod.
+	 * 53 is the number of effective mantissa bits in a Double; 24 in a Float.
+	 *
+	 * ◦ is the round-to-nearest operation with a tie-breaking rule (in our case,
+	 * ties-to-even).
+	 *
+	 *   Let C = βˢ + 1 = 536870913
+	 *   p = ◦(x × C)
+	 *   q = ◦(x − p)
+	 *   x₁ = ◦(p + q)
+	 *
+	 * Boldo proves that x₁ is the (t-s)-bit float closest to x, using the same
+	 * tie-breaking rule as ◦. Since (t-s) = 24, this is the closest binary32
+	 * value (with 24 mantissa bits), and therefore the correct result of
+	 * `fround`.
+	 *
+	 * Boldo also proves that if the computation of x × C does not overflow, then
+	 * none of the following operations will overflow. We know that x (mod) is
+	 * less than the overflow-threshold, and overflow-threshold × C does not
+	 * overflow, so that computation can never cause an overflow.
+	 *
+	 * If the reader does not have access to Boldo's paper, they may refer
+	 * instead to
+	 *   Claude-Pierre Jeannerod, Jean-Michel Muller, Paul Zimmermann.
+	 *   On various ways to split a floating-point number.
+	 *   ARITH 2018 - 25th IEEE Symposium on Computer Arithmetic,
+	 *   Jun 2018, Amherst (MA), United States.
+	 *   pp.53-60, 10.1109/ARITH.2018.8464793. hal-01774587v2
+	 * available at
+	 *   https://hal.inria.fr/hal-01774587v2/document
+	 * Section III, although that paper defers some theorems and proofs to
+	 * Boldo's.
+	 */
+	if (mod >= 1.1754943508222875e-38) { // normal-form-threshold
+		var p = mod * 536870913; // 2**29 + 1
+		return s * (p + (mod - p));
+	}
+
+	/*
+	 * Subnormal form case.
+	 *
+	 * We round `mod` to the nearest multiple of the smallest positive binary32
+	 * value (which we call `Min32`), breaking ties to an even multiple.
+	 *
+	 * We do this by leveraging the inherent loss of precision near the minimum
+	 * positive `number` value (`Min64`): conceptually, we divide the value by
+	 *   Min32 / Min64
+	 * which will drop the excess precision, applying exactly the rounding
+	 * strategy that we want. Then we multiply the value back by the same
+	 * constant.
+	 *
+	 * However, `Min32 / Min64` is not representable as a finite `number`.
+	 * Therefore, we instead use the *inverse* constant
+	 *   Min64 / Min32
+	 * and we first multiply by that constant, then divide by it.
+	 *
+	 * ---
+	 *
+	 * As an additional "hack", the input values NaN, +0 and -0 also fall in this
+	 * code path. For them, this computation happens to be an identity, and is
+	 * therefore correct as well.
+	 */
+	return s * ((mod * 3.5257702653609953e-279) / 3.5257702653609953e-279); // Min64 / Min32
+
+	/* eslint-enable no-magic-numbers */
 };
diff --git a/package.json b/package.json
@@ -69,8 +69,7 @@
   },
   "dependencies": {
     "call-bind": "^1.0.2",
-    "define-properties": "^1.1.3",
-    "is-nan": "^1.3.2"
+    "define-properties": "^1.1.3"
   },
   "auto-changelog": {
     "output": "CHANGELOG.md",

diff --git a/test/tests.js b/test/tests.js
@@ -50,6 +50,10 @@ module.exports = function (fround, t) {
 		st.equal(fround(maxFloat32), maxFloat32, 'fround(maxFloat32)');
 		st.equal(fround(-maxFloat32), -maxFloat32, 'fround(-maxFloat32)');
 
+		var maxValueThatRoundsDownToMaxFloat32 = 3.4028235677973362e+38;
+		st.equal(fround(maxValueThatRoundsDownToMaxFloat32), maxFloat32, 'fround(maxValueThatRoundsDownToMaxFloat32)');
+		st.equal(fround(-maxValueThatRoundsDownToMaxFloat32), -maxFloat32, 'fround(-maxValueThatRoundsDownToMaxFloat32)');
+
 		// round-nearest rounds down to maxFloat32
 		st.equal(fround(maxFloat32 + Math.pow(2, Math.pow(2, 8 - 1) - 1 - 23 - 2)), maxFloat32, 'fround(maxFloat32 + pow(2, pow(2, 8 - 1) - 1 - 23 - 2))');
 		st.end();
@@ -65,4 +69,16 @@ module.exports = function (fround, t) {
 		st.equal(fround((-minFloat32 / 2) - Math.pow(2, -202)), -minFloat32, 'fround((-minFloat32 / 2) - pow(2, -202))');
 		st.end();
 	});
+
+	t.test('rounds properly with the subnormal-normal boundary', function (st) {
+		var maxSubnormal32 = 1.1754942106924411e-38;
+		var minNormal32 = 1.1754943508222875e-38;
+		st.equal(fround(1.1754942807573642e-38), maxSubnormal32, 'fround(1.1754942807573642e-38)');
+		st.equal(fround(1.1754942807573643e-38), minNormal32, 'fround(1.1754942807573643e-38)');
+		st.equal(fround(1.1754942807573644e-38), minNormal32, 'fround(1.1754942807573644e-38)');
+		st.equal(fround(-1.1754942807573642e-38), -maxSubnormal32, 'fround(-1.1754942807573642e-38)');
+		st.equal(fround(-1.1754942807573643e-38), -minNormal32, 'fround(-1.1754942807573643e-38)');
+		st.equal(fround(-1.1754942807573644e-38), -minNormal32, 'fround(-1.1754942807573644e-38)');
+		st.end();
+	});
 };