Skip to content

Commit

Permalink
Optimize the implementation not to use any external function.
Browse files Browse the repository at this point in the history
The special code path for zeros and NaNs is removed. They fall through
as the general case.

We avoid `$Number` by using the asm.js idiom `v = +x`.

We avoid `abs` by using `s * v`. This causes `-0` to fall through as
is, but it turns out that it has the desired effect in the end.

We optimize the subnormal path to use only one multiplication and one
division.

We dispatch the overflow case from the normal and subnormal cases ahead
of time, which allows to more easily justify the correctness of the
algorithm.

We hard-code the magical constants to avoid having to look up external
identifiers, ensuring that immediate values are used in the generated
code.

Finally, and perhaps most importantly, we add significant documentation
about the algorithms for the normal and subnormal paths, with---if not
proof---at least an argument and references of why they are correct.
  • Loading branch information
sjrd committed Feb 11, 2022
1 parent be55c4c commit 34070bd
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 36 deletions.
133 changes: 99 additions & 34 deletions implementation.js
Original file line number Diff line number Diff line change
@@ -1,42 +1,107 @@
'use strict';

var isNaN = require('is-nan');

var $isFinite = isFinite;
var $Number = Number;
var abs = Math.abs;
module.exports = function fround(x) {
/*
* We use magic numbers to guarantee immediate values.
* We extensively document their value in accompanying comments.
*/
/* eslint-disable no-magic-numbers */

var BINARY_32_EPSILON = 1.1920928955078125e-7; // 2 ** -23;
var BINARY_32_MIN_VALUE = 1.1754943508222875e-38; // 2 ** -126;
var BINARY_32_MAX_VALUE = 3.4028234663852886e+38; // 2 ** 128 - 2 ** 104
var EPSILON = 2.220446049250313e-16; // Number.EPSILON
// Apply ToNumber(x) without relying on any function
var v = +x; /* eslint-disable-line no-implicit-coercion */

var inverseEpsilon = 1 / EPSILON;
var roundTiesToEven = function roundTiesToEven(n) {
// Even though this reduces down to `return n`, it takes advantage of built-in rounding.
return (n + inverseEpsilon) - inverseEpsilon;
};
var s = v < 0 ? -1 : 1; // 1 for NaN, +0 and -0
var mod = s * v; // abs(v), or -0 if v is -0

module.exports = function fround(x) {
var v = $Number(x);
if (v === 0 || isNaN(v) || !$isFinite(v)) {
return v;
}
var s = v < 0 ? -1 : 1;
var mod = abs(v);
if (mod < BINARY_32_MIN_VALUE) {
/* eslint operator-linebreak: [2, "before"] */
return (
s
* roundTiesToEven(mod / BINARY_32_MIN_VALUE / BINARY_32_EPSILON)
* BINARY_32_MIN_VALUE * BINARY_32_EPSILON
);
}
// Veltkamp's splitting (?)
var a = (1 + (BINARY_32_EPSILON / EPSILON)) * mod;
var result = a - (a - mod);
if (result > BINARY_32_MAX_VALUE || isNaN(result)) {
/*
* Overflow case.
*
* The magic number 3.4028235677973366e38 is the threshold from which fround
* causes an overflow, resulting in Infinity.
* This code path is also used when the input is already an Infinity.
*/
if (mod >= 3.4028235677973366e38) { // overflow-threshold
return s * Infinity;
}
return s * result;

/*
* Normal form case.
*
* The magic number 1.1754943508222875e-38 is the threshold from which the
* result of fround is represented as a normal form in binary32.
*
* Here, we know that both the input and output are expressed in a normal
* form as `number`s as well, so standard floating point algorithms from
* papers can be used.
*
* We use Veltkamp's splitting, as described and studied in
* Sylvie Boldo.
* Pitfalls of a Full Floating-Point Proof:
* Example on the Formal Proof of the Veltkamp/Dekker Algorithms
* https://dx.doi.org/10.1007/11814771_6
* Section 3, with β = 2, t = 53, s = 53 - 24 = 29, x = mod.
* 53 is the number of effective mantissa bits in a Double; 24 in a Float.
*
* ◦ is the round-to-nearest operation with a tie-breaking rule (in our case,
* ties-to-even).
*
* Let C = βˢ + 1 = 536870913
* p = ◦(x × C)
* q = ◦(x − p)
* x₁ = ◦(p + q)
*
* Boldo proves that x₁ is the (t-s)-bit float closest to x, using the same
* tie-breaking rule as ◦. Since (t-s) = 24, this is the closest binary32
* value (with 24 mantissa bits), and therefore the correct result of
* `fround`.
*
* Boldo also proves that if the computation of x × C does not overflow, then
* none of the following operations will overflow. We know that x (mod) is
* less than the overflow-threshold, and overflow-threshold × C does not
* overflow, so that computation can never cause an overflow.
*
* If the reader does not have access to Boldo's paper, they may refer
* instead to
* Claude-Pierre Jeannerod, Jean-Michel Muller, Paul Zimmermann.
* On various ways to split a floating-point number.
* ARITH 2018 - 25th IEEE Symposium on Computer Arithmetic,
* Jun 2018, Amherst (MA), United States.
* pp.53-60, 10.1109/ARITH.2018.8464793. hal-01774587v2
* available at
* https://hal.inria.fr/hal-01774587v2/document
* Section III, although that paper defers some theorems and proofs to
* Boldo's.
*/
if (mod >= 1.1754943508222875e-38) { // normal-form-threshold
var p = mod * 536870913; // 2**29 + 1
return s * (p + (mod - p));
}

/*
* Subnormal form case.
*
* We round `mod` to the nearest multiple of the smallest positive binary32
* value (which we call `Min32`), breaking ties to an even multiple.
*
* We do this by leveraging the inherent loss of precision near the minimum
* positive `number` value (`Min64`): conceptually, we divide the value by
* Min32 / Min64
* which will drop the excess precision, applying exactly the rounding
* strategy that we want. Then we multiply the value back by the same
* constant.
*
* However, `Min32 / Min64` is not representable as a finite `number`.
* Therefore, we instead use the *inverse* constant
* Min64 / Min32
* and we first multiply by that constant, then divide by it.
*
* ---
*
* As an additional "hack", the input values NaN, +0 and -0 also fall in this
* code path. For them, this computation happens to be an identity, and is
* therefore correct as well.
*/
return s * ((mod * 3.5257702653609953e-279) / 3.5257702653609953e-279); // Min64 / Min32

/* eslint-enable no-magic-numbers */
};
3 changes: 1 addition & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,7 @@
},
"dependencies": {
"call-bind": "^1.0.2",
"define-properties": "^1.1.3",
"is-nan": "^1.3.2"
"define-properties": "^1.1.3"
},
"auto-changelog": {
"output": "CHANGELOG.md",
Expand Down
16 changes: 16 additions & 0 deletions test/tests.js
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ module.exports = function (fround, t) {
st.equal(fround(maxFloat32), maxFloat32, 'fround(maxFloat32)');
st.equal(fround(-maxFloat32), -maxFloat32, 'fround(-maxFloat32)');

var maxValueThatRoundsDownToMaxFloat32 = 3.4028235677973362e+38;
st.equal(fround(maxValueThatRoundsDownToMaxFloat32), maxFloat32, 'fround(maxValueThatRoundsDownToMaxFloat32)');
st.equal(fround(-maxValueThatRoundsDownToMaxFloat32), -maxFloat32, 'fround(-maxValueThatRoundsDownToMaxFloat32)');

// round-nearest rounds down to maxFloat32
st.equal(fround(maxFloat32 + Math.pow(2, Math.pow(2, 8 - 1) - 1 - 23 - 2)), maxFloat32, 'fround(maxFloat32 + pow(2, pow(2, 8 - 1) - 1 - 23 - 2))');
st.end();
Expand All @@ -65,4 +69,16 @@ module.exports = function (fround, t) {
st.equal(fround((-minFloat32 / 2) - Math.pow(2, -202)), -minFloat32, 'fround((-minFloat32 / 2) - pow(2, -202))');
st.end();
});

t.test('rounds properly with the subnormal-normal boundary', function (st) {
var maxSubnormal32 = 1.1754942106924411e-38;
var minNormal32 = 1.1754943508222875e-38;
st.equal(fround(1.1754942807573642e-38), maxSubnormal32, 'fround(1.1754942807573642e-38)');
st.equal(fround(1.1754942807573643e-38), minNormal32, 'fround(1.1754942807573643e-38)');
st.equal(fround(1.1754942807573644e-38), minNormal32, 'fround(1.1754942807573644e-38)');
st.equal(fround(-1.1754942807573642e-38), -maxSubnormal32, 'fround(-1.1754942807573642e-38)');
st.equal(fround(-1.1754942807573643e-38), -minNormal32, 'fround(-1.1754942807573643e-38)');
st.equal(fround(-1.1754942807573644e-38), -minNormal32, 'fround(-1.1754942807573644e-38)');
st.end();
});
};

0 comments on commit 34070bd

Please sign in to comment.