From 5a2b984568f72f0d7ff7c7b4ee8ce31af9fd1b7e Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 4 Feb 2023 17:54:44 -0600 Subject: [PATCH] GH-100485: Create an alternative code path when an accurate fma() implementation is not available (#101567) --- Lib/test/test_math.py | 5 +++++ Modules/mathmodule.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index 2c84e55ab45c57..8d849045b2d11f 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -1450,6 +1450,11 @@ def Trial(dotfunc, c, n): n = 20 # Length of vectors c = 1e30 # Target condition number + # If the following test fails, it means that the C math library + # implementation of fma() is not compliant with the C99 standard + # and is inaccurate. To solve this problem, make a new build + # with the symbol UNRELIABLE_FMA defined. That will enable a + # slower but accurate code path that avoids the fma() call. relative_err = median(Trial(math.sumprod, c, n) for i in range(times)) self.assertLess(relative_err, 1e-16) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index e6cdb3bae1ecff..654336d6d9f4bc 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2851,6 +2851,8 @@ dl_sum(double a, double b) return (DoubleLength) {x, y}; } +#ifndef UNRELIABLE_FMA + static DoubleLength dl_mul(double x, double y) { @@ -2860,6 +2862,47 @@ dl_mul(double x, double y) return (DoubleLength) {z, zz}; } +#else + +/* + The default implementation of dl_mul() depends on the C math library + having an accurate fma() function as required by ยง 7.12.13.1 of the + C99 standard. + + The UNRELIABLE_FMA option is provided as a slower but accurate + alternative for builds where the fma() function is found wanting. + The speed penalty may be modest (17% slower on an Apple M1 Max), + so don't hesitate to enable this build option. + + The algorithms are from the T. J. Dekker paper: + A Floating-Point Technique for Extending the Available Precision + https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf +*/ + +static DoubleLength +dl_split(double x) { + // Dekker (5.5) and (5.6). + double t = x * 134217729.0; // Veltkamp constant = 2.0 ** 27 + 1 + double hi = t - (t - x); + double lo = x - hi; + return (DoubleLength) {hi, lo}; +} + +static DoubleLength +dl_mul(double x, double y) +{ + // Dekker (5.12) and mul12() + DoubleLength xx = dl_split(x); + DoubleLength yy = dl_split(y); + double p = xx.hi * yy.hi; + double q = xx.hi * yy.lo + xx.lo * yy.hi; + double z = p + q; + double zz = p - z + q + xx.lo * yy.lo; + return (DoubleLength) {z, zz}; +} + +#endif + typedef struct { double hi; double lo; double tiny; } TripleLength; static const TripleLength tl_zero = {0.0, 0.0, 0.0};