From dc5314fd3f3f637af527101913f55313f2e7e648 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 28 Jan 2023 06:29:21 -0600 Subject: [PATCH] GH-100485: Add extended accuracy test. Switch to faster fma() based variant. GH-101383) --- Lib/test/test_math.py | 83 +++++++++++++++++++++++++++++++++++++++++++ Modules/mathmodule.c | 53 +++++++++------------------ 2 files changed, 100 insertions(+), 36 deletions(-) diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index b8ac8f32055d33e..fcdec93484e0003 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -1369,6 +1369,89 @@ def run(func, *args): args, ) + @requires_IEEE_754 + @unittest.skipIf(HAVE_DOUBLE_ROUNDING, + "sumprod() accuracy not guaranteed on machines with double rounding") + @support.cpython_only # Other implementations may choose a different algorithm + @support.requires_resource('cpu') + def test_sumprod_extended_precision_accuracy(self): + import operator + from fractions import Fraction + from itertools import starmap + from collections import namedtuple + from math import log2, exp2, fabs + from random import choices, uniform, shuffle + from statistics import median + + DotExample = namedtuple('DotExample', ('x', 'y', 'target_sumprod', 'condition')) + + def DotExact(x, y): + vec1 = map(Fraction, x) + vec2 = map(Fraction, y) + return sum(starmap(operator.mul, zip(vec1, vec2, strict=True))) + + def Condition(x, y): + return 2.0 * DotExact(map(abs, x), map(abs, y)) / abs(DotExact(x, y)) + + def linspace(lo, hi, n): + width = (hi - lo) / (n - 1) + return [lo + width * i for i in range(n)] + + def GenDot(n, c): + """ Algorithm 6.1 (GenDot) works as follows. The condition number (5.7) of + the dot product xT y is proportional to the degree of cancellation. In + order to achieve a prescribed cancellation, we generate the first half of + the vectors x and y randomly within a large exponent range. This range is + chosen according to the anticipated condition number. The second half of x + and y is then constructed choosing xi randomly with decreasing exponent, + and calculating yi such that some cancellation occurs. Finally, we permute + the vectors x, y randomly and calculate the achieved condition number. + """ + + assert n >= 6 + n2 = n // 2 + x = [0.0] * n + y = [0.0] * n + b = log2(c) + + # First half with exponents from 0 to |_b/2_| and random ints in between + e = choices(range(int(b/2)), k=n2) + e[0] = int(b / 2) + 1 + e[-1] = 0.0 + + x[:n2] = [uniform(-1.0, 1.0) * exp2(p) for p in e] + y[:n2] = [uniform(-1.0, 1.0) * exp2(p) for p in e] + + # Second half + e = list(map(round, linspace(b/2, 0.0 , n-n2))) + for i in range(n2, n): + x[i] = uniform(-1.0, 1.0) * exp2(e[i - n2]) + y[i] = (uniform(-1.0, 1.0) * exp2(e[i - n2]) - DotExact(x, y)) / x[i] + + # Shuffle + pairs = list(zip(x, y)) + shuffle(pairs) + x, y = zip(*pairs) + + return DotExample(x, y, DotExact(x, y), Condition(x, y)) + + def RelativeError(res, ex): + x, y, target_sumprod, condition = ex + n = DotExact(list(x) + [-res], list(y) + [1]) + return fabs(n / target_sumprod) + + def Trial(dotfunc, c, n): + ex = GenDot(10, c) + res = dotfunc(ex.x, ex.y) + return RelativeError(res, ex) + + times = 1000 # Number of trials + n = 20 # Length of vectors + c = 1e30 # Target condition number + + relative_err = median(Trial(math.sumprod, c, n) for i in range(times)) + self.assertLess(relative_err, 1e-16) + def testModf(self): self.assertRaises(TypeError, math.modf) diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 2da50bbd54b57f7..481e29915464f11 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2832,12 +2832,7 @@ long_add_would_overflow(long a, long b) } /* -Double and triple length extended precision floating point arithmetic -based on: - - A Floating-Point Technique for Extending the Available Precision - by T. J. Dekker - https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf +Double and triple length extended precision algorithms from: Accurate Sum and Dot Product by Takeshi Ogita, Siegfried M. Rump, and Shin’Ichi Oishi @@ -2848,36 +2843,22 @@ based on: typedef struct{ double hi; double lo; } DoubleLength; -static inline DoubleLength -twosum(double a, double b) +static DoubleLength +dl_sum(double a, double b) { - // Rump Algorithm 3.1 Error-free transformation of the sum + /* Algorithm 3.1 Error-free transformation of the sum */ double x = a + b; double z = x - a; double y = (a - (x - z)) + (b - z); return (DoubleLength) {x, y}; } -static inline DoubleLength -dl_split(double x) { - // Rump Algorithm 3.2 Error-free splitting of a floating point number - // Dekker (5.5) and (5.6). - double t = x * 134217729.0; // Veltkamp constant = 2.0 ** 27 + 1 - double hi = t - (t - x); - double lo = x - hi; - return (DoubleLength) {hi, lo}; -} - -static inline DoubleLength +static DoubleLength dl_mul(double x, double y) { - // Dekker (5.12) and mul12() - DoubleLength xx = dl_split(x); - DoubleLength yy = dl_split(y); - double p = xx.hi * yy.hi; - double q = xx.hi * yy.lo + xx.lo * yy.hi; - double z = p + q; - double zz = p - z + q + xx.lo * yy.lo; + /* Algorithm 3.5. Error-free transformation of a product */ + double z = x * y; + double zz = fma(x, y, -z); return (DoubleLength) {z, zz}; } @@ -2885,21 +2866,21 @@ typedef struct { double hi; double lo; double tiny; } TripleLength; static const TripleLength tl_zero = {0.0, 0.0, 0.0}; -static inline TripleLength -tl_fma(TripleLength total, double x, double y) +static TripleLength +tl_fma(double x, double y, TripleLength total) { - // Rump Algorithm 5.10 with K=3 and using SumKVert + /* Algorithm 5.10 with SumKVert for K=3 */ DoubleLength pr = dl_mul(x, y); - DoubleLength sm = twosum(total.hi, pr.hi); - DoubleLength r1 = twosum(total.lo, pr.lo); - DoubleLength r2 = twosum(r1.hi, sm.lo); + DoubleLength sm = dl_sum(total.hi, pr.hi); + DoubleLength r1 = dl_sum(total.lo, pr.lo); + DoubleLength r2 = dl_sum(r1.hi, sm.lo); return (TripleLength) {sm.hi, r2.hi, total.tiny + r1.lo + r2.lo}; } -static inline double +static double tl_to_d(TripleLength total) { - DoubleLength last = twosum(total.lo, total.hi); + DoubleLength last = dl_sum(total.lo, total.hi); return total.tiny + last.lo + last.hi; } @@ -3066,7 +3047,7 @@ math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) } else { goto finalize_flt_path; } - TripleLength new_flt_total = tl_fma(flt_total, flt_p, flt_q); + TripleLength new_flt_total = tl_fma(flt_p, flt_q, flt_total); if (isfinite(new_flt_total.hi)) { flt_total = new_flt_total; flt_total_in_use = true;