From 8acb9eda7d074e81106a02f7537e2211e32e3141 Mon Sep 17 00:00:00 2001
From: Dmitry Panov <dop@itoolabs.com>
Date: Fri, 24 Apr 2020 23:03:52 +0100
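Subject: [PATCH] Performance optimisations

Reduce allocations and repeated work in the ftoa package:

- d2b now stores its result in a caller-supplied *big.Int instead of
  allocating and returning a new one.
- ftoa declares the quotient and delta big.Int temporaries once, outside
  the digit-generation loops, instead of allocating them per iteration.
- FToStr builds the leading "0." and the zero padding with a single
  expand and copy instead of repeated insert calls.
- pow5mult uses precomputed powers of 5 for k < 512 and only falls back
  to big.Int.Exp for larger exponents.

Also drop the redundant int(k + 1) conversions in ftoa's return
statements, add a ModeFixed test case, and add benchmarks that compare
FToStr with strconv.AppendFloat.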
---
 ftoa/common.go      |  4 +--
 ftoa/ftobasestr.go  |  3 +-
 ftoa/ftostr.go      | 83 +++++++++++++++++++++++++++++++++------------
 ftoa/ftostr_test.go | 34 +++++++++++++++++++
 4 files changed, 99 insertions(+), 25 deletions(-)

diff --git a/ftoa/common.go b/ftoa/common.go
index 5906464f..ea632df4 100644
--- a/ftoa/common.go
+++ b/ftoa/common.go
@@ -97,7 +97,7 @@ func stuffBits(bits []byte, offset int, val uint32) {
 	bits[offset+3] = byte(val)
 }
 
-func d2b(d float64) (b *big.Int, e, bits int) {
+func d2b(d float64, bi *big.Int) (e, bits int) {
 	dBits := math.Float64bits(d)
 	d0 := uint32(dBits >> 32)
 	d1 := uint32(dBits)
@@ -144,6 +144,6 @@ func d2b(d float64) (b *big.Int, e, bits int) {
 		e = de - bias - (p - 1) + 1 + k
 		bits = 32*i - hi0bits(z)
 	}
-	b = (&big.Int{}).SetBytes(dbl_bits)
+	bi.SetBytes(dbl_bits)
 	return
 }
diff --git a/ftoa/ftobasestr.go b/ftoa/ftobasestr.go
index a79050fe..424a5788 100644
--- a/ftoa/ftobasestr.go
+++ b/ftoa/ftobasestr.go
@@ -64,7 +64,8 @@ func FToBaseStr(num float64, radix int) string {
 		word0 := uint32(dBits >> 32)
 		word1 := uint32(dBits)
 
-		b, e, _ := d2b(df)
+		b := new(big.Int)
+		e, _ := d2b(df, b)
 		//            JS_ASSERT(e < 0);
 		/* At this point df = b * 2^e.  e must be less than zero because 0 < df < 1. */
 
diff --git a/ftoa/ftostr.go b/ftoa/ftostr.go
index f1c296bf..e0d7a097 100644
--- a/ftoa/ftostr.go
+++ b/ftoa/ftostr.go
@@ -36,6 +36,9 @@ var (
 	big5  = big.NewInt(5)
 	big10 = big.NewInt(10)
 
+	p05       = []*big.Int{big5, big.NewInt(25), big.NewInt(125)}
+	pow5Cache [7]*big.Int
+
 	dtoaModes = []int{
 		ModeStandard:            0,
 		ModeStandardExponential: 0,
@@ -100,7 +103,8 @@ func ftoa(d float64, mode int, biasUp bool, ndigits int, buf []byte) ([]byte, in
 	if sign {
 		buf = append(buf, '-')
 	}
-	b, be, bbits := d2b(d)
+	b := new(big.Int)
+	be, bbits := d2b(d, b)
 	i := int((word0 >> exp_shift1) & (exp_mask >> exp_shift1))
 	var d2 float64
 	var denorm bool
@@ -534,9 +538,8 @@ func ftoa(d float64, mode int, biasUp bool, ndigits int, buf []byte) ([]byte, in
 		}
 		/* mlo/S = maximum acceptable error, divided by 10^k, if the output is less than d. */
 		/* mhi/S = maximum acceptable error, divided by 10^k, if the output is greater than d. */
-
+		var z, delta big.Int
 		for i = 1; ; i++ {
-			z := new(big.Int)
 			z.DivMod(b, S, b)
 			dig = byte(z.Int64() + '0')
 			/* Do we yet have the shortest decimal string
@@ -544,12 +547,12 @@ func ftoa(d float64, mode int, biasUp bool, ndigits int, buf []byte) ([]byte, in
 			 */
 			j = b.Cmp(mlo)
 			/* j is b/S compared with mlo/S. */
-			delta := new(big.Int).Sub(S, mhi)
+			delta.Sub(S, mhi)
 			var j1 int
 			if delta.Sign() <= 0 {
 				j1 = 1
 			} else {
-				j1 = b.Cmp(delta)
+				j1 = b.Cmp(&delta)
 			}
 			/* j1 is b/S compared with 1 - mhi/S. */
 			if (j1 == 0) && (mode == 0) && ((_word1(d) & 1) == 0) {
@@ -560,13 +563,13 @@ func ftoa(d float64, mode int, biasUp bool, ndigits int, buf []byte) ([]byte, in
 						k++
 						buf = append(buf, '1')
 					}
-					return buf, int(k + 1)
+					return buf, k + 1
 				}
 				if j > 0 {
 					dig++
 				}
 				buf = append(buf, dig)
-				return buf, int(k + 1)
+				return buf, k + 1
 			}
 			if (j < 0) || ((j == 0) && (mode == 0) && ((_word1(d) & 1) == 0)) {
 				if j1 > 0 {
@@ -583,28 +586,25 @@ func ftoa(d float64, mode int, biasUp bool, ndigits int, buf []byte) ([]byte, in
 								k++
 								buf = append(buf, '1')
 							}
-							return buf, int(k + 1)
+							return buf, k + 1
 						}
 					}
 				}
 				buf = append(buf, dig)
-				return buf, int(k + 1)
+				return buf, k + 1
 			}
 			if j1 > 0 {
 				if dig == '9' { /* possible if i == 1 */
-					//                    round_9_up:
-					//                        *s++ = '9';
-					//                        goto roundoff;
 					buf = append(buf, '9')
 					buf, flag := roundOff(buf)
 					if flag {
 						k++
 						buf = append(buf, '1')
 					}
-					return buf, int(k + 1)
+					return buf, k + 1
 				}
 				buf = append(buf, dig+1)
-				return buf, int(k + 1)
+				return buf, k + 1
 			}
 			buf = append(buf, dig)
 			if i == ilim {
@@ -619,9 +619,8 @@ func ftoa(d float64, mode int, biasUp bool, ndigits int, buf []byte) ([]byte, in
 			}
 		}
 	} else {
+		var z big.Int
 		for i = 1; ; i++ {
-			//                (char)(dig = quorem(b,S) + '0');
-			z := new(big.Int)
 			z.DivMod(b, S, b)
 			dig = byte(z.Int64() + '0')
 			buf = append(buf, dig)
@@ -642,13 +641,13 @@ func ftoa(d float64, mode int, biasUp bool, ndigits int, buf []byte) ([]byte, in
 		if flag {
 			k++
 			buf = append(buf, '1')
-			return buf, int(k + 1)
+			return buf, k + 1
 		}
 	} else {
 		buf = stripTrailingZeroes(buf)
 	}
 
-	return buf, int(k + 1)
+	return buf, k + 1
 }
 
 func insert(b []byte, p int, c byte) []byte {
@@ -658,6 +657,16 @@ func insert(b []byte, p int, c byte) []byte {
 	return b
 }
 
+// expand returns b lengthened by delta bytes. It reslices in place when
+// cap(b) is large enough; otherwise it copies b into a new, longer array.
+func expand(b []byte, delta int) []byte {
+	n := len(b) + delta
+	if n > cap(b) {
+		b = append(make([]byte, 0, n), b...)
+	}
+	return b[:n]
+}
+
 func FToStr(d float64, mode FToStrMode, precision int, buffer []byte) []byte {
 	if mode == ModeFixed && (d >= 1e21 || d <= -1e21) {
 		mode = ModeStandard
@@ -736,10 +745,13 @@ func FToStr(d float64, mode FToStrMode, precision int, buffer []byte) []byte {
 				if sign {
 					o = 1
 				}
-				for i := 0; i < 1-decPt; i++ {
-					buffer = insert(buffer, o, '0')
+				buffer = expand(buffer, 2-decPt)
+				copy(buffer[o+2-decPt:], buffer[o:])
+				buffer[o] = '0'
+				buffer[o+1] = '.'
+				for i := o + 2; i < o+2-decPt; i++ {
+					buffer[i] = '0'
 				}
-				buffer = insert(buffer, o+1, '.')
 			}
 		}
 	}
@@ -792,8 +804,26 @@ func stripTrailingZeroes(buf []byte) []byte {
 }
 
 /* Set b = b * 5^k.  k must be nonnegative. */
-// XXXX the C version built a cache of these
 func pow5mult(b *big.Int, k int) *big.Int {
+	// For k below 1<<(len(pow5Cache)+2), build 5^k from precomputed factors
+	// instead of calling Exp: the low two bits of k pick 5, 25 or 125 from
+	// p05, and every higher bit i+2 that is set contributes pow5Cache[i],
+	// which init sets to 5^(4<<i).
+	if limit := 1 << (len(pow5Cache) + 2); k < limit {
+		if low := k & 3; low != 0 {
+			b.Mul(b, p05[low-1])
+		}
+		// Walk the remaining bits from least significant upwards; the loop
+		// ends as soon as no set bits are left.
+		for idx, rest := 0, k>>2; rest != 0; idx, rest = idx+1, rest>>1 {
+			if rest&1 == 1 {
+				b.Mul(b, pow5Cache[idx])
+			}
+		}
+		return b
+	}
+	// Exponents at or above the limit are not covered by the cache, so the
+	// power is computed from scratch.
 	return b.Mul(b, new(big.Int).Exp(big5, big.NewInt(int64(k)), nil))
 }
 
@@ -812,3 +842,12 @@ func roundOff(buf []byte) ([]byte, bool) {
 	}
 	return buf[:stop], true
 }
+
+// init fills pow5Cache by repeated squaring: pow5Cache[i] holds 5^(4<<i).
+func init() {
+	pow5Cache[0] = big.NewInt(625)
+	for i := 1; i < len(pow5Cache); i++ {
+		prev := pow5Cache[i-1]
+		pow5Cache[i] = new(big.Int).Mul(prev, prev)
+	}
+}
diff --git a/ftoa/ftostr_test.go b/ftoa/ftostr_test.go
index c3b343c7..e645b598 100644
--- a/ftoa/ftostr_test.go
+++ b/ftoa/ftostr_test.go
@@ -2,6 +2,7 @@ package ftoa
 
 import (
 	"math"
+	"strconv"
 	"testing"
 )
 
@@ -31,7 +32,40 @@ func TestDtostr(t *testing.T) {
 	testFToStr(8.85, ModeExponential, 2, "8.8e+0", t)
 	testFToStr(885, ModeExponential, 2, "8.9e+2", t)
 	testFToStr(25, ModeExponential, 1, "3e+1", t)
+	testFToStr(1e-6, ModeFixed, 7, "0.0000010", t)
 	testFToStr(math.Inf(1), ModeStandard, 0, "Infinity", t)
 	testFToStr(math.NaN(), ModeStandard, 0, "NaN", t)
 	testFToStr(math.SmallestNonzeroFloat64, ModeExponential, 40, "4.940656458412465441765687928682213723651e-324", t)
 }
+
+func BenchmarkDtostrSmall(b *testing.B) {
+	b.ReportAllocs() // include allocation counts in the benchmark output
+	var scratch [128]byte
+	for n := b.N; n > 0; n-- {
+		FToStr(math.Pi, ModeExponential, 0, scratch[:0])
+	}
+}
+
+func BenchmarkDtostrBig(b *testing.B) {
+	b.ReportAllocs() // include allocation counts in the benchmark output
+	var scratch [128]byte
+	for n := b.N; n > 0; n-- {
+		FToStr(math.SmallestNonzeroFloat64, ModeExponential, 40, scratch[:0])
+	}
+}
+
+func BenchmarkAppendFloatBig(b *testing.B) {
+	b.ReportAllocs() // include allocation counts in the benchmark output
+	var scratch [128]byte
+	for n := b.N; n > 0; n-- {
+		strconv.AppendFloat(scratch[:0], math.SmallestNonzeroFloat64, 'e', 40, 64)
+	}
+}
+
+func BenchmarkAppendFloatSmall(b *testing.B) {
+	b.ReportAllocs() // include allocation counts in the benchmark output
+	var scratch [128]byte
+	for n := b.N; n > 0; n-- {
+		strconv.AppendFloat(scratch[:0], math.Pi, 'e', -1, 64)
+	}
+}