From aee9c8793f46c32e5662c5bd6862658860f8288b Mon Sep 17 00:00:00 2001
From: Kyle Nusbaum <kjn@9project.net>
Date: Thu, 9 May 2024 11:08:30 -0500
Subject: [PATCH] ddtrace/tracer: use math/rand/v2 for SpanID and TraceID
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With the new version 2 of the math/rand standard library package, there is
no longer a need to perform our own complicated initialization of an RNG,
and in fact, we should avoid doing so.

See more details about the new rand package here: https://go.dev/blog/randv2

Instead, we can rely on the global RNG provided by the Go runtime. We
previously chose not to do that because we could not rely on the global RNG
to be initialized, meaning there would likely be many trace and span ID
clashes. Likewise, it was a bad idea to initialize the RNG ourselves, since
as a library it's bad practice to mutate global state in a way that will
have visible effects on the application.

We previously chose to initialize our own RNG instance for the tracer and
synchronize access to it. Due to the low entropy of the RNG seed (it only
used 32 bits of the 64 provided) we also chose to xor the produced random
numbers with the current unix time in order to decrease collisions.

math/rand/v2 removes the need to do either of those things. The new global
RNG is automatically seeded at startup with enough entropy to remove the
need to xor with a timestamp. It also has the added benefit of being
per-goroutine, meaning there is no synchronization involved.

This PR adds a new implementation of the span/trace ID generator which will
be used for all Go applications compiled with go1.22.0 and later. The old
implementation remains for older versions of Go, but should be removed once
those versions fall out of the support window.

I've also added some benchmarks and used them to compare the old and new
implementations.

The benefits of this change are a smaller implementation and less
historical baggage, as well as improved performance if there are lots of
threads trying to generate spans.

```
goos: linux
goarch: amd64
pkg: gopkg.in/DataDog/dd-trace-go.v1/ddtrace/tracer
cpu: 13th Gen Intel(R) Core(TM) i5-1340P
                       │  old.bench   │              new.bench              │
                       │    sec/op    │   sec/op     vs base                │
StartSpan-16              1.681µ ± 3%   1.662µ ± 3%        ~ (p=0.447 n=10)
StartSpanConcurrent-16    4.673µ ± 1%   3.955µ ± 3%  -15.37% (p=0.000 n=10)
GenSpanID-16             13.840n ± 0%   4.502n ± 1%  -67.47% (p=0.000 n=10)
geomean                   477.3n        309.3n       -35.20%

                       │   old.bench    │               new.bench               │
                       │      B/op      │     B/op      vs base                 │
StartSpan-16             1.603Ki ± 0%     1.603Ki ± 0%       ~ (p=1.000 n=10) ¹
StartSpanConcurrent-16   16.05Ki ± 0%     16.04Ki ± 0%  -0.03% (p=0.000 n=10)
GenSpanID-16               0.000 ± 0%       0.000 ± 0%       ~ (p=1.000 n=10) ¹
geomean                               ²                 -0.01%                ²
¹ all samples are equal
² summaries must be >0 to compute geomean

                       │  old.bench   │              new.bench              │
                       │  allocs/op   │ allocs/op   vs base                 │
StartSpan-16             16.00 ± 0%     16.00 ± 0%       ~ (p=1.000 n=10) ¹
StartSpanConcurrent-16   160.0 ± 0%     160.0 ± 0%       ~ (p=1.000 n=10) ¹
GenSpanID-16             0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
geomean                             ²               +0.00%                ²
¹ all samples are equal
² summaries must be >0 to compute geomean
```
---
 ddtrace/tracer/rand.go             | 14 +++++++++++
 ddtrace/tracer/rand_go1_22.go      | 21 +++++++++++++++++
 ddtrace/tracer/sampler_test.go     |  4 ++--
 ddtrace/tracer/spancontext_test.go |  6 ++---
 ddtrace/tracer/tracer.go           |  7 ------
 ddtrace/tracer/tracer_test.go      | 38 ++++++++++++++++++++++++++++++
 ddtrace/tracer/writer_test.go      |  2 +-
 7 files changed, 79 insertions(+), 13 deletions(-)
 create mode 100644 ddtrace/tracer/rand_go1_22.go

diff --git a/ddtrace/tracer/rand.go b/ddtrace/tracer/rand.go
index ecedc3ed10..8eb04c0fed 100644
--- a/ddtrace/tracer/rand.go
+++ b/ddtrace/tracer/rand.go
@@ -3,6 +3,9 @@
 // This product includes software developed at Datadog (https://www.datadoghq.com/).
 // Copyright 2016 Datadog, Inc.
 
+//go:build !go1.22
+
+// TODO(knusbaum): This file should be deleted once go1.21 falls out of support
 package tracer
 
 import (
@@ -54,3 +57,14 @@ func (rs *safeSource) Seed(seed int64) {
 	rs.source.Seed(seed)
 	rs.Unlock()
 }
+
+// generateSpanID returns a random uint64 that has been XORd with the startTime.
+// This is done to get around the 32-bit random seed limitation that may create collisions if there is a large number
+// of go services all generating spans.
+func generateSpanID(startTime int64) uint64 {
+	return random.Uint64() ^ uint64(startTime)
+}
+
+func randUint64() uint64 {
+	return random.Uint64()
+}
diff --git a/ddtrace/tracer/rand_go1_22.go b/ddtrace/tracer/rand_go1_22.go
new file mode 100644
index 0000000000..9e7948e47e
--- /dev/null
+++ b/ddtrace/tracer/rand_go1_22.go
@@ -0,0 +1,21 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016 Datadog, Inc.
+
+//go:build go1.22
+
+package tracer
+
+import (
+	"math"
+	"math/rand/v2"
+)
+
+func randUint64() uint64 {
+	return rand.Uint64()
+}
+
+func generateSpanID(startTime int64) uint64 {
+	return rand.Uint64() & math.MaxInt64
+}
diff --git a/ddtrace/tracer/sampler_test.go b/ddtrace/tracer/sampler_test.go
index 89c1810836..4b8ab3fe2d 100644
--- a/ddtrace/tracer/sampler_test.go
+++ b/ddtrace/tracer/sampler_test.go
@@ -442,12 +442,12 @@ func TestRuleEnvVars(t *testing.T) {
 
 func TestRulesSampler(t *testing.T) {
 	makeSpan := func(op string, svc string) *span {
-		s := newSpan(op, svc, "res-10", random.Uint64(), random.Uint64(), 0)
+		s := newSpan(op, svc, "res-10", randUint64(), randUint64(), 0)
 		s.setMeta("hostname", "hn-30")
 		return s
 	}
 	makeFinishedSpan := func(op, svc, resource string, tags map[string]interface{}) *span {
-		s := newSpan(op, svc, resource, random.Uint64(), random.Uint64(), 0)
+		s := newSpan(op, svc, resource, randUint64(), randUint64(), 0)
 		for k, v := range tags {
 			s.SetTag(k, v)
 		}
diff --git a/ddtrace/tracer/spancontext_test.go b/ddtrace/tracer/spancontext_test.go
index cd7cfb39e7..60ee96127b 100644
--- a/ddtrace/tracer/spancontext_test.go
+++ b/ddtrace/tracer/spancontext_test.go
@@ -129,7 +129,7 @@ func TestSpanTracePushOne(t *testing.T) {
 	_, transport, flush, stop := startTestTracer(t)
 	defer stop()
 
-	traceID := random.Uint64()
+	traceID := randUint64()
 	root := newSpan("name1", "a-service", "a-resource", traceID, traceID, 0)
 	trace := root.context.trace
 
@@ -229,7 +229,7 @@ func TestSpanTracePushNoFinish(t *testing.T) {
 	assert.NotNil(buffer)
 	assert.Len(buffer.spans, 0)
 
-	traceID := random.Uint64()
+	traceID := randUint64()
 	root := newSpan("name1", "a-service", "a-resource", traceID, traceID, 0)
 	root.context.trace = buffer
 
@@ -254,7 +254,7 @@ func TestSpanTracePushSeveral(t *testing.T) {
 	assert.NotNil(buffer)
 	assert.Len(buffer.spans, 0)
 
-	traceID := random.Uint64()
+	traceID := randUint64()
 	root := trc.StartSpan("name1", WithSpanID(traceID))
 	span2 := trc.StartSpan("name2", ChildOf(root.Context()))
 	span3 := trc.StartSpan("name3", ChildOf(root.Context()))
diff --git a/ddtrace/tracer/tracer.go b/ddtrace/tracer/tracer.go
index 42e2b118ab..363bb24f31 100644
--- a/ddtrace/tracer/tracer.go
+++ b/ddtrace/tracer/tracer.go
@@ -610,13 +610,6 @@ func (t *tracer) StartSpan(operationName string, options ...ddtrace.StartSpanOpt
 	return span
 }
 
-// generateSpanID returns a random uint64 that has been XORd with the startTime.
-// This is done to get around the 32-bit random seed limitation that may create collisions if there is a large number
-// of go services all generating spans.
-func generateSpanID(startTime int64) uint64 {
-	return random.Uint64() ^ uint64(startTime)
-}
-
 // applyPPROFLabels applies pprof labels for the profiler's code hotspots and
 // endpoint filtering feature to span. When span finishes, any pprof labels
 // found in ctx are restored. Additionally, this func informs the profiler how
diff --git a/ddtrace/tracer/tracer_test.go b/ddtrace/tracer/tracer_test.go
index 4feb89c3c5..5d95ea5295 100644
--- a/ddtrace/tracer/tracer_test.go
+++ b/ddtrace/tracer/tracer_test.go
@@ -2039,6 +2039,44 @@ func BenchmarkStartSpan(b *testing.B) {
 	}
 }
 
+func BenchmarkStartSpanConcurrent(b *testing.B) {
+	tracer, _, _, stop := startTestTracer(b, WithLogger(log.DiscardLogger{}), WithSampler(NewRateSampler(0)))
+	defer stop()
+
+	var wg sync.WaitGroup
+	var wgready sync.WaitGroup
+	start := make(chan struct{})
+	for i := 0; i < 10; i++ {
+		wg.Add(1)
+		wgready.Add(1)
+		go func() {
+			defer wg.Done()
+			root := tracer.StartSpan("pylons.request", ServiceName("pylons"), ResourceName("/"))
+			ctx := ContextWithSpan(context.TODO(), root)
+			wgready.Done()
+			<-start
+			for n := 0; n < b.N; n++ {
+				s, ok := SpanFromContext(ctx)
+				if !ok {
+					b.Fatal("no span")
+				}
+				StartSpan("op", ChildOf(s.Context()))
+			}
+		}()
+	}
+	wgready.Wait()
+	b.ResetTimer()
+	close(start)
+	wg.Wait()
+}
+
+func BenchmarkGenSpanID(b *testing.B) {
+	b.ResetTimer()
+	for n := 0; n < b.N; n++ {
+		generateSpanID(0)
+	}
+}
+
 // startTestTracer returns a Tracer with a DummyTransport
 func startTestTracer(t testing.TB, opts ...StartOption) (trc *tracer, transport *dummyTransport, flush func(n int), stop func()) {
 	transport = newDummyTransport()
diff --git a/ddtrace/tracer/writer_test.go b/ddtrace/tracer/writer_test.go
index bfa3b09384..a7717c9917 100644
--- a/ddtrace/tracer/writer_test.go
+++ b/ddtrace/tracer/writer_test.go
@@ -29,7 +29,7 @@ func TestImplementsTraceWriter(t *testing.T) {
 
 // makeSpan returns a span, adding n entries to meta and metrics each.
 func makeSpan(n int) *span {
-	s := newSpan("encodeName", "encodeService", "encodeResource", random.Uint64(), random.Uint64(), random.Uint64())
+	s := newSpan("encodeName", "encodeService", "encodeResource", randUint64(), randUint64(), randUint64())
 	for i := 0; i < n; i++ {
 		istr := fmt.Sprintf("%0.10d", i)
 		s.Meta[istr] = istr