From 0a0c400335a7bf887fe61629ec738a2cd94aab69 Mon Sep 17 00:00:00 2001 From: codebien <2103732+codebien@users.noreply.github.com> Date: Thu, 24 Aug 2023 11:52:57 +0200 Subject: [PATCH] cloudv2: Higher resolution for Histogram --- output/cloud/expv2/hdr.go | 42 +++-- output/cloud/expv2/hdr_test.go | 167 +++++++++++++----- .../expv2/integration/testdata/metricset.json | 7 +- 3 files changed, 162 insertions(+), 54 deletions(-) diff --git a/output/cloud/expv2/hdr.go b/output/cloud/expv2/hdr.go index d7d351df579f..98194a2b79f2 100644 --- a/output/cloud/expv2/hdr.go +++ b/output/cloud/expv2/hdr.go @@ -9,6 +9,11 @@ import ( ) const ( + // defaultMinimumResolution is the default resolution used by histogram. + // It allows to have a higher granularity compared to the basic 1.0 value, + // supporting floating points up to 3 digits. + defaultMinimumResolution = .001 + // lowestTrackable represents the minimum value that the histogram tracks. // Essentially, it excludes negative numbers. // Most of metrics tracked by histograms are durations @@ -20,9 +25,10 @@ const ( // highestTrackable represents the maximum // value that the histogram is able to track with high accuracy (0.1% of error). - // It should be a high enough - // and rationale value for the k6 context; 2^30 = 1_073_741_824 - highestTrackable = 1 << 30 + // It represents the maximum number for int64 using the + // default minimum resolution, where it multiplies the value ingested from the histogram + // of a specific factor. Essentially, having higher resolution reduces the highest limit. + highestTrackable = 9223372036854775 // math.MaxInt64*defaultMinimumResolution ) // histogram represents a distribution @@ -61,13 +67,18 @@ type histogram struct { // Count is counts the amount of observed values. Count uint32 + + // MinimumResolution represents resolution used by Histogram. + // In principle, it is a multiplier factor for the tracked values. 
+ MinimumResolution float64 } func newHistogram() *histogram { return &histogram{ - Buckets: make(map[uint32]uint32), - Max: -math.MaxFloat64, - Min: math.MaxFloat64, + MinimumResolution: defaultMinimumResolution, + Buckets: make(map[uint32]uint32), + Max: -math.MaxFloat64, + Min: math.MaxFloat64, } } @@ -85,6 +96,8 @@ func (h *histogram) addToBucket(v float64) { h.Count++ h.Sum += v + v /= h.MinimumResolution + if v > highestTrackable { h.ExtraHighBucket++ return @@ -151,6 +164,9 @@ func histogramAsProto(h *histogram, time int64) *pbcloud.TrendHdrValue { if h.ExtraHighBucket > 0 { hval.ExtraHighValuesCounter = &h.ExtraHighBucket } + // We don't expect to change the minimum resolution at runtime + // so a pointer is safe here + hval.MinResolution = &h.MinimumResolution return hval } @@ -164,7 +180,7 @@ func resolveBucketIndex(val float64) uint32 { // We upscale to the next integer to ensure that each sample falls // within a specific bucket, even when the value is fractional. // This avoids under-representing the distribution in the histogram. - upscaled := uint32(math.Ceil(val)) + upscaled := uint64(math.Ceil(val)) // In histograms, bucket boundaries are usually defined as multiples of powers of 2, // allowing for efficient computation of bucket indexes. @@ -181,11 +197,11 @@ func resolveBucketIndex(val float64) uint32 { // 2^10 = 1024 ~ 1000 = 10^3 // f(x) = 3*x + 1 - empiric formula that works for us // since f(2)=7 and f(3)=10 - const k = uint32(7) + const k = uint64(7) // 256 = 1 << (k+1) if upscaled < 256 { - return upscaled + return uint32(upscaled) } // `nkdiff` helps us find the right bucket for `upscaled`. 
It does so by determining the @@ -205,8 +221,12 @@ func resolveBucketIndex(val float64) uint32 { // = (n-k+1)<<k + u>>(n-k) - (1<<k) = // = (n-k)<<k + u>>(n-k) // - nkdiff := uint32(bits.Len32(upscaled>>k) - 1) // msb index - return (nkdiff << k) + (upscaled >> nkdiff) + nkdiff := uint64(bits.Len64(upscaled>>k)) - 1 // msb index + + // We cast safely downscaling because we don't expect we may hit the uint32 limit + // with the bucket index. The bucket represented from the index as MaxUint32 + // would be a very huge number bigger than the trackable limits. + return uint32((nkdiff << k) + (upscaled >> nkdiff)) } // Add implements the metricValue interface. diff --git a/output/cloud/expv2/hdr_test.go b/output/cloud/expv2/hdr_test.go index 182a1de031b9..87e4cc5ad7c1 100644 --- a/output/cloud/expv2/hdr_test.go +++ b/output/cloud/expv2/hdr_test.go @@ -7,6 +7,7 @@ import ( "time" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "go.k6.io/k6/output/cloud/expv2/pbcloud" "google.golang.org/protobuf/types/known/timestamppb" ) @@ -33,12 +34,24 @@ func TestResolveBucketIndex(t *testing.T) { {in: 282.29, exp: 269}, {in: 1029, exp: 512}, {in: 39751, exp: 1179}, + {in: 100000, exp: 1347}, + {in: 182272, exp: 1458}, + {in: 183000, exp: 1458}, + {in: 184000, exp: 1459}, + {in: 200000, exp: 1475}, + + {in: 1 << 20, exp: 1792}, {in: (1 << 30) - 1, exp: 3071}, - {in: (1 << 30), exp: 3072}, - {in: math.MaxInt32, exp: 3199}, + {in: 1 << 30, exp: 3072}, + {in: 1 << 40, exp: 4352}, + {in: 1 << 62, exp: 7168}, + + {in: math.MaxInt32, exp: 3199}, // 2B + {in: math.MaxUint32, exp: 3327}, // 4B + {in: math.MaxInt64, exp: 7296}, // Huge number // 9.22...e+18 } for _, tc := range tests { - assert.Equal(t, tc.exp, resolveBucketIndex(tc.in), tc.in) + assert.Equal(t, int(tc.exp), int(resolveBucketIndex(tc.in)), tc.in) } } @@ -116,9 +129,11 @@ func TestHistogramAddWithSimpleValues(t *testing.T) { t.Run(strconv.Itoa(i), func(t *testing.T) { t.Parallel() h := newHistogram() + h.MinimumResolution = 1.0
for _, v := range tc.vals { h.Add(v) } + tc.exp.MinimumResolution = 1.0 assert.Equal(t, &tc.exp, h) }) } @@ -128,18 +143,20 @@ func TestHistogramAddWithUntrackables(t *testing.T) { t.Parallel() h := newHistogram() - for _, v := range []float64{5, -3.14, 2 * 1e9, 1} { + h.MinimumResolution = 1.0 + for _, v := range []float64{5, -3.14, 1<<62 + 1, 1} { h.Add(v) } exp := &histogram{ - Buckets: map[uint32]uint32{1: 1, 5: 1}, - ExtraLowBucket: 1, - ExtraHighBucket: 1, - Max: 2 * 1e9, - Min: -3.14, - Sum: 2*1e9 + 5 + 1 - 3.14, - Count: 4, + Buckets: map[uint32]uint32{1: 1, 5: 1}, + ExtraLowBucket: 1, + ExtraHighBucket: 1, + Max: 1 << 62, + Min: -3.14, + Sum: 1<<62 + 1 + 5 + 1 - 3.14, + Count: 4, + MinimumResolution: 1.0, } assert.Equal(t, exp, h) } @@ -148,6 +165,7 @@ func TestHistogramAddWithMultipleOccurances(t *testing.T) { t.Parallel() h := newHistogram() + h.MinimumResolution = 1.0 for _, v := range []float64{51.8, 103.6, 103.6, 103.6, 103.6} { h.Add(v) } @@ -161,6 +179,7 @@ func TestHistogramAddWithMultipleOccurances(t *testing.T) { Sum: 466.20000000000005, Count: 5, } + exp.MinimumResolution = 1.0 assert.Equal(t, exp, h) } @@ -168,16 +187,18 @@ func TestHistogramAddWithNegativeNum(t *testing.T) { t.Parallel() h := newHistogram() + h.MinimumResolution = 1.0 h.Add(-2.42314) exp := &histogram{ - Max: -2.42314, - Min: -2.42314, - Buckets: map[uint32]uint32{}, - ExtraLowBucket: 1, - ExtraHighBucket: 0, - Sum: -2.42314, - Count: 1, + Max: -2.42314, + Min: -2.42314, + Buckets: map[uint32]uint32{}, + ExtraLowBucket: 1, + ExtraHighBucket: 0, + Sum: -2.42314, + Count: 1, + MinimumResolution: 1.0, } assert.Equal(t, exp, h) } @@ -185,19 +206,22 @@ func TestHistogramAddWithNegativeNum(t *testing.T) { func TestHistogramAddWithMultipleNegativeNums(t *testing.T) { t.Parallel() h := newHistogram() + h.MinimumResolution = 1.0 for _, v := range []float64{-0.001, -0.001, -0.001} { h.Add(v) } exp := &histogram{ - Buckets: map[uint32]uint32{}, - ExtraLowBucket: 3, - ExtraHighBucket: 0, 
- Max: -0.001, - Min: -0.001, - Sum: -0.003, - Count: 3, + Buckets: map[uint32]uint32{}, + ExtraLowBucket: 3, + ExtraHighBucket: 0, + Max: -0.001, + Min: -0.001, + Sum: -0.003, + Count: 3, + MinimumResolution: 1.0, } + h.MinimumResolution = 1.0 assert.Equal(t, exp, h) } @@ -206,12 +230,13 @@ func TestNewHistoramWithNoVals(t *testing.T) { h := newHistogram() exp := &histogram{ - Buckets: map[uint32]uint32{}, - ExtraLowBucket: 0, - ExtraHighBucket: 0, - Max: -math.MaxFloat64, - Min: math.MaxFloat64, - Sum: 0, + Buckets: map[uint32]uint32{}, + ExtraLowBucket: 0, + ExtraHighBucket: 0, + Max: -math.MaxFloat64, + Min: math.MaxFloat64, + Sum: 0, + MinimumResolution: 0.001, } assert.Equal(t, exp, h) } @@ -224,9 +249,10 @@ func TestHistogramAsProto(t *testing.T) { } cases := []struct { - name string - vals []float64 - exp *pbcloud.TrendHdrValue + name string + vals []float64 + minResolution float64 + exp *pbcloud.TrendHdrValue }{ { name: "empty histogram", @@ -237,7 +263,7 @@ func TestHistogramAsProto(t *testing.T) { }, { name: "not trackable values", - vals: []float64{-0.23, 1<<30 + 1}, + vals: []float64{-0.23, 1<<62 + 1}, exp: &pbcloud.TrendHdrValue{ ExtraLowValuesCounter: uint32ptr(1), ExtraHighValuesCounter: uint32ptr(1), @@ -245,8 +271,8 @@ func TestHistogramAsProto(t *testing.T) { Spans: nil, Count: 2, MinValue: -0.23, - MaxValue: 1<<30 + 1, - Sum: (1 << 30) + 1 - 0.23, + MaxValue: 1<<62 + 1, + Sum: (1 << 62) + 1 - 0.23, }, }, { @@ -343,18 +369,79 @@ func TestHistogramAsProto(t *testing.T) { Sum: 56153.280000000006, }, }, + { + name: "Unrealistic", + vals: []float64{math.MaxUint32}, + exp: &pbcloud.TrendHdrValue{ + Count: 1, + ExtraLowValuesCounter: nil, + ExtraHighValuesCounter: nil, + Counters: []uint32{1}, + Spans: []*pbcloud.BucketSpan{ + { + Offset: 3327, + Length: 1, + }, + }, + MinValue: math.MaxUint32, + MaxValue: math.MaxUint32, + Sum: math.MaxUint32, + }, + }, + { + name: "DefaultMinimumResolution", + vals: []float64{200, 100, 200.1}, + minResolution: .001, 
+ exp: &pbcloud.TrendHdrValue{ + Count: 3, + ExtraLowValuesCounter: nil, + ExtraHighValuesCounter: nil, + MinResolution: float64ptr(defaultMinimumResolution), + Counters: []uint32{1, 2}, + Spans: []*pbcloud.BucketSpan{ + { + Offset: 1347, + Length: 1, + }, + { + Offset: 127, + Length: 1, + }, + }, + MinValue: 100, + MaxValue: 200.1, + Sum: 500.1, + }, + }, } - for i, tc := range cases { + for _, tc := range cases { tc := tc - t.Run(strconv.Itoa(i), func(t *testing.T) { + t.Run(tc.name, func(t *testing.T) { t.Parallel() + h := newHistogram() + // TODO: refactor + // An hack for preserving as the default for the tests the old value 1.0 + if tc.minResolution == 0 { + tc.minResolution = 1.0 + tc.exp.MinResolution = float64ptr(1.0) + } + h.MinimumResolution = tc.minResolution + for _, v := range tc.vals { h.Add(v) } tc.exp.Time = &timestamppb.Timestamp{Seconds: 1} - assert.Equal(t, tc.exp, histogramAsProto(h, time.Unix(1, 0).UnixNano()), tc.name) + hproto := histogramAsProto(h, time.Unix(1, 0).UnixNano()) + require.Equal(t, tc.exp.Count, hproto.Count) + require.Equal(t, tc.exp.Counters, hproto.Counters) + require.Equal(t, len(tc.exp.Spans), len(hproto.Spans)) + assert.Equal(t, tc.exp, hproto, tc.name) }) } } + +func float64ptr(n float64) *float64 { + return &n +} diff --git a/output/cloud/expv2/integration/testdata/metricset.json b/output/cloud/expv2/integration/testdata/metricset.json index 1bd42ebbdb46..eb283095caa8 100644 --- a/output/cloud/expv2/integration/testdata/metricset.json +++ b/output/cloud/expv2/integration/testdata/metricset.json @@ -94,13 +94,14 @@ ], "spans": [ { - "offset": 6, + "offset": 827, "length": 1 } ], "maxValue": 6, "minValue": 6, - "sum": 6 + "sum": 6, + "minResolution": 0.001 } ] } @@ -108,4 +109,4 @@ ] } ] -} \ No newline at end of file +}