Skip to content

Commit

Permalink
[query] Improve precision for variance and stddev of equal values (#2799)
Browse files Browse the repository at this point in the history
  • Loading branch information
vpranckaitis authored Oct 29, 2020
1 parent 0c58de6 commit 72fd76b
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 17 deletions.
35 changes: 26 additions & 9 deletions src/query/functions/aggregation/function.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,24 +122,41 @@ func stddevFn(values []float64, bucket []int) float64 {
}

// varianceFn returns the population variance of the values selected by bucket.
//
// Each entry of bucket is used directly as an index into values. Selected
// values that are NaN are skipped. The result is NaN when values or bucket is
// empty, or when every selected value is NaN.
func varianceFn(values []float64, bucket []int) float64 {
	if len(values) == 0 || len(bucket) == 0 {
		return math.NaN()
	}

	// Using Welford's online algorithm for calculating variance
	// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
	//
	// This algorithm is used in Prometheus and also should provide better
	// numerical precision than the straightforward implementation of the
	// variance formula. The algorithm iterates through the values and at each
	// step recalculates the mean and variance of the values seen so far.
	var count int
	// For better precision, accumulate `variance * count` and divide once at
	// the end instead of dividing on every step.
	var partialMean, partialVarTimesCount float64

	for _, idx := range bucket {
		v := values[idx]
		if math.IsNaN(v) {
			continue
		}

		count++

		// delta1 is measured against the mean before v is folded in, delta2
		// against the mean after; their product is v's contribution to the
		// running sum of squared deviations.
		delta1 := v - partialMean
		partialMean += delta1 / float64(count)
		delta2 := v - partialMean

		partialVarTimesCount += delta1 * delta2
	}

	// Cannot take the population variance of less than 1 (non-NaN) value.
	if count < 1 {
		return math.NaN()
	}

	return partialVarTimesCount / float64(count)
}

func countFn(values []float64, bucket []int) float64 {
Expand Down
41 changes: 41 additions & 0 deletions src/query/functions/aggregation/function_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"testing"

"github.com/m3db/m3/src/query/test"
"github.com/stretchr/testify/assert"
)

type funcTest struct {
Expand Down Expand Up @@ -161,3 +162,43 @@ func TestAggFns(t *testing.T) {
}
}
}

// equalValuePrecisionTest holds named inputs in which every element is the
// same value; it is shared by the variance and stddev precision tests.
var equalValuePrecisionTest = []struct {
	name   string
	values []float64
}{
	{name: "five 1.33e-5", values: []float64{1.33e-5, 1.33e-5, 1.33e-5, 1.33e-5, 1.33e-5}},
	{name: "three 13.3", values: []float64{13.3, 13.3, 13.3}},
}

// TestVarianceFnEqualValuePrecision runs varianceFn over every case in
// equalValuePrecisionTest, selecting all of the case's values, and requires
// the result to be exactly 0.0.
func TestVarianceFnEqualValuePrecision(t *testing.T) {
	for _, tc := range equalValuePrecisionTest {
		t.Run(tc.name, func(t *testing.T) {
			// Build a bucket that selects every index of the input.
			indices := make([]int, 0, len(tc.values))
			for i := range tc.values {
				indices = append(indices, i)
			}

			got := varianceFn(tc.values, indices)
			assert.Equal(t, 0.0, got)
		})
	}
}

// TestStddevFnEqualValuePrecision runs stddevFn over every case in
// equalValuePrecisionTest, selecting all of the case's values, and requires
// the result to be exactly 0.0.
func TestStddevFnEqualValuePrecision(t *testing.T) {
	for _, tc := range equalValuePrecisionTest {
		t.Run(tc.name, func(t *testing.T) {
			// Build a bucket that selects every index of the input.
			indices := make([]int, 0, len(tc.values))
			for i := range tc.values {
				indices = append(indices, i)
			}

			got := stddevFn(tc.values, indices)
			assert.Equal(t, 0.0, got)
		})
	}
}
4 changes: 2 additions & 2 deletions src/query/test/compatibility/test.go
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,7 @@ func (t *Test) clear() error {
func (t *Test) Close() {
}

// samplesAlmostEqual returns true if the two sample lines only differ by a
// almostEqual returns true if the two sample lines only differ by a
// small relative error in their sample value.
func almostEqual(a, b float64) bool {
// NaN has no equality but for testing we still want to know whether both values
Expand All @@ -562,7 +562,7 @@ func almostEqual(a, b float64) bool {
diff := math.Abs(a - b)

if a == 0 || b == 0 || diff < minNormal {
return diff < epsilon
return diff < epsilon*minNormal
}
return diff/(math.Abs(a)+math.Abs(b)) < epsilon
}
Expand Down
10 changes: 4 additions & 6 deletions src/query/test/compatibility/testdata/aggregators.test
Original file line number Diff line number Diff line change
Expand Up @@ -101,13 +101,11 @@ load 5m
http_requests{job="api-server", instance="1", group="production"} 0+1.33x10
http_requests{job="api-server", instance="0", group="canary"} 0+1.33x10

# FAILING issue #10. (it is almost zero)
#eval instant at 50m stddev(http_requests)
# {} 0.0
eval instant at 50m stddev(http_requests)
{} 0.0

# FAILING issue #11. (it is almost zero)
#eval instant at 50m stdvar(http_requests)
# {} 0.0
eval instant at 50m stdvar(http_requests)
{} 0.0



Expand Down

0 comments on commit 72fd76b

Please sign in to comment.