From c9d425448326b11811be9f79244e514ccb6f108d Mon Sep 17 00:00:00 2001 From: jmacd Date: Sun, 3 Nov 2019 09:22:25 -0800 Subject: [PATCH 01/30] Add go module, add simple sampler helper --- LICENSE | 21 +++++++++++++++++ README.md | 13 ++++++++++- go.mod | 5 +++++ go.sum | 10 +++++++++ simple.go | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++ simple_test.go | 38 +++++++++++++++++++++++++++++++ varopt.go | 12 ++++++---- varopt_test.go | 36 +++++++++++++++-------------- 8 files changed, 174 insertions(+), 22 deletions(-) create mode 100644 LICENSE create mode 100644 go.mod create mode 100644 go.sum create mode 100644 simple.go create mode 100644 simple_test.go diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..017c0cd --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2019 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index c9770ee..0c2992f 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,14 @@ This is an implementation of VarOpt, an unbiased weighted sampling algorithm described in the paper [Stream sampling for variance-optimal -estimation of subset sums](https://arxiv.org/pdf/0803.0473.pdf). +estimation of subset sums](https://arxiv.org/pdf/0803.0473.pdf) (2008) +by Edith Cohen, Nick Duffield, Haim Kaplan, Carsten Lund, and Mikkel +Thorup. + +VarOpt is a reservoir-type sampler that maintains a fixed size sample +and provides a mechanism for merging unequal-weight samples. + +This package also includes a simple reservoir sampling algorithm, +often useful in conjunction with weighed reservoir sampling, using +Algorithm R from [Random sampling with a +reservoir](https://en.wikipedia.org/wiki/Reservoir_sampling#Algorithm_R) +(1985) by Jeffrey Vitter. diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..1f0ead4 --- /dev/null +++ b/go.mod @@ -0,0 +1,5 @@ +module github.com/lightstep/varopt + +go 1.13 + +require github.com/stretchr/testify v1.4.0 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..e863f51 --- /dev/null +++ b/go.sum @@ -0,0 +1,10 @@ +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/simple.go b/simple.go new file mode 100644 index 0000000..7d0fb7f --- /dev/null +++ b/simple.go @@ -0,0 +1,61 @@ +// Copyright 2019, LightStep Inc. + +package varopt + +import ( + "math/rand" +) + +// Simple implements unweighted reservoir sampling using Algorithm R +// from "Random sampling with a reservoir" by Jeffrey Vitter (1985) +// https://en.wikipedia.org/wiki/Reservoir_sampling#Algorithm_R +type Simple struct { + capacity int + observed int + buffer []Sample +} + +func NewSimple(capacity int) *Simple { + return &Simple{ + capacity: capacity, + } +} + +func (s *Simple) Add(span Sample) { + s.observed++ + + if s.buffer == nil { + s.buffer = make([]Sample, 0, s.capacity) + } + + if len(s.buffer) < s.capacity { + s.buffer = append(s.buffer, span) + return + } + + // Give this a capacity/observed chance of replacing an existing entry. + index := rand.Intn(s.observed) + if index < s.capacity { + s.buffer[index] = span + } +} + +func (s *Simple) Get(i int) Sample { + return s.buffer[i] +} + +func (s *Simple) Size() int { + return len(s.buffer) +} + +func (s *Simple) Weight() float64 { + return float64(s.observed) / float64(s.Size()) +} + +func (s *Simple) Prob() float64 { + return 1 / s.Weight() +} + +func (s *Simple) Observed() int { + return s.observed +} diff --git a/simple_test.go b/simple_test.go new file mode 100644 index 0000000..8c83500 --- /dev/null +++ b/simple_test.go @@ -0,0 +1,38 @@ +// Copyright 2019, LightStep Inc. + +package varopt_test + +import ( + "testing" + + "github.com/lightstep/varopt" + "github.com/stretchr/testify/require" +) + +type iRec int + +func TestSimple(t *testing.T) { + const ( + popSize = 1e6 + sampleProb = 0.1 + sampleSize int = popSize * sampleProb + epsilon = 0.01 + ) + + ss := varopt.NewSimple(sampleSize) + + psum := 0. + for i := 0; i < popSize; i++ { + ss.Add(iRec(i)) + psum += float64(i) + } + + require.Equal(t, ss.Size(), sampleSize) + + ssum := 0.0 + for i := 0; i < sampleSize; i++ { + ssum += float64(ss.Get(i).(iRec)) + } + + require.InEpsilon(t, ssum/float64(ss.Size()), psum/popSize, epsilon) +} diff --git a/varopt.go b/varopt.go index d3cd2f1..e02d428 100644 --- a/varopt.go +++ b/varopt.go @@ -1,7 +1,4 @@ -// Stream sampling for variance-optimal estimation of subset sums -// Edith Cohen, Nick Duffield, Haim Kaplan, Carsten Lund, Mikkel Thorup -// 2008 -// https://arxiv.org/pdf/0803.0473.pdf +// Copyright 2019, LightStep Inc. package varopt @@ -11,6 +8,11 @@ import ( "math/rand" ) +// Varopt implements the algorithm from Stream sampling for +// variance-optimal estimation of subset sums Edith Cohen, Nick +// Duffield, Haim Kaplan, Carsten Lund, Mikkel Thorup 2008 +// +// https://arxiv.org/pdf/0803.0473.pdf type Varopt struct { // Large-weight items L largeHeap @@ -31,6 +33,8 @@ type Varopt struct { totalWeight float64 } +type Sample interface{} + type vsample struct { sample Sample weight float64 diff --git a/varopt_test.go b/varopt_test.go index 4a19b80..108aeba 100644 --- a/varopt_test.go +++ b/varopt_test.go @@ -1,9 +1,12 @@ +// Copyright 2019, LightStep Inc. + package varopt_test import ( "math" "testing" + "github.com/lightstep/varopt" "github.com/stretchr/testify/require" ) @@ -11,9 +14,8 @@ import ( // There are odd and even numbers, in equal amount // There are last-digits 0-9 in equal amount // -// Much like simple_test.go, we will test the mean is correct and, -// because unbiased, also the odd/even and last-digit-0-9 groupings -// will be balanced. +// We will test the mean is correct and, because unbiased, also the +// odd/even and last-digit-0-9 groupings will be balanced. const ( numBlocks = 100 popSize = 1e7 @@ -53,7 +55,7 @@ func testUnbiased(t *testing.T, bbr, bsr float64) { extra = popSize - bigSize*numBig - smallSize*numSmall ) - population := make([]Sample, popSize) + population := make([]varopt.Sample, popSize) psum := 0.0 @@ -67,17 +69,17 @@ func testUnbiased(t *testing.T, bbr, bsr float64) { // population[i], population[j] = population[j], population[i] // }) - smallBlocks := make([][]Sample, numSmall) - bigBlocks := make([][]Sample, numBig) + smallBlocks := make([][]varopt.Sample, numSmall) + bigBlocks := make([][]varopt.Sample, numBig) for i := 0; i < numSmall; i++ { - smallBlocks[i] = make([]Sample, smallSize) + smallBlocks[i] = make([]varopt.Sample, smallSize) } for i := 0; i < numBig; i++ { if i == 0 { - bigBlocks[0] = make([]Sample, bigSize+extra) + bigBlocks[0] = make([]varopt.Sample, bigSize+extra) } else { - bigBlocks[i] = make([]Sample, bigSize) + bigBlocks[i] = make([]varopt.Sample, bigSize) } } @@ -98,13 +100,13 @@ func testUnbiased(t *testing.T, bbr, bsr float64) { maxDiff := 0.0 - func(allBlockLists ...[][][]Sample) { + func(allBlockLists ...[][][]varopt.Sample) { for _, blockLists := range allBlockLists { - varopt := NewVaropt(sampleSize) + vsample := varopt.NewVaropt(sampleSize) for _, blockList := range blockLists { for _, block := range blockList { - simple := NewSimple(sampleSize) + simple := varopt.NewSimple(sampleSize) for _, s := range block { simple.Add(s) @@ -112,7 +114,7 @@ func testUnbiased(t *testing.T, bbr, bsr float64) { weight := simple.Weight() for i := 0; i < simple.Size(); i++ { - varopt.Add(simple.Get(i), weight) + vsample.Add(simple.Get(i), weight) } } } @@ -121,8 +123,8 @@ func testUnbiased(t *testing.T, bbr, bsr float64) { odd := 0.0 even := 0.0 - for i := 0; i < varopt.Size(); i++ { - v, w := varopt.Get(i) + for i := 0; i < vsample.Size(); i++ { + v, w := vsample.Get(i) vi := v.(int) if vi%2 == 0 { even++ @@ -140,7 +142,7 @@ func testUnbiased(t *testing.T, bbr, bsr float64) { require.InEpsilon(t, odd, even, epsilon) } }( - [][][]Sample{bigBlocks, smallBlocks}, - [][][]Sample{smallBlocks, bigBlocks}, + [][][]varopt.Sample{bigBlocks, smallBlocks}, + [][][]varopt.Sample{smallBlocks, bigBlocks}, ) } From 865dd6a734a1360e4cce64e17d2f53386456248c Mon Sep 17 00:00:00 2001 From: jmacd Date: Thu, 7 Nov 2019 16:45:46 -0800 Subject: [PATCH 02/30] NewVaropt->New --- README.md | 2 +- varopt.go | 9 ++------- varopt_test.go | 2 +- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 0c2992f..1cdb63c 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ estimation of subset sums](https://arxiv.org/pdf/0803.0473.pdf) (2008) by Edith Cohen, Nick Duffield, Haim Kaplan, Carsten Lund, and Mikkel Thorup. -VarOpt is a reservoir-type sampler that maintains a fixed size sample +VarOpt is a reservoir-type sampler that maintains a fixed-size sample and provides a mechanism for merging unequal-weight samples. This package also includes a simple reservoir sampling algorithm, diff --git a/varopt.go b/varopt.go index e02d428..3852d8c 100644 --- a/varopt.go +++ b/varopt.go @@ -42,13 +42,8 @@ type vsample struct { type largeHeap []vsample -func NewVaropt(capacity int) *Varopt { - v := InitVaropt(capacity) - return &v -} - -func InitVaropt(capacity int) Varopt { - return Varopt{ +func New(capacity int) *Varopt { + return &Varopt{ capacity: capacity, } } diff --git a/varopt_test.go b/varopt_test.go index 108aeba..3cebe7c 100644 --- a/varopt_test.go +++ b/varopt_test.go @@ -102,7 +102,7 @@ func testUnbiased(t *testing.T, bbr, bsr float64) { func(allBlockLists ...[][][]varopt.Sample) { for _, blockLists := range allBlockLists { - vsample := varopt.NewVaropt(sampleSize) + vsample := varopt.New(sampleSize) for _, blockList := range blockLists { for _, block := range blockList { From 3dfa2d1ac5a721e1026bfd01fbc23c00c2245cb0 Mon Sep 17 00:00:00 2001 From: jmacd Date: Thu, 7 Nov 2019 22:49:09 -0800 Subject: [PATCH 03/30] Add testable examples --- frequency_test.go | 143 ++++++++++++++++++++++++++++++++++++++++++++++ varopt.go | 10 +++- varopt_test.go | 4 +- weighted_test.go | 82 ++++++++++++++++++++++++++ 4 files changed, 235 insertions(+), 4 deletions(-) create mode 100644 frequency_test.go create mode 100644 weighted_test.go diff --git a/frequency_test.go b/frequency_test.go new file mode 100644 index 0000000..9da7bca --- /dev/null +++ b/frequency_test.go @@ -0,0 +1,143 @@ +// Copyright 2019, LightStep Inc. + +package varopt_test + +import ( + "fmt" + "math" + "math/rand" + + "github.com/lightstep/varopt" +) + +type curve struct { + color string + mean float64 + stddev float64 +} + +type point struct { + color int + xvalue float64 +} + +var colors = []curve{ + {color: "red", mean: 10, stddev: 15}, + {color: "green", mean: 30, stddev: 10}, + {color: "blue", mean: 50, stddev: 20}, +} + +// This example shows how to use Varopt sampling to estimate +// frequencies with the use of inverse probability weights. The use +// of inverse probability creates a uniform expected value, in this of +// the number of sample points per second. +// +// While the number of expected points per second is uniform, the +// output sample weights are expected to match the original +// frequencies. +func ExampleFrequency() { + // Number of points. + const totalCount = 1e6 + + // Relative size of the sample. + const sampleRatio = 0.01 + + // Ensure this test is deterministic. + rnd := rand.New(rand.NewSource(104729)) + + // Construct a timeseries consisting of three colored signals, + // for x=0 to x=60 seconds. + var points []point + + // origCounts stores the original signals at second granularity. + origCounts := make([][]int, len(colors)) + for i := range colors { + origCounts[i] = make([]int, 60) + } + + // Construct the signals by choosing a random color, then + // using its Gaussian to compute a timestamp. + for len(points) < totalCount { + choose := rnd.Intn(len(colors)) + series := colors[choose] + xvalue := rnd.NormFloat64()*series.stddev + series.mean + + if xvalue < 0 || xvalue > 60 { + continue + } + origCounts[choose][int(math.Floor(xvalue))]++ + points = append(points, point{ + color: choose, + xvalue: xvalue, + }) + } + + // Compute the total number of points per second. This will be + // used to establish the per-second probability. + xcount := make([]int, 60) + for _, point := range points { + xcount[int(math.Floor(point.xvalue))]++ + } + + // Compute the sample with using the inverse probability as a + // weight. This ensures a uniform distribution of points in each + // second. + sampleSize := int(sampleRatio * float64(totalCount)) + sampler := varopt.New(sampleSize, rnd) + for _, point := range points { + second := int(math.Floor(point.xvalue)) + prob := float64(xcount[second]) / float64(totalCount) + sampler.Add(point, 1/prob) + } + + // sampleCounts stores the reconstructed signals. + sampleCounts := make([][]float64, len(colors)) + for i := range colors { + sampleCounts[i] = make([]float64, 60) + } + + // pointCounts stores the number of points per second. + pointCounts := make([]int, 60) + + // Reconstruct the signals using the output sample weights. + // The effective count of each sample point is its output + // weight divided by its original weight. + for i := 0; i < sampler.Size(); i++ { + sample, weight := sampler.Get(i) + origWeight := sampler.GetOriginalWeight(i) + point := sample.(point) + second := int(math.Floor(point.xvalue)) + sampleCounts[point.color][second] += (weight / origWeight) + pointCounts[second]++ + } + + // Compute standard deviation of sample points per second. + sum := 0.0 + mean := float64(sampleSize) / 60 + for s := 0; s < 60; s++ { + e := float64(pointCounts[s]) - mean + sum += e * e + } + stddev := math.Sqrt(sum / (60 - 1)) + + fmt.Printf("Samples per second mean %.2f\n", mean) + fmt.Printf("Samples per second standard deviation %.2f\n", stddev) + + // Compute mean absolute percentage error between sampleCounts + // and origCounts for each signal. + for c := range colors { + mae := 0.0 + for s := 0; s < 60; s++ { + mae += math.Abs(sampleCounts[c][s]-float64(origCounts[c][s])) / float64(origCounts[c][s]) + } + mae /= 60 + fmt.Printf("Mean absolute percentage error (%s) = %.2f%%\n", colors[c].color, mae*100) + } + + // Output: + // Samples per second mean 166.67 + // Samples per second standard deviation 13.75 + // Mean absolute percentage error (red) = 25.16% + // Mean absolute percentage error (green) = 14.30% + // Mean absolute percentage error (blue) = 14.23% +} diff --git a/varopt.go b/varopt.go index 3852d8c..4dd8ea3 100644 --- a/varopt.go +++ b/varopt.go @@ -14,6 +14,9 @@ import ( // // https://arxiv.org/pdf/0803.0473.pdf type Varopt struct { + // Random number generator + rnd *rand.Rand + // Large-weight items L largeHeap @@ -42,9 +45,10 @@ type vsample struct { type largeHeap []vsample -func New(capacity int) *Varopt { +func New(capacity int, rnd *rand.Rand) *Varopt { return &Varopt{ capacity: capacity, + rnd: rnd, } } @@ -99,7 +103,7 @@ func (s *Varopt) Add(sample Sample, weight float64) { } s.X = s.X[:len(s.X)-1] } else { - ti := rand.Intn(len(s.T)) + ti := s.rnd.Intn(len(s.T)) s.T[ti], s.T[len(s.T)-1] = s.T[len(s.T)-1], s.T[ti] s.T = s.T[:len(s.T)-1] } @@ -109,7 +113,7 @@ func (s *Varopt) Add(sample Sample, weight float64) { func (s *Varopt) uniform() float64 { for { - r := rand.Float64() + r := s.rnd.Float64() if r != 0.0 { return r } diff --git a/varopt_test.go b/varopt_test.go index 3cebe7c..16786ab 100644 --- a/varopt_test.go +++ b/varopt_test.go @@ -4,6 +4,7 @@ package varopt_test import ( "math" + "math/rand" "testing" "github.com/lightstep/varopt" @@ -99,10 +100,11 @@ func testUnbiased(t *testing.T, bbr, bsr float64) { require.Equal(t, len(population), pos) maxDiff := 0.0 + rnd := rand.New(rand.NewSource(98887)) func(allBlockLists ...[][][]varopt.Sample) { for _, blockLists := range allBlockLists { - vsample := varopt.New(sampleSize) + vsample := varopt.New(sampleSize, rnd) for _, blockList := range blockLists { for _, block := range blockList { diff --git a/weighted_test.go b/weighted_test.go new file mode 100644 index 0000000..e545fbd --- /dev/null +++ b/weighted_test.go @@ -0,0 +1,82 @@ +// Copyright 2019, LightStep Inc. + +package varopt_test + +import ( + "fmt" + "math" + "math/rand" + + "github.com/lightstep/varopt" +) + +type packet struct { + size int + color string + protocol string +} + +func ExampleWeighted() { + const totalPackets = 1e6 + const sampleRatio = 0.01 + + colors := []string{"red", "green", "blue"} + protocols := []string{"http", "tcp", "udp"} + + sizeByColor := map[string]int{} + sizeByProtocol := map[string]int{} + trueTotalWeight := 0.0 + + rnd := rand.New(rand.NewSource(32491)) + sampler := varopt.New(totalPackets*sampleRatio, rnd) + + for i := 0; i < totalPackets; i++ { + packet := packet{ + size: 1 + rnd.Intn(100000), + color: colors[rnd.Intn(len(colors))], + protocol: protocols[rnd.Intn(len(protocols))], + } + + sizeByColor[packet.color] += packet.size + sizeByProtocol[packet.protocol] += packet.size + trueTotalWeight += float64(packet.size) + + sampler.Add(packet, float64(packet.size)) + } + + estSizeByColor := map[string]float64{} + estSizeByProtocol := map[string]float64{} + estTotalWeight := 0.0 + + for i := 0; i < sampler.Size(); i++ { + sample, weight := sampler.Get(i) + packet := sample.(packet) + estSizeByColor[packet.color] += weight + estSizeByProtocol[packet.protocol] += weight + estTotalWeight += weight + } + + // Compute mean average percentage error for colors + colorMape := 0.0 + for _, c := range colors { + colorMape += math.Abs(float64(sizeByColor[c])-estSizeByColor[c]) / float64(sizeByColor[c]) + } + colorMape /= float64(len(colors)) + + // Compute mean average percentage error for protocols + protocolMape := 0.0 + for _, p := range protocols { + protocolMape += math.Abs(float64(sizeByProtocol[p])-estSizeByProtocol[p]) / float64(sizeByProtocol[p]) + } + protocolMape /= float64(len(protocols)) + + // Compute total sum error percentage + fmt.Printf("Total sum error %.2g%%\n", 100*math.Abs(estTotalWeight-trueTotalWeight)/trueTotalWeight) + fmt.Printf("Color mean absolute percentage error %.2f%%\n", 100*colorMape) + fmt.Printf("Protocol mean absolute percentage error %.2f%%\n", 100*protocolMape) + + // Output: + // Total sum error 2.4e-11% + // Color mean absolute percentage error 0.73% + // Protocol mean absolute percentage error 1.62% +} From 2be048e87da0b605d77e8960a67090ebd177bc39 Mon Sep 17 00:00:00 2001 From: jmacd Date: Thu, 7 Nov 2019 23:05:28 -0800 Subject: [PATCH 04/30] Update README with examples and godoc link --- README.md | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/README.md b/README.md index 1cdb63c..82cc1c4 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,7 @@ +[![Docs](https://godoc.org/github.com/lightstep/varopt?status.svg)](https://godoc.org/github.com/lightstep/varopt) + +# VarOpt Sampling Algorithm + This is an implementation of VarOpt, an unbiased weighted sampling algorithm described in the paper [Stream sampling for variance-optimal estimation of subset sums](https://arxiv.org/pdf/0803.0473.pdf) (2008) @@ -12,3 +16,35 @@ often useful in conjunction with weighed reservoir sampling, using Algorithm R from [Random sampling with a reservoir](https://en.wikipedia.org/wiki/Reservoir_sampling#Algorithm_R) (1985) by Jeffrey Vitter. + +# Usage: Natural Weights + +A typical use of VarOpt sampling is to estimate network flows using +sample packets. In this use-case, the weight applied to each sample +is the size of the packet. Beacuse VarOpt computes an unbiased +sample, the sample data points can be summarized along secondary +dimensions. For example, we can select a subset of the sample +according to a secondary attribute, sum the sample weights, and the +result is expected value of the secondary attribute in the original +population. + +See [weighted_test.go](https://github.com/lightstep/varopt/blob/master/weighted_test.go) for an example. + +# Usage: Inverse-probability Weights + +Another use for VarOpt sampling uses inverse-probability weights to +estimate frequencies while simultaneously controlling sample +diversity. Suppose a sequence of observations can be naturally +categorized into N different buckets. The goal in this case is to +compute a sample where each bucket is well represented, while +maintaining frequency estimates. + +In this use-case, the weight assigned to each observation is the +inverse probability of the bucket it belongs to. The result of +weighted sampling with inverse-probability weights is a uniform +expectation, in this example we expect an equal number of observations +falling into each bucket. Each observation represents a frequency of +its sample weight (computed by VarOpt) divided by its original weight +(the inverse-probability). + +See [frequency_test.go](https://github.com/lightstep/varopt/blob/master/frequency_test.go) for an example. From 08e548be48e5ad1232d9e340f17e96da3bc660c1 Mon Sep 17 00:00:00 2001 From: jmacd Date: Thu, 7 Nov 2019 23:08:10 -0800 Subject: [PATCH 05/30] Update --- README.md | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 82cc1c4..ba73609 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ Algorithm R from [Random sampling with a reservoir](https://en.wikipedia.org/wiki/Reservoir_sampling#Algorithm_R) (1985) by Jeffrey Vitter. -# Usage: Natural Weights +## Usage: Natural Weights A typical use of VarOpt sampling is to estimate network flows using sample packets. In this use-case, the weight applied to each sample @@ -30,7 +30,7 @@ population. See [weighted_test.go](https://github.com/lightstep/varopt/blob/master/weighted_test.go) for an example. -# Usage: Inverse-probability Weights +## Usage: Inverse-probability Weights Another use for VarOpt sampling uses inverse-probability weights to estimate frequencies while simultaneously controlling sample @@ -48,3 +48,11 @@ its sample weight (computed by VarOpt) divided by its original weight (the inverse-probability). See [frequency_test.go](https://github.com/lightstep/varopt/blob/master/frequency_test.go) for an example. + +## Usage: Merging Samples + +VarOpt supports merging independently collected samples one +observation at a time. This is useful for building distributed +sampling schemes. In this use-case, each node in a distributed system +computes a weighted sample. To combine samples, simply input all the +observations and their corresponding weights into a new VarOpt sample. \ No newline at end of file From 43a69f477853bd6cc3828f5ff0bb10d4fec68334 Mon Sep 17 00:00:00 2001 From: jmacd Date: Fri, 8 Nov 2019 09:34:51 -0800 Subject: [PATCH 06/30] Use a deterministic random source; name examples for godoc --- frequency_test.go | 2 +- simple.go | 6 ++++-- simple_test.go | 5 ++++- varopt_test.go | 6 ++---- weighted_test.go | 2 +- 5 files changed, 12 insertions(+), 9 deletions(-) diff --git a/frequency_test.go b/frequency_test.go index 9da7bca..e4b3429 100644 --- a/frequency_test.go +++ b/frequency_test.go @@ -35,7 +35,7 @@ var colors = []curve{ // While the number of expected points per second is uniform, the // output sample weights are expected to match the original // frequencies. -func ExampleFrequency() { +func ExampleVaropt_GetOriginalWeight() { // Number of points. const totalCount = 1e6 diff --git a/simple.go b/simple.go index 7d0fb7f..d34d42a 100644 --- a/simple.go +++ b/simple.go @@ -13,11 +13,13 @@ type Simple struct { capacity int observed int buffer []Sample + rnd *rand.Rand } -func NewSimple(capacity int) *Simple { +func NewSimple(capacity int, rnd *rand.Rand) *Simple { return &Simple{ capacity: capacity, + rnd: rnd, } } @@ -34,7 +36,7 @@ func (s *Simple) Add(span Sample) { } // Give this a capacity/observed chance of replacing an existing entry. - index := rand.Intn(s.observed) + index := s.rnd.Intn(s.observed) if index < s.capacity { s.buffer[index] = span } diff --git a/simple_test.go b/simple_test.go index 8c83500..c70503e 100644 --- a/simple_test.go +++ b/simple_test.go @@ -3,6 +3,7 @@ package varopt_test import ( + "math/rand" "testing" "github.com/lightstep/varopt" @@ -19,7 +20,9 @@ func TestSimple(t *testing.T) { epsilon = 0.01 ) - ss := varopt.NewSimple(sampleSize) + rnd := rand.New(rand.NewSource(17167)) + + ss := varopt.NewSimple(sampleSize, rnd) psum := 0. for i := 0; i < popSize; i++ { diff --git a/varopt_test.go b/varopt_test.go index 16786ab..3d6eec3 100644 --- a/varopt_test.go +++ b/varopt_test.go @@ -23,9 +23,7 @@ const ( sampleProb = 0.001 sampleSize int = popSize * sampleProb - // TODO epsilon is somewhat variable b/c we're using the - // static rand w/o a fixed seed for the test. - epsilon = 0.06 + epsilon = 0.08 ) func TestUnbiased(t *testing.T) { @@ -108,7 +106,7 @@ func testUnbiased(t *testing.T, bbr, bsr float64) { for _, blockList := range blockLists { for _, block := range blockList { - simple := varopt.NewSimple(sampleSize) + simple := varopt.NewSimple(sampleSize, rnd) for _, s := range block { simple.Add(s) diff --git a/weighted_test.go b/weighted_test.go index e545fbd..1e82043 100644 --- a/weighted_test.go +++ b/weighted_test.go @@ -16,7 +16,7 @@ type packet struct { protocol string } -func ExampleWeighted() { +func ExampleNew() { const totalPackets = 1e6 const sampleRatio = 0.01 From 68f1faba1c25c8dcf24ae854b7d0cf1ff20205ac Mon Sep 17 00:00:00 2001 From: jmacd Date: Fri, 8 Nov 2019 09:50:25 -0800 Subject: [PATCH 07/30] Add comments --- doc.go | 22 ++++++++++++++++++++++ simple.go | 15 ++++++++++----- varopt.go | 18 ++++++++++++++++++ 3 files changed, 50 insertions(+), 5 deletions(-) create mode 100644 doc.go diff --git a/doc.go b/doc.go new file mode 100644 index 0000000..deb2de9 --- /dev/null +++ b/doc.go @@ -0,0 +1,22 @@ +// Copyright 2019, LightStep Inc. + +/* +Package varopt is an implementation of VarOpt, an unbiased weighted +sampling algorithm described in the paper "Stream sampling for +variance-optimal estimation of subset sums" +https://arxiv.org/pdf/0803.0473.pdf (2008), by Edith Cohen, Nick +Duffield, Haim Kaplan, Carsten Lund, and Mikkel Thorup. + +VarOpt is a reservoir-type sampler that maintains a fixed-size sample +and provides a mechanism for merging unequal-weight samples. + +This package also includes a simple reservoir sampling algorithm, +often useful in conjunction with weighed reservoir sampling, using +Algorithm R from "Random sampling with a +reservoir", https://en.wikipedia.org/wiki/Reservoir_sampling#Algorithm_R +(1985), by Jeffrey Vitter. + +See https://github.com/lightstep/varopt/blob/master/README.md for +more detail. +*/ +package varopt diff --git a/simple.go b/simple.go index d34d42a..6ff64f1 100644 --- a/simple.go +++ b/simple.go @@ -16,6 +16,8 @@ type Simple struct { rnd *rand.Rand } +// NewSimple returns a simple reservoir sampler with given capacity +// (i.e., reservoir size) and random number generator. func NewSimple(capacity int, rnd *rand.Rand) *Simple { return &Simple{ capacity: capacity, @@ -23,6 +25,8 @@ func NewSimple(capacity int, rnd *rand.Rand) *Simple { } } +// Add considers a new observation for the sample. Items have unit +// weight. func (s *Simple) Add(span Sample) { s.observed++ @@ -42,22 +46,23 @@ func (s *Simple) Add(span Sample) { } } +// Get returns the i'th selected item from the sample. func (s *Simple) Get(i int) Sample { return s.buffer[i] } +// Get returns the number of items in the sample. If the reservoir is +// full, Size() equals Capacity(). func (s *Simple) Size() int { return len(s.buffer) } +// Weight returns the adjusted weight of each item in the sample. func (s *Simple) Weight() float64 { return float64(s.observed) / float64(s.Size()) } -func (s *Simple) Prob() float64 { - return 1 / s.Weight() -} - -func (s *Simple) Observed() int { +// Count returns the number of items that were observed. +func (s *Simple) Count() int { return s.observed } diff --git a/varopt.go b/varopt.go index 4dd8ea3..1856614 100644 --- a/varopt.go +++ b/varopt.go @@ -36,6 +36,9 @@ type Varopt struct { totalWeight float64 } +// Sample is an empty interface that represents a sample item. +// Sampling algorithms treat these as opaque, as their weight is +// passed in separately. type Sample interface{} type vsample struct { @@ -45,6 +48,8 @@ type vsample struct { type largeHeap []vsample +// New returns a new Varopt sampler with given capacity (i.e., +// reservoir size) and random number generator. func New(capacity int, rnd *rand.Rand) *Varopt { return &Varopt{ capacity: capacity, @@ -52,6 +57,7 @@ func New(capacity int, rnd *rand.Rand) *Varopt { } } +// Add considers a new observation for the sample with given weight. func (s *Varopt) Add(sample Sample, weight float64) { individual := vsample{ sample: sample, @@ -131,6 +137,9 @@ func (s *Varopt) Get(i int) (Sample, float64) { return s.T[i-len(s.L)].sample, s.tau } +// GetOriginalWeight returns the original input weight of the sample +// item that was passed to Add(). This can be useful for computing a +// frequency from the adjusted sample weight. func (s *Varopt) GetOriginalWeight(i int) float64 { if i < len(s.L) { return s.L[i].weight @@ -139,22 +148,31 @@ func (s *Varopt) GetOriginalWeight(i int) float64 { return s.T[i-len(s.L)].weight } +// Capacity returns the size of the reservoir. This is the maximum +// size of the sample. func (s *Varopt) Capacity() int { return s.capacity } +// Size returns the current number of items in the sample. If the +// reservoir is full, this returns Capacity(). func (s *Varopt) Size() int { return len(s.L) + len(s.T) } +// TotalWeight returns the sum of weights that were passed to Add(). func (s *Varopt) TotalWeight() float64 { return s.totalWeight } +// TotalCount returns the number of calls to Add(). func (s *Varopt) TotalCount() int { return s.totalCount } +// Tau returns the current large-weight threshold. Weights larger +// than Tau() carry their exact weight int he sample. See the VarOpt +// paper for details. func (s *Varopt) Tau() float64 { return s.tau } From 5c2a41c30ef4fb1688346f362a18788ff296069e Mon Sep 17 00:00:00 2001 From: jmacd Date: Fri, 8 Nov 2019 09:58:00 -0800 Subject: [PATCH 08/30] Add circle config --- .circleci/config.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 .circleci/config.yml diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000..1e65ea3 --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,21 @@ +version: 2 + +jobs: + test: + working_directory: ~/go/src/github.com/lightstep/varopt + docker: + - image: circleci/golang:1.13 + steps: + - run: + name: "configure environment" + command: | + echo 'export GOPATH="$HOME/go"' >> $BASH_ENV + source $BASH_ENV + - checkout + - run: go test + +workflows: + version: 2 + test: + jobs: + - test From cd735680d9ca6efe402d3819a5f89e2ec7f751cc Mon Sep 17 00:00:00 2001 From: jmacd Date: Fri, 8 Nov 2019 10:46:59 -0800 Subject: [PATCH 09/30] Move simple into subpackage --- .circleci/config.yml | 2 +- simple.go => simple/simple.go | 16 ++++++++------ simple_test.go | 41 ----------------------------------- varopt_test.go | 11 +++++----- 4 files changed, 16 insertions(+), 54 deletions(-) rename simple.go => simple/simple.go (80%) delete mode 100644 simple_test.go diff --git a/.circleci/config.yml b/.circleci/config.yml index 1e65ea3..b31eb32 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -12,7 +12,7 @@ jobs: echo 'export GOPATH="$HOME/go"' >> $BASH_ENV source $BASH_ENV - checkout - - run: go test + - run: go test -v ./... workflows: version: 2 diff --git a/simple.go b/simple/simple.go similarity index 80% rename from simple.go rename to simple/simple.go index 6ff64f1..c086dfa 100644 --- a/simple.go +++ b/simple/simple.go @@ -1,9 +1,11 @@ // Copyright 2019, LightStep Inc. -package varopt +package simple import ( "math/rand" + + "github.com/lightstep/varopt" ) // Simple implements unweighted reservoir sampling using Algorithm R @@ -12,13 +14,13 @@ import ( type Simple struct { capacity int observed int - buffer []Sample + buffer []varopt.Sample rnd *rand.Rand } -// NewSimple returns a simple reservoir sampler with given capacity +// New returns a simple reservoir sampler with given capacity // (i.e., reservoir size) and random number generator. -func NewSimple(capacity int, rnd *rand.Rand) *Simple { +func New(capacity int, rnd *rand.Rand) *Simple { return &Simple{ capacity: capacity, rnd: rnd, @@ -27,11 +29,11 @@ func NewSimple(capacity int, rnd *rand.Rand) *Simple { // Add considers a new observation for the sample. Items have unit // weight. -func (s *Simple) Add(span Sample) { +func (s *Simple) Add(span varopt.Sample) { s.observed++ if s.buffer == nil { - s.buffer = make([]Sample, 0, s.capacity) + s.buffer = make([]varopt.Sample, 0, s.capacity) } if len(s.buffer) < s.capacity { @@ -47,7 +49,7 @@ func (s *Simple) Add(span Sample) { } // Get returns the i'th selected item from the sample. -func (s *Simple) Get(i int) Sample { +func (s *Simple) Get(i int) varopt.Sample { return s.buffer[i] } diff --git a/simple_test.go b/simple_test.go deleted file mode 100644 index c70503e..0000000 --- a/simple_test.go +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2019, LightStep Inc. - -package varopt_test - -import ( - "math/rand" - "testing" - - "github.com/lightstep/varopt" - "github.com/stretchr/testify/require" -) - -type iRec int - -func TestSimple(t *testing.T) { - const ( - popSize = 1e6 - sampleProb = 0.1 - sampleSize int = popSize * sampleProb - epsilon = 0.01 - ) - - rnd := rand.New(rand.NewSource(17167)) - - ss := varopt.NewSimple(sampleSize, rnd) - - psum := 0. - for i := 0; i < popSize; i++ { - ss.Add(iRec(i)) - psum += float64(i) - } - - require.Equal(t, ss.Size(), sampleSize) - - ssum := 0.0 - for i := 0; i < sampleSize; i++ { - ssum += float64(ss.Get(i).(iRec)) - } - - require.InEpsilon(t, ssum/float64(ss.Size()), psum/popSize, epsilon) -} diff --git a/varopt_test.go b/varopt_test.go index 3d6eec3..06f7bc1 100644 --- a/varopt_test.go +++ b/varopt_test.go @@ -8,6 +8,7 @@ import ( "testing" "github.com/lightstep/varopt" + "github.com/lightstep/varopt/simple" "github.com/stretchr/testify/require" ) @@ -106,15 +107,15 @@ func testUnbiased(t *testing.T, bbr, bsr float64) { for _, blockList := range blockLists { for _, block := range blockList { - simple := varopt.NewSimple(sampleSize, rnd) + ss := simple.New(sampleSize, rnd) for _, s := range block { - simple.Add(s) + ss.Add(s) } - weight := simple.Weight() - for i := 0; i < simple.Size(); i++ { - vsample.Add(simple.Get(i), weight) + weight := ss.Weight() + for i := 0; i < ss.Size(); i++ { + vsample.Add(ss.Get(i), weight) } } } From 2afca792c50cd4cab9c8709c47553ad0b5f3aa41 Mon Sep 17 00:00:00 2001 From: jmacd Date: Fri, 8 Nov 2019 12:49:00 -0800 Subject: [PATCH 10/30] Restore the simple test --- simple/simple_test.go | 41 +++++++++++++++++++++++++++++++++++++++++ varopt.go | 2 +- 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 simple/simple_test.go diff --git a/simple/simple_test.go b/simple/simple_test.go new file mode 100644 index 0000000..4ea9592 --- /dev/null +++ b/simple/simple_test.go @@ -0,0 +1,41 @@ +// Copyright 2019, LightStep Inc. + +package simple_test + +import ( + "math/rand" + "testing" + + "github.com/lightstep/varopt/simple" + "github.com/stretchr/testify/require" +) + +type iRec int + +func TestSimple(t *testing.T) { + const ( + popSize = 1e6 + sampleProb = 0.1 + sampleSize int = popSize * sampleProb + epsilon = 0.01 + ) + + rnd := rand.New(rand.NewSource(17167)) + + ss := simple.New(sampleSize, rnd) + + psum := 0. + for i := 0; i < popSize; i++ { + ss.Add(iRec(i)) + psum += float64(i) + } + + require.Equal(t, ss.Size(), sampleSize) + + ssum := 0.0 + for i := 0; i < sampleSize; i++ { + ssum += float64(ss.Get(i).(iRec)) + } + + require.InEpsilon(t, ssum/float64(ss.Size()), psum/popSize, epsilon) +} diff --git a/varopt.go b/varopt.go index 1856614..5ab1ee5 100644 --- a/varopt.go +++ b/varopt.go @@ -171,7 +171,7 @@ func (s *Varopt) TotalCount() int { } // Tau returns the current large-weight threshold. Weights larger -// than Tau() carry their exact weight int he sample. See the VarOpt +// than Tau() carry their exact weight in the sample. See the VarOpt // paper for details. func (s *Varopt) Tau() float64 { return s.tau From 7ec92c7916046345eb267889db7a52405372897f Mon Sep 17 00:00:00 2001 From: jmacd Date: Fri, 8 Nov 2019 13:35:18 -0800 Subject: [PATCH 11/30] Add simple/doc.go --- README.md | 6 +++--- doc.go | 2 +- simple/doc.go | 4 ++++ 3 files changed, 8 insertions(+), 4 deletions(-) create mode 100644 simple/doc.go diff --git a/README.md b/README.md index ba73609..a949cc0 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,9 @@ Thorup. VarOpt is a reservoir-type sampler that maintains a fixed-size sample and provides a mechanism for merging unequal-weight samples. -This package also includes a simple reservoir sampling algorithm, -often useful in conjunction with weighed reservoir sampling, using -Algorithm R from [Random sampling with a +This repository also includes a simple reservoir sampling algorithm, +often useful in conjunction with weighed reservoir sampling, that +implements Algorithm R from [Random sampling with a reservoir](https://en.wikipedia.org/wiki/Reservoir_sampling#Algorithm_R) (1985) by Jeffrey Vitter. diff --git a/doc.go b/doc.go index deb2de9..fe0e2e6 100644 --- a/doc.go +++ b/doc.go @@ -1,7 +1,7 @@ // Copyright 2019, LightStep Inc. /* -Package varopt is an implementation of VarOpt, an unbiased weighted +Package varopt contains an implementation of VarOpt, an unbiased weighted sampling algorithm described in the paper "Stream sampling for variance-optimal estimation of subset sums" https://arxiv.org/pdf/0803.0473.pdf (2008), by Edith Cohen, Nick diff --git a/simple/doc.go b/simple/doc.go new file mode 100644 index 0000000..0d79ebe --- /dev/null +++ b/simple/doc.go @@ -0,0 +1,4 @@ +// Copyright 2019, LightStep Inc. + +/* Package simple implements an unweighted reservoir sampling algorithm. */ +package simple From 4b69751fc60eb5e2fad15643ebdb68ea26375984 Mon Sep 17 00:00:00 2001 From: jmacd Date: Fri, 8 Nov 2019 20:31:24 -0800 Subject: [PATCH 12/30] Rephrase and fix typos --- README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index a949cc0..131b6b3 100644 --- a/README.md +++ b/README.md @@ -21,12 +21,12 @@ reservoir](https://en.wikipedia.org/wiki/Reservoir_sampling#Algorithm_R) A typical use of VarOpt sampling is to estimate network flows using sample packets. In this use-case, the weight applied to each sample -is the size of the packet. Beacuse VarOpt computes an unbiased -sample, the sample data points can be summarized along secondary -dimensions. For example, we can select a subset of the sample +is the size of the packet. Because VarOpt computes an unbiased +sample, sample data points can be summarized along secondary +dimensions. For example, we can select a subset of sampled packets according to a secondary attribute, sum the sample weights, and the -result is expected value of the secondary attribute in the original -population. +result is expected to equal the size of packets corresponding to the +secondary attribute from the original population. See [weighted_test.go](https://github.com/lightstep/varopt/blob/master/weighted_test.go) for an example. @@ -42,10 +42,10 @@ maintaining frequency estimates. In this use-case, the weight assigned to each observation is the inverse probability of the bucket it belongs to. The result of weighted sampling with inverse-probability weights is a uniform -expectation, in this example we expect an equal number of observations -falling into each bucket. Each observation represents a frequency of -its sample weight (computed by VarOpt) divided by its original weight -(the inverse-probability). +expectated value; in this example we expect an equal number of +observations falling into each bucket. Each observation represents a +frequency of its sample weight (computed by VarOpt) divided by its +original weight (the inverse-probability). See [frequency_test.go](https://github.com/lightstep/varopt/blob/master/frequency_test.go) for an example. From 4062031cc972442adf70edcc136ff6daac969213 Mon Sep 17 00:00:00 2001 From: jmacd Date: Sat, 9 Nov 2019 11:49:31 -0800 Subject: [PATCH 13/30] Add a benchmark --- benchmark_test.go | 58 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 benchmark_test.go diff --git a/benchmark_test.go b/benchmark_test.go new file mode 100644 index 0000000..a2f7c04 --- /dev/null +++ b/benchmark_test.go @@ -0,0 +1,58 @@ +// Copyright 2019, LightStep Inc. + +package varopt_test + +import ( + "math" + "math/rand" + "testing" + + "github.com/lightstep/varopt" +) + +func normValue(rnd *rand.Rand) float64 { + return 1 + math.Abs(rnd.NormFloat64()) +} + +func expValue(rnd *rand.Rand) float64 { + return rnd.ExpFloat64() +} + +func BenchmarkAdd_Norm_100(b *testing.B) { + benchmarkAdd(b, 100, normValue) +} + +func BenchmarkAdd_Norm_10000(b *testing.B) { + benchmarkAdd(b, 10000, normValue) +} + +func BenchmarkAdd_Norm_1000000(b *testing.B) { + benchmarkAdd(b, 1000000, normValue) +} + +func BenchmarkAdd_Exp_100(b *testing.B) { + benchmarkAdd(b, 100, expValue) +} + +func BenchmarkAdd_Exp_10000(b *testing.B) { + benchmarkAdd(b, 10000, expValue) +} + +func BenchmarkAdd_Exp_1000000(b *testing.B) { + benchmarkAdd(b, 1000000, expValue) +} + +func benchmarkAdd(b *testing.B, size int, f func(rnd *rand.Rand) float64) { + b.ReportAllocs() + rnd := rand.New(rand.NewSource(3331)) + v := varopt.New(size, rnd) + weights := make([]float64, b.N) + for i := 0; i < b.N; i++ { + weights[i] = f(rnd) + } + + b.StartTimer() + for i := 0; i < b.N; i++ { + v.Add(nil, weights[i]) + } +} From ba028d8125e01c56a625258c7a7341efd7e9b8fc Mon Sep 17 00:00:00 2001 From: jmacd Date: Sat, 9 Nov 2019 11:55:29 -0800 Subject: [PATCH 14/30] Add benchmark notes --- benchmark_test.go | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/benchmark_test.go b/benchmark_test.go index a2f7c04..131a08d 100644 --- a/benchmark_test.go +++ b/benchmark_test.go @@ -1,4 +1,18 @@ // Copyright 2019, LightStep Inc. +// +// The benchmark results point to a performance drop when the +// largeHeap starts to be used because of interface conversions in and +// out of the heap, primarily due to the heap interface. This +// suggests room for improvement by avoiding the built-in heap. + +/* +BenchmarkAdd_Norm_100-8 37540165 32.1 ns/op 8 B/op 0 allocs/op +BenchmarkAdd_Norm_10000-8 39850280 30.6 ns/op 8 B/op 0 allocs/op +BenchmarkAdd_Norm_1000000-8 7958835 183 ns/op 52 B/op 0 allocs/op +BenchmarkAdd_Exp_100-8 41565934 28.5 ns/op 8 B/op 0 allocs/op +BenchmarkAdd_Exp_10000-8 43622184 29.2 ns/op 8 B/op 0 allocs/op +BenchmarkAdd_Exp_1000000-8 8103663 220 ns/op 55 B/op 0 allocs/op +*/ package varopt_test From db2f575931030ee0fdd5ad2324d12f8517f2e96c Mon Sep 17 00:00:00 2001 From: jmacd Date: Tue, 12 Nov 2019 23:26:37 -0800 Subject: [PATCH 15/30] Check for NaN weight --- varopt.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/varopt.go b/varopt.go index 5ab1ee5..fc802c5 100644 --- a/varopt.go +++ b/varopt.go @@ -5,6 +5,7 @@ package varopt import ( "container/heap" "fmt" + "math" "math/rand" ) @@ -64,8 +65,8 @@ func (s *Varopt) Add(sample Sample, weight float64) { weight: weight, } - if weight <= 0 { - panic(fmt.Sprint("Invalid weight <= 0: ", weight)) + if weight <= 0 || math.IsNaN(weight) { + panic(fmt.Sprint("Invalid weight: ", weight)) } s.totalCount++ From 5cd2650f3fdc3f9c914b102ec11df57c687d439d Mon Sep 17 00:00:00 2001 From: jmacd Date: Wed, 13 Nov 2019 21:51:59 -0800 Subject: [PATCH 16/30] Return an error, don't panic --- varopt.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/varopt.go b/varopt.go index fc802c5..f786d12 100644 --- a/varopt.go +++ b/varopt.go @@ -49,6 +49,8 @@ type vsample struct { type largeHeap []vsample +var ErrInvalidWeight = fmt.Errorf("Negative or NaN weight") + // New returns a new Varopt sampler with given capacity (i.e., // reservoir size) and random number generator. func New(capacity int, rnd *rand.Rand) *Varopt { @@ -59,14 +61,16 @@ func New(capacity int, rnd *rand.Rand) *Varopt { } // Add considers a new observation for the sample with given weight. -func (s *Varopt) Add(sample Sample, weight float64) { +// +// An error will be returned if the weight is either negative or NaN. +func (s *Varopt) Add(sample Sample, weight float64) error { individual := vsample{ sample: sample, weight: weight, } if weight <= 0 || math.IsNaN(weight) { - panic(fmt.Sprint("Invalid weight: ", weight)) + return ErrInvalidWeight } s.totalCount++ @@ -74,7 +78,7 @@ func (s *Varopt) Add(sample Sample, weight float64) { if s.Size() < s.capacity { heap.Push(&s.L, individual) - return + return nil } // the X <- {} step from the paper is not done here, @@ -116,6 +120,7 @@ func (s *Varopt) Add(sample Sample, weight float64) { } s.T = append(s.T, s.X...) s.X = s.X[:0] + return nil } func (s *Varopt) uniform() float64 { From b92f0877e9a51c422d2e715ff97a550fd5f70a68 Mon Sep 17 00:00:00 2001 From: jmacd Date: Fri, 15 Nov 2019 12:20:01 -0800 Subject: [PATCH 17/30] Inline the large-weight heap to avoid interface conversions --- varopt.go | 66 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 22 deletions(-) diff --git a/varopt.go b/varopt.go index 5ab1ee5..7e4449c 100644 --- a/varopt.go +++ b/varopt.go @@ -3,7 +3,6 @@ package varopt import ( - "container/heap" "fmt" "math/rand" ) @@ -17,7 +16,7 @@ type Varopt struct { // Random number generator rnd *rand.Rand - // Large-weight items + // Large-weight items stored in a min-heap. L largeHeap // Light-weight items. @@ -72,7 +71,7 @@ func (s *Varopt) Add(sample Sample, weight float64) { s.totalWeight += weight if s.Size() < s.capacity { - heap.Push(&s.L, individual) + s.L.push(individual) return } @@ -82,14 +81,14 @@ func (s *Varopt) Add(sample Sample, weight float64) { W := s.tau * float64(len(s.T)) if weight > s.tau { - heap.Push(&s.L, individual) + s.L.push(individual) } else { s.X = append(s.X, individual) W += weight } for len(s.L) > 0 && W >= float64(len(s.T)+len(s.X)-1)*s.L[0].weight { - h := heap.Pop(&s.L).(vsample) + h := s.L.pop() s.X = append(s.X, h) W += h.weight } @@ -177,26 +176,49 @@ func (s *Varopt) Tau() float64 { return s.tau } -func (b largeHeap) Len() int { - return len(b) -} +func (lp *largeHeap) push(v vsample) { + l := append(*lp, v) + n := len(l) - 1 -func (b largeHeap) Swap(i, j int) { - b[i], b[j] = b[j], b[i] -} + // This copies the body of heap.up(). + j := n + for { + i := (j - 1) / 2 // parent + if i == j || l[j].weight >= l[i].weight { + break + } + l[i], l[j] = l[j], l[i] + j = i + } -func (b largeHeap) Less(i, j int) bool { - return b[i].weight < b[j].weight + *lp = l } -func (b *largeHeap) Push(x interface{}) { - *b = append(*b, x.(vsample)) -} +func (lp *largeHeap) pop() vsample { + l := *lp + n := len(l) - 1 + result := l[0] + l[0] = l[n] + l = l[:n] + + // This copies the body of heap.down(). + i := 0 + for { + j1 := 2*i + 1 + if j1 >= n || j1 < 0 { // j1 < 0 after int overflow + break + } + j := j1 // left child + if j2 := j1 + 1; j2 < n && l[j2].weight < l[j1].weight { + j = j2 // = 2*i + 2 // right child + } + if l[j].weight >= l[i].weight { + break + } + l[i], l[j] = l[j], l[i] + i = j + } -func (b *largeHeap) Pop() interface{} { - old := *b - n := len(old) - x := old[n-1] - *b = old[0 : n-1] - return x + *lp = l + return result } From b00b2fa4dd994f7f8bcb12518bcc1d84de31c555 Mon Sep 17 00:00:00 2001 From: jmacd Date: Fri, 15 Nov 2019 12:42:56 -0800 Subject: [PATCH 18/30] Add a test --- varopt.go | 2 +- varopt_test.go | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/varopt.go b/varopt.go index f786d12..22751c2 100644 --- a/varopt.go +++ b/varopt.go @@ -49,7 +49,7 @@ type vsample struct { type largeHeap []vsample -var ErrInvalidWeight = fmt.Errorf("Negative or NaN weight") +var ErrInvalidWeight = fmt.Errorf("Negative, zero, or NaN weight") // New returns a new Varopt sampler with given capacity (i.e., // reservoir size) and random number generator. diff --git a/varopt_test.go b/varopt_test.go index 06f7bc1..c7f0b68 100644 --- a/varopt_test.go +++ b/varopt_test.go @@ -147,3 +147,17 @@ func testUnbiased(t *testing.T, bbr, bsr float64) { [][][]varopt.Sample{smallBlocks, bigBlocks}, ) } + +func TestInvalidWeight(t *testing.T) { + rnd := rand.New(rand.NewSource(98887)) + v := varopt.New(1, rnd) + + err := v.Add(nil, math.NaN()) + require.Equal(t, err, varopt.ErrInvalidWeight) + + err = v.Add(nil, -1) + require.Equal(t, err, varopt.ErrInvalidWeight) + + err = v.Add(nil, 0) + require.Equal(t, err, varopt.ErrInvalidWeight) +} From f53b1a85587a206e19f8837dc367a7ff3f87acd9 Mon Sep 17 00:00:00 2001 From: jmacd Date: Fri, 15 Nov 2019 21:51:31 -0800 Subject: [PATCH 19/30] Move heap code to internal for better testing --- internal/sampleheap.go | 57 ++++++++++++++++++++++++ internal/sampleheap_test.go | 58 ++++++++++++++++++++++++ varopt.go | 88 ++++++++----------------------------- 3 files changed, 133 insertions(+), 70 deletions(-) create mode 100644 internal/sampleheap.go create mode 100644 internal/sampleheap_test.go diff --git a/internal/sampleheap.go b/internal/sampleheap.go new file mode 100644 index 0000000..c3d82a5 --- /dev/null +++ b/internal/sampleheap.go @@ -0,0 +1,57 @@ +// Copyright 2019, LightStep Inc. + +package internal + +type Vsample struct { + Sample interface{} + Weight float64 +} + +type SampleHeap []Vsample + +func (sh *SampleHeap) Push(v Vsample) { + l := append(*sh, v) + n := len(l) - 1 + + // This copies the body of heap.up(). + j := n + for { + i := (j - 1) / 2 // parent + if i == j || l[j].Weight >= l[i].Weight { + break + } + l[i], l[j] = l[j], l[i] + j = i + } + + *sh = l +} + +func (sh *SampleHeap) Pop() Vsample { + l := *sh + n := len(l) - 1 + result := l[0] + l[0] = l[n] + l = l[:n] + + // This copies the body of heap.down(). + i := 0 + for { + j1 := 2*i + 1 + if j1 >= n || j1 < 0 { // j1 < 0 after int overflow + break + } + j := j1 // left child + if j2 := j1 + 1; j2 < n && l[j2].Weight < l[j1].Weight { + j = j2 // = 2*i + 2 // right child + } + if l[j].Weight >= l[i].Weight { + break + } + l[i], l[j] = l[j], l[i] + i = j + } + + *sh = l + return result +} diff --git a/internal/sampleheap_test.go b/internal/sampleheap_test.go new file mode 100644 index 0000000..50615c0 --- /dev/null +++ b/internal/sampleheap_test.go @@ -0,0 +1,58 @@ +// Copyright 2019, LightStep Inc. + +package internal_test + +import ( + "container/heap" + "math/rand" + "testing" + + "github.com/lightstep/varopt/internal" + "github.com/stretchr/testify/require" +) + +type simpleHeap []float64 + +func (s *simpleHeap) Len() int { + return len(*s) +} + +func (s *simpleHeap) Swap(i, j int) { + (*s)[i], (*s)[j] = (*s)[j], (*s)[i] +} + +func (s *simpleHeap) Less(i, j int) bool { + return (*s)[i] < (*s)[j] +} + +func (s *simpleHeap) Push(x interface{}) { + *s = append(*s, x.(float64)) +} + +func (s *simpleHeap) Pop() interface{} { + old := *s + n := len(old) + x := old[n-1] + *s = old[0 : n-1] + return x +} + +func TestLargeHeap(t *testing.T) { + var L internal.SampleHeap + var S simpleHeap + + for i := 0; i < 1e6; i++ { + v := rand.NormFloat64() + L.Push(internal.Vsample{Weight: v}) + heap.Push(&S, v) + } + + for len(S) > 0 { + v1 := heap.Pop(&S).(float64) + v2 := L.Pop().Weight + + require.Equal(t, v1, v2) + } + + require.Equal(t, 0, len(L)) +} diff --git a/varopt.go b/varopt.go index a535d35..44b5843 100644 --- a/varopt.go +++ b/varopt.go @@ -6,6 +6,8 @@ import ( "fmt" "math" "math/rand" + + "github.com/lightstep/varopt/internal" ) // Varopt implements the algorithm from Stream sampling for @@ -18,13 +20,13 @@ type Varopt struct { rnd *rand.Rand // Large-weight items stored in a min-heap. - L largeHeap + L internal.SampleHeap // Light-weight items. - T []vsample + T []internal.Vsample // Temporary buffer. - X []vsample + X []internal.Vsample // Current threshold tau float64 @@ -41,13 +43,6 @@ type Varopt struct { // passed in separately. type Sample interface{} -type vsample struct { - sample Sample - weight float64 -} - -type largeHeap []vsample - var ErrInvalidWeight = fmt.Errorf("Negative, zero, or NaN weight") // New returns a new Varopt sampler with given capacity (i.e., @@ -63,9 +58,9 @@ func New(capacity int, rnd *rand.Rand) *Varopt { // // An error will be returned if the weight is either negative or NaN. func (s *Varopt) Add(sample Sample, weight float64) error { - individual := vsample{ - sample: sample, - weight: weight, + individual := internal.Vsample{ + Sample: sample, + Weight: weight, } if weight <= 0 || math.IsNaN(weight) { @@ -76,7 +71,7 @@ func (s *Varopt) Add(sample Sample, weight float64) error { s.totalWeight += weight if s.Size() < s.capacity { - s.L.push(individual) + s.L.Push(individual) return nil } @@ -86,16 +81,16 @@ func (s *Varopt) Add(sample Sample, weight float64) error { W := s.tau * float64(len(s.T)) if weight > s.tau { - s.L.push(individual) + s.L.Push(individual) } else { s.X = append(s.X, individual) W += weight } - for len(s.L) > 0 && W >= float64(len(s.T)+len(s.X)-1)*s.L[0].weight { - h := s.L.pop() + for len(s.L) > 0 && W >= float64(len(s.T)+len(s.X)-1)*s.L[0].Weight { + h := s.L.Pop() s.X = append(s.X, h) - W += h.weight + W += h.Weight } s.tau = W / float64(len(s.T)+len(s.X)-1) @@ -103,7 +98,7 @@ func (s *Varopt) Add(sample Sample, weight float64) error { d := 0 for d < len(s.X) && r >= 0 { - wxd := s.X[d].weight + wxd := s.X[d].Weight r -= (1 - wxd/s.tau) d++ } @@ -136,10 +131,10 @@ func (s *Varopt) uniform() float64 { // GetOriginalWeight(i). func (s *Varopt) Get(i int) (Sample, float64) { if i < len(s.L) { - return s.L[i].sample, s.L[i].weight + return s.L[i].Sample, s.L[i].Weight } - return s.T[i-len(s.L)].sample, s.tau + return s.T[i-len(s.L)].Sample, s.tau } // GetOriginalWeight returns the original input weight of the sample @@ -147,10 +142,10 @@ func (s *Varopt) Get(i int) (Sample, float64) { // frequency from the adjusted sample weight. func (s *Varopt) GetOriginalWeight(i int) float64 { if i < len(s.L) { - return s.L[i].weight + return s.L[i].Weight } - return s.T[i-len(s.L)].weight + return s.T[i-len(s.L)].Weight } // Capacity returns the size of the reservoir. This is the maximum @@ -181,50 +176,3 @@ func (s *Varopt) TotalCount() int { func (s *Varopt) Tau() float64 { return s.tau } - -func (lp *largeHeap) push(v vsample) { - l := append(*lp, v) - n := len(l) - 1 - - // This copies the body of heap.up(). - j := n - for { - i := (j - 1) / 2 // parent - if i == j || l[j].weight >= l[i].weight { - break - } - l[i], l[j] = l[j], l[i] - j = i - } - - *lp = l -} - -func (lp *largeHeap) pop() vsample { - l := *lp - n := len(l) - 1 - result := l[0] - l[0] = l[n] - l = l[:n] - - // This copies the body of heap.down(). - i := 0 - for { - j1 := 2*i + 1 - if j1 >= n || j1 < 0 { // j1 < 0 after int overflow - break - } - j := j1 // left child - if j2 := j1 + 1; j2 < n && l[j2].weight < l[j1].weight { - j = j2 // = 2*i + 2 // right child - } - if l[j].weight >= l[i].weight { - break - } - l[i], l[j] = l[j], l[i] - i = j - } - - *lp = l - return result -} From 4b2b9c3b5b56890c39de16f9adf85b0306eeac8d Mon Sep 17 00:00:00 2001 From: jmacd Date: Fri, 22 Nov 2019 22:48:50 -0800 Subject: [PATCH 20/30] Add a Reset method --- varopt.go | 11 +++++++++++ varopt_test.go | 25 +++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/varopt.go b/varopt.go index 44b5843..37598cc 100644 --- a/varopt.go +++ b/varopt.go @@ -54,6 +54,17 @@ func New(capacity int, rnd *rand.Rand) *Varopt { } } +// Reset returns the sampler to its initial state, maintaining its +// capacity and random number source. +func (s *Varopt) Reset() { + s.L = s.L[:0] + s.T = s.T[:0] + s.X = s.X[:0] + s.tau = 0 + s.totalCount = 0 + s.totalWeight = 0 +} + // Add considers a new observation for the sample with given weight. // // An error will be returned if the weight is either negative or NaN. diff --git a/varopt_test.go b/varopt_test.go index c7f0b68..2e1a9cd 100644 --- a/varopt_test.go +++ b/varopt_test.go @@ -161,3 +161,28 @@ func TestInvalidWeight(t *testing.T) { err = v.Add(nil, 0) require.Equal(t, err, varopt.ErrInvalidWeight) } + +func TestReset(t *testing.T) { + const capacity = 10 + const insert = 100 + rnd := rand.New(rand.NewSource(98887)) + v := varopt.New(capacity, rnd) + + sum := 0. + for i := 1.; i <= insert; i++ { + v.Add(nil, i) + sum += i + } + + require.Equal(t, capacity, v.Size()) + require.Equal(t, insert, v.TotalCount()) + require.Equal(t, sum, v.TotalWeight()) + require.Less(t, 0., v.Tau()) + + v.Reset() + + require.Equal(t, 0, v.Size()) + require.Equal(t, 0, v.TotalCount()) + require.Equal(t, 0., v.TotalWeight()) + require.Equal(t, 0., v.Tau()) +} From d2bfbc820e6da2b0fcda49c0485fdb2abb718349 Mon Sep 17 00:00:00 2001 From: jmacd Date: Fri, 22 Nov 2019 23:16:41 -0800 Subject: [PATCH 21/30] Return the ejected sample from Add --- varopt.go | 13 +++++++---- varopt_test.go | 58 +++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 64 insertions(+), 7 deletions(-) diff --git a/varopt.go b/varopt.go index 37598cc..0b8a570 100644 --- a/varopt.go +++ b/varopt.go @@ -66,16 +66,18 @@ func (s *Varopt) Reset() { } // Add considers a new observation for the sample with given weight. +// If there is an item ejected from the same as a result, the item is +// returned. // // An error will be returned if the weight is either negative or NaN. -func (s *Varopt) Add(sample Sample, weight float64) error { +func (s *Varopt) Add(sample Sample, weight float64) (Sample, error) { individual := internal.Vsample{ Sample: sample, Weight: weight, } if weight <= 0 || math.IsNaN(weight) { - return ErrInvalidWeight + return nil, ErrInvalidWeight } s.totalCount++ @@ -83,7 +85,7 @@ func (s *Varopt) Add(sample Sample, weight float64) error { if s.Size() < s.capacity { s.L.Push(individual) - return nil + return nil, nil } // the X <- {} step from the paper is not done here, @@ -113,19 +115,22 @@ func (s *Varopt) Add(sample Sample, weight float64) error { r -= (1 - wxd/s.tau) d++ } + var eject Sample if r < 0 { if d < len(s.X) { s.X[d], s.X[len(s.X)-1] = s.X[len(s.X)-1], s.X[d] } + eject = s.X[len(s.X)-1].Sample s.X = s.X[:len(s.X)-1] } else { ti := s.rnd.Intn(len(s.T)) s.T[ti], s.T[len(s.T)-1] = s.T[len(s.T)-1], s.T[ti] + eject = s.T[len(s.T)-1].Sample s.T = s.T[:len(s.T)-1] } s.T = append(s.T, s.X...) s.X = s.X[:0] - return nil + return eject, nil } func (s *Varopt) uniform() float64 { diff --git a/varopt_test.go b/varopt_test.go index 2e1a9cd..484cf47 100644 --- a/varopt_test.go +++ b/varopt_test.go @@ -152,13 +152,13 @@ func TestInvalidWeight(t *testing.T) { rnd := rand.New(rand.NewSource(98887)) v := varopt.New(1, rnd) - err := v.Add(nil, math.NaN()) + _, err := v.Add(nil, math.NaN()) require.Equal(t, err, varopt.ErrInvalidWeight) - err = v.Add(nil, -1) + _, err = v.Add(nil, -1) require.Equal(t, err, varopt.ErrInvalidWeight) - err = v.Add(nil, 0) + _, err = v.Add(nil, 0) require.Equal(t, err, varopt.ErrInvalidWeight) } @@ -186,3 +186,55 @@ func TestReset(t *testing.T) { require.Equal(t, 0., v.TotalWeight()) require.Equal(t, 0., v.Tau()) } + +func TestEject(t *testing.T) { + const capacity = 100 + const rounds = 10000 + const maxvalue = 10000 + + entries := make([]int, capacity+1) + freelist := make([]*int, capacity+1) + + for i := range entries { + freelist[i] = &entries[i] + } + + // Make two deterministically equal samplers + rnd1 := rand.New(rand.NewSource(98887)) + rnd2 := rand.New(rand.NewSource(98887)) + vsrc := rand.New(rand.NewSource(98887)) + + expected := varopt.New(capacity, rnd1) + ejector := varopt.New(capacity, rnd2) + + for i := 0; i < rounds; i++ { + value := vsrc.Intn(maxvalue) + weight := vsrc.ExpFloat64() + + _, _ = expected.Add(&value, weight) + + lastitem := len(freelist) - 1 + item := freelist[lastitem] + freelist = freelist[:lastitem] + + *item = value + eject, _ := ejector.Add(item, weight) + + if eject != nil { + freelist = append(freelist, eject.(*int)) + } + } + + require.Equal(t, expected.Size(), ejector.Size()) + require.Equal(t, expected.TotalCount(), ejector.TotalCount()) + require.Equal(t, expected.TotalWeight(), ejector.TotalWeight()) + require.Equal(t, expected.Tau(), ejector.Tau()) + + for i := 0; i < capacity; i++ { + expectItem, expectWeight := expected.Get(i) + ejectItem, ejectWeight := expected.Get(i) + + require.Equal(t, *expectItem.(*int), *ejectItem.(*int)) + require.Equal(t, expectWeight, ejectWeight) + } +} From be3c37ae3ccdac4cb2b999e6fb494c6546665bbc Mon Sep 17 00:00:00 2001 From: jmacd Date: Fri, 22 Nov 2019 23:18:39 -0800 Subject: [PATCH 22/30] Typo --- varopt.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/varopt.go b/varopt.go index 0b8a570..99fd01c 100644 --- a/varopt.go +++ b/varopt.go @@ -66,8 +66,8 @@ func (s *Varopt) Reset() { } // Add considers a new observation for the sample with given weight. -// If there is an item ejected from the same as a result, the item is -// returned. +// If there is an item ejected from the sample as a result, the item +// is returned to allow re-use of memory. // // An error will be returned if the weight is either negative or NaN. func (s *Varopt) Add(sample Sample, weight float64) (Sample, error) { From 30965f6f1f8f97ecc0ead16b2b5fbf7b2b1e0ad3 Mon Sep 17 00:00:00 2001 From: jmacd Date: Sun, 24 Nov 2019 21:29:48 -0800 Subject: [PATCH 23/30] Pre-allocate main buffers --- varopt.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/varopt.go b/varopt.go index 99fd01c..f573850 100644 --- a/varopt.go +++ b/varopt.go @@ -51,6 +51,8 @@ func New(capacity int, rnd *rand.Rand) *Varopt { return &Varopt{ capacity: capacity, rnd: rnd, + L: make(internal.SampleHeap, 0, capacity), + T: make(internal.SampleHeap, 0, capacity), } } From 8de72e7fedb787571ca65e12cdfe33fde01a485b Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 13 Jul 2021 16:24:45 -0700 Subject: [PATCH 24/30] Check for infinity, it creates trouble (#15) --- varopt.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/varopt.go b/varopt.go index f573850..dbe605b 100644 --- a/varopt.go +++ b/varopt.go @@ -43,7 +43,7 @@ type Varopt struct { // passed in separately. type Sample interface{} -var ErrInvalidWeight = fmt.Errorf("Negative, zero, or NaN weight") +var ErrInvalidWeight = fmt.Errorf("Negative, Zero, Inf or NaN weight") // New returns a new Varopt sampler with given capacity (i.e., // reservoir size) and random number generator. @@ -78,7 +78,7 @@ func (s *Varopt) Add(sample Sample, weight float64) (Sample, error) { Weight: weight, } - if weight <= 0 || math.IsNaN(weight) { + if weight <= 0 || math.IsNaN(weight) || math.IsInf(weight, 1) { return nil, ErrInvalidWeight } From 69a5c9ff0cf1934f2d96025f682f54a5235c5670 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 13 Jul 2021 16:36:02 -0700 Subject: [PATCH 25/30] Use Apache Software Licence v2 (#17) --- LICENSE | 222 ++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 201 insertions(+), 21 deletions(-) diff --git a/LICENSE b/LICENSE index 017c0cd..261eeb9 100644 --- a/LICENSE +++ b/LICENSE @@ -1,21 +1,201 @@ -The MIT License (MIT) - -Copyright (c) 2019 - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. From cc9013009aa09971e79b29725f8a1a0c3c8cb984 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Tue, 13 Jul 2021 21:36:31 -0700 Subject: [PATCH 26/30] Remove a test-only method (#16) * Remove a test-only method * update circle Go version * simplify circleci * mod update --- .circleci/config.yml | 7 +------ go.mod | 2 +- go.sum | 1 + simple/simple.go | 7 +------ varopt_test.go | 2 +- 5 files changed, 5 insertions(+), 14 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index b31eb32..3b06f3c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -4,13 +4,8 @@ jobs: test: working_directory: ~/go/src/github.com/lightstep/varopt docker: - - image: circleci/golang:1.13 + - image: circleci/golang:1.15 steps: - - run: - name: "configure environment" - command: | - echo 'export GOPATH="$HOME/go"' >> $BASH_ENV - source $BASH_ENV - checkout - run: go test -v ./... diff --git a/go.mod b/go.mod index 1f0ead4..e61e0b6 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,5 @@ module github.com/lightstep/varopt -go 1.13 +go 1.15 require github.com/stretchr/testify v1.4.0 diff --git a/go.sum b/go.sum index e863f51..8fdee58 100644 --- a/go.sum +++ b/go.sum @@ -5,6 +5,7 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/simple/simple.go b/simple/simple.go index c086dfa..e3fb340 100644 --- a/simple/simple.go +++ b/simple/simple.go @@ -53,17 +53,12 @@ func (s *Simple) Get(i int) varopt.Sample { return s.buffer[i] } -// Get returns the number of items in the sample. If the reservoir is +// Size returns the number of items in the sample. If the reservoir is // full, Size() equals Capacity(). func (s *Simple) Size() int { return len(s.buffer) } -// Weight returns the adjusted weight of each item in the sample. -func (s *Simple) Weight() float64 { - return float64(s.observed) / float64(s.Size()) -} - // Count returns the number of items that were observed. func (s *Simple) Count() int { return s.observed diff --git a/varopt_test.go b/varopt_test.go index 484cf47..a56c691 100644 --- a/varopt_test.go +++ b/varopt_test.go @@ -113,7 +113,7 @@ func testUnbiased(t *testing.T, bbr, bsr float64) { ss.Add(s) } - weight := ss.Weight() + weight := float64(ss.Count()) / float64(ss.Size()) for i := 0; i < ss.Size(); i++ { vsample.Add(ss.Get(i), weight) } From ef41899341951e2bb7f0c57d8c6933cc817a5585 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Mon, 20 Nov 2023 13:05:23 -0800 Subject: [PATCH 27/30] Make this package generic --- .gitignore | 3 ++ .gitignore~ | 2 ++ benchmark_test.go | 6 ++-- frequency_test.go | 11 ++++--- go.mod | 9 ++++-- go.sum | 31 ++++++++++++++++---- internal/sampleheap.go | 10 +++---- internal/sampleheap_test.go | 9 ++++-- simple/simple.go | 33 +++++++++++---------- simple/simple_test.go | 8 ++--- varopt.go | 58 +++++++++++++++++++------------------ varopt_test.go | 54 +++++++++++++++++----------------- weighted_test.go | 5 ++-- 13 files changed, 137 insertions(+), 102 deletions(-) create mode 100644 .gitignore create mode 100644 .gitignore~ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7a3bfdc --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +go.work +go.work.sum + diff --git a/.gitignore~ b/.gitignore~ new file mode 100644 index 0000000..42e9f7c --- /dev/null +++ b/.gitignore~ @@ -0,0 +1,2 @@ +go.work +go.work.sum diff --git a/benchmark_test.go b/benchmark_test.go index 131a08d..5ca86fe 100644 --- a/benchmark_test.go +++ b/benchmark_test.go @@ -56,10 +56,12 @@ func BenchmarkAdd_Exp_1000000(b *testing.B) { benchmarkAdd(b, 1000000, expValue) } +type thing struct{} + func benchmarkAdd(b *testing.B, size int, f func(rnd *rand.Rand) float64) { b.ReportAllocs() rnd := rand.New(rand.NewSource(3331)) - v := varopt.New(size, rnd) + v := varopt.New[thing](size, rnd) weights := make([]float64, b.N) for i := 0; i < b.N; i++ { weights[i] = f(rnd) @@ -67,6 +69,6 @@ func benchmarkAdd(b *testing.B, size int, f func(rnd *rand.Rand) float64) { b.StartTimer() for i := 0; i < b.N; i++ { - v.Add(nil, weights[i]) + v.Add(thing{}, weights[i]) } } diff --git a/frequency_test.go b/frequency_test.go index e4b3429..a8b0f3a 100644 --- a/frequency_test.go +++ b/frequency_test.go @@ -16,7 +16,7 @@ type curve struct { stddev float64 } -type point struct { +type testPoint struct { color int xvalue float64 } @@ -47,7 +47,7 @@ func ExampleVaropt_GetOriginalWeight() { // Construct a timeseries consisting of three colored signals, // for x=0 to x=60 seconds. - var points []point + var points []testPoint // origCounts stores the original signals at second granularity. origCounts := make([][]int, len(colors)) @@ -66,7 +66,7 @@ func ExampleVaropt_GetOriginalWeight() { continue } origCounts[choose][int(math.Floor(xvalue))]++ - points = append(points, point{ + points = append(points, testPoint{ color: choose, xvalue: xvalue, }) @@ -83,7 +83,7 @@ func ExampleVaropt_GetOriginalWeight() { // weight. This ensures a uniform distribution of points in each // second. sampleSize := int(sampleRatio * float64(totalCount)) - sampler := varopt.New(sampleSize, rnd) + sampler := varopt.New[testPoint](sampleSize, rnd) for _, point := range points { second := int(math.Floor(point.xvalue)) prob := float64(xcount[second]) / float64(totalCount) @@ -103,9 +103,8 @@ func ExampleVaropt_GetOriginalWeight() { // The effective count of each sample point is its output // weight divided by its original weight. for i := 0; i < sampler.Size(); i++ { - sample, weight := sampler.Get(i) + point, weight := sampler.Get(i) origWeight := sampler.GetOriginalWeight(i) - point := sample.(point) second := int(math.Floor(point.xvalue)) sampleCounts[point.color][second] += (weight / origWeight) pointCounts[second]++ diff --git a/go.mod b/go.mod index e61e0b6..6d6fe29 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,10 @@ module github.com/lightstep/varopt -go 1.15 +go 1.21 -require github.com/stretchr/testify v1.4.0 +require ( + github.com/kr/pretty v0.3.1 // indirect + github.com/rogpeppe/go-internal v1.10.0 // indirect + github.com/stretchr/testify v1.8.4 + gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect +) diff --git a/go.sum b/go.sum index 8fdee58..519e76d 100644 --- a/go.sum +++ b/go.sum @@ -1,11 +1,30 @@ -github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= +github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= -github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/sampleheap.go b/internal/sampleheap.go index c3d82a5..3b9a201 100644 --- a/internal/sampleheap.go +++ b/internal/sampleheap.go @@ -2,14 +2,14 @@ package internal -type Vsample struct { - Sample interface{} +type Vsample[T any] struct { + Sample T Weight float64 } -type SampleHeap []Vsample +type SampleHeap[T any] []Vsample[T] -func (sh *SampleHeap) Push(v Vsample) { +func (sh *SampleHeap[T]) Push(v Vsample[T]) { l := append(*sh, v) n := len(l) - 1 @@ -27,7 +27,7 @@ func (sh *SampleHeap) Push(v Vsample) { *sh = l } -func (sh *SampleHeap) Pop() Vsample { +func (sh *SampleHeap[T]) Pop() Vsample[T] { l := *sh n := len(l) - 1 result := l[0] diff --git a/internal/sampleheap_test.go b/internal/sampleheap_test.go index 50615c0..d9647a1 100644 --- a/internal/sampleheap_test.go +++ b/internal/sampleheap_test.go @@ -38,17 +38,20 @@ func (s *simpleHeap) Pop() interface{} { } func TestLargeHeap(t *testing.T) { - var L internal.SampleHeap + var L internal.SampleHeap[float64] var S simpleHeap for i := 0; i < 1e6; i++ { v := rand.NormFloat64() - L.Push(internal.Vsample{Weight: v}) + L.Push(internal.Vsample[float64]{ + Sample: v, + Weight: v, + }) heap.Push(&S, v) } for len(S) > 0 { - v1 := heap.Pop(&S).(float64) + v1 := heap.Pop(&S) v2 := L.Pop().Weight require.Equal(t, v1, v2) diff --git a/simple/simple.go b/simple/simple.go index e3fb340..f36c622 100644 --- a/simple/simple.go +++ b/simple/simple.go @@ -4,62 +4,63 @@ package simple import ( "math/rand" - - "github.com/lightstep/varopt" ) // Simple implements unweighted reservoir sampling using Algorithm R // from "Random sampling with a reservoir" by Jeffrey Vitter (1985) // https://en.wikipedia.org/wiki/Reservoir_sampling#Algorithm_R -type Simple struct { +type Simple[T any] struct { capacity int observed int - buffer []varopt.Sample + buffer []T rnd *rand.Rand } // New returns a simple reservoir sampler with given capacity // (i.e., reservoir size) and random number generator. -func New(capacity int, rnd *rand.Rand) *Simple { - return &Simple{ +func New[T any](capacity int, rnd *rand.Rand) *Simple[T] { + s := &Simple[T]{} + s.Init(capacity, rnd) + return s +} + +func (s *Simple[T]) Init(capacity int, rnd *rand.Rand) { + *s = Simple[T]{ capacity: capacity, + buffer: make([]T, 0, s.capacity), rnd: rnd, } } // Add considers a new observation for the sample. Items have unit // weight. -func (s *Simple) Add(span varopt.Sample) { +func (s *Simple[T]) Add(item T) { s.observed++ - if s.buffer == nil { - s.buffer = make([]varopt.Sample, 0, s.capacity) - } - if len(s.buffer) < s.capacity { - s.buffer = append(s.buffer, span) + s.buffer = append(s.buffer, item) return } // Give this a capacity/observed chance of replacing an existing entry. index := s.rnd.Intn(s.observed) if index < s.capacity { - s.buffer[index] = span + s.buffer[index] = item } } // Get returns the i'th selected item from the sample. -func (s *Simple) Get(i int) varopt.Sample { +func (s *Simple[T]) Get(i int) T { return s.buffer[i] } // Size returns the number of items in the sample. If the reservoir is // full, Size() equals Capacity(). -func (s *Simple) Size() int { +func (s *Simple[T]) Size() int { return len(s.buffer) } // Count returns the number of items that were observed. -func (s *Simple) Count() int { +func (s *Simple[T]) Count() int { return s.observed } diff --git a/simple/simple_test.go b/simple/simple_test.go index 4ea9592..fe8cf01 100644 --- a/simple/simple_test.go +++ b/simple/simple_test.go @@ -10,8 +10,6 @@ import ( "github.com/stretchr/testify/require" ) -type iRec int - func TestSimple(t *testing.T) { const ( popSize = 1e6 @@ -22,11 +20,11 @@ func TestSimple(t *testing.T) { rnd := rand.New(rand.NewSource(17167)) - ss := simple.New(sampleSize, rnd) + ss := simple.New[int](sampleSize, rnd) psum := 0. for i := 0; i < popSize; i++ { - ss.Add(iRec(i)) + ss.Add(i) psum += float64(i) } @@ -34,7 +32,7 @@ func TestSimple(t *testing.T) { ssum := 0.0 for i := 0; i < sampleSize; i++ { - ssum += float64(ss.Get(i).(iRec)) + ssum += float64(ss.Get(i)) } require.InEpsilon(t, ssum/float64(ss.Size()), psum/popSize, epsilon) diff --git a/varopt.go b/varopt.go index dbe605b..179e689 100644 --- a/varopt.go +++ b/varopt.go @@ -15,18 +15,18 @@ import ( // Duffield, Haim Kaplan, Carsten Lund, Mikkel Thorup 2008 // // https://arxiv.org/pdf/0803.0473.pdf -type Varopt struct { +type Varopt[T any] struct { // Random number generator rnd *rand.Rand // Large-weight items stored in a min-heap. - L internal.SampleHeap + L internal.SampleHeap[T] // Light-weight items. - T []internal.Vsample + T []internal.Vsample[T] // Temporary buffer. - X []internal.Vsample + X []internal.Vsample[T] // Current threshold tau float64 @@ -38,27 +38,28 @@ type Varopt struct { totalWeight float64 } -// Sample is an empty interface that represents a sample item. -// Sampling algorithms treat these as opaque, as their weight is -// passed in separately. -type Sample interface{} - var ErrInvalidWeight = fmt.Errorf("Negative, Zero, Inf or NaN weight") // New returns a new Varopt sampler with given capacity (i.e., // reservoir size) and random number generator. -func New(capacity int, rnd *rand.Rand) *Varopt { - return &Varopt{ +func New[T any](capacity int, rnd *rand.Rand) *Varopt[T] { + v := &Varopt[T]{} + v.Init(capacity, rnd) + return v +} + +func (v *Varopt[T]) Init(capacity int, rnd *rand.Rand) { + *v = Varopt[T]{ capacity: capacity, rnd: rnd, - L: make(internal.SampleHeap, 0, capacity), - T: make(internal.SampleHeap, 0, capacity), + L: make(internal.SampleHeap[T], 0, capacity), + T: make(internal.SampleHeap[T], 0, capacity), } } // Reset returns the sampler to its initial state, maintaining its // capacity and random number source. -func (s *Varopt) Reset() { +func (s *Varopt[T]) Reset() { s.L = s.L[:0] s.T = s.T[:0] s.X = s.X[:0] @@ -72,14 +73,15 @@ func (s *Varopt) Reset() { // is returned to allow re-use of memory. // // An error will be returned if the weight is either negative or NaN. -func (s *Varopt) Add(sample Sample, weight float64) (Sample, error) { - individual := internal.Vsample{ - Sample: sample, +func (s *Varopt[T]) Add(item T, weight float64) (T, error) { + var zero T + individual := internal.Vsample[T]{ + Sample: item, Weight: weight, } if weight <= 0 || math.IsNaN(weight) || math.IsInf(weight, 1) { - return nil, ErrInvalidWeight + return zero, ErrInvalidWeight } s.totalCount++ @@ -87,7 +89,7 @@ func (s *Varopt) Add(sample Sample, weight float64) (Sample, error) { if s.Size() < s.capacity { s.L.Push(individual) - return nil, nil + return zero, nil } // the X <- {} step from the paper is not done here, @@ -117,7 +119,7 @@ func (s *Varopt) Add(sample Sample, weight float64) (Sample, error) { r -= (1 - wxd/s.tau) d++ } - var eject Sample + var eject T if r < 0 { if d < len(s.X) { s.X[d], s.X[len(s.X)-1] = s.X[len(s.X)-1], s.X[d] @@ -135,7 +137,7 @@ func (s *Varopt) Add(sample Sample, weight float64) (Sample, error) { return eject, nil } -func (s *Varopt) uniform() float64 { +func (s *Varopt[T]) uniform() float64 { for { r := s.rnd.Float64() if r != 0.0 { @@ -147,7 +149,7 @@ func (s *Varopt) uniform() float64 { // Get() returns the i'th sample and its adjusted weight. To obtain // the sample's original weight (i.e. what was passed to Add), use // GetOriginalWeight(i). -func (s *Varopt) Get(i int) (Sample, float64) { +func (s *Varopt[T]) Get(i int) (T, float64) { if i < len(s.L) { return s.L[i].Sample, s.L[i].Weight } @@ -158,7 +160,7 @@ func (s *Varopt) Get(i int) (Sample, float64) { // GetOriginalWeight returns the original input weight of the sample // item that was passed to Add(). This can be useful for computing a // frequency from the adjusted sample weight. -func (s *Varopt) GetOriginalWeight(i int) float64 { +func (s *Varopt[T]) GetOriginalWeight(i int) float64 { if i < len(s.L) { return s.L[i].Weight } @@ -168,29 +170,29 @@ func (s *Varopt) GetOriginalWeight(i int) float64 { // Capacity returns the size of the reservoir. This is the maximum // size of the sample. -func (s *Varopt) Capacity() int { +func (s *Varopt[T]) Capacity() int { return s.capacity } // Size returns the current number of items in the sample. If the // reservoir is full, this returns Capacity(). -func (s *Varopt) Size() int { +func (s *Varopt[T]) Size() int { return len(s.L) + len(s.T) } // TotalWeight returns the sum of weights that were passed to Add(). -func (s *Varopt) TotalWeight() float64 { +func (s *Varopt[T]) TotalWeight() float64 { return s.totalWeight } // TotalCount returns the number of calls to Add(). -func (s *Varopt) TotalCount() int { +func (s *Varopt[T]) TotalCount() int { return s.totalCount } // Tau returns the current large-weight threshold. Weights larger // than Tau() carry their exact weight in the sample. See the VarOpt // paper for details. -func (s *Varopt) Tau() float64 { +func (s *Varopt[T]) Tau() float64 { return s.tau } diff --git a/varopt_test.go b/varopt_test.go index a56c691..ded79e8 100644 --- a/varopt_test.go +++ b/varopt_test.go @@ -27,6 +27,8 @@ const ( epsilon = 0.08 ) +type testInt int + func TestUnbiased(t *testing.T) { var ( // Ratio of big blocks to small blocks @@ -55,12 +57,12 @@ func testUnbiased(t *testing.T, bbr, bsr float64) { extra = popSize - bigSize*numBig - smallSize*numSmall ) - population := make([]varopt.Sample, popSize) + population := make([]testInt, popSize) psum := 0.0 for i := range population { - population[i] = i + population[i] = testInt(i) psum += float64(i) } @@ -69,17 +71,17 @@ func testUnbiased(t *testing.T, bbr, bsr float64) { // population[i], population[j] = population[j], population[i] // }) - smallBlocks := make([][]varopt.Sample, numSmall) - bigBlocks := make([][]varopt.Sample, numBig) + smallBlocks := make([][]testInt, numSmall) + bigBlocks := make([][]testInt, numBig) for i := 0; i < numSmall; i++ { - smallBlocks[i] = make([]varopt.Sample, smallSize) + smallBlocks[i] = make([]testInt, smallSize) } for i := 0; i < numBig; i++ { if i == 0 { - bigBlocks[0] = make([]varopt.Sample, bigSize+extra) + bigBlocks[0] = make([]testInt, bigSize+extra) } else { - bigBlocks[i] = make([]varopt.Sample, bigSize) + bigBlocks[i] = make([]testInt, bigSize) } } @@ -101,13 +103,13 @@ func testUnbiased(t *testing.T, bbr, bsr float64) { maxDiff := 0.0 rnd := rand.New(rand.NewSource(98887)) - func(allBlockLists ...[][][]varopt.Sample) { + func(allBlockLists ...[][][]testInt) { for _, blockLists := range allBlockLists { - vsample := varopt.New(sampleSize, rnd) + vsample := varopt.New[testInt](sampleSize, rnd) for _, blockList := range blockLists { for _, block := range blockList { - ss := simple.New(sampleSize, rnd) + ss := simple.New[testInt](sampleSize, rnd) for _, s := range block { ss.Add(s) @@ -126,7 +128,7 @@ func testUnbiased(t *testing.T, bbr, bsr float64) { for i := 0; i < vsample.Size(); i++ { v, w := vsample.Get(i) - vi := v.(int) + vi := int(v) if vi%2 == 0 { even++ } else { @@ -143,22 +145,22 @@ func testUnbiased(t *testing.T, bbr, bsr float64) { require.InEpsilon(t, odd, even, epsilon) } }( - [][][]varopt.Sample{bigBlocks, smallBlocks}, - [][][]varopt.Sample{smallBlocks, bigBlocks}, + [][][]testInt{bigBlocks, smallBlocks}, + [][][]testInt{smallBlocks, bigBlocks}, ) } func TestInvalidWeight(t *testing.T) { rnd := rand.New(rand.NewSource(98887)) - v := varopt.New(1, rnd) + v := varopt.New[testInt](1, rnd) - _, err := v.Add(nil, math.NaN()) + _, err := v.Add(1, math.NaN()) require.Equal(t, err, varopt.ErrInvalidWeight) - _, err = v.Add(nil, -1) + _, err = v.Add(1, -1) require.Equal(t, err, varopt.ErrInvalidWeight) - _, err = v.Add(nil, 0) + _, err = v.Add(1, 0) require.Equal(t, err, varopt.ErrInvalidWeight) } @@ -166,11 +168,11 @@ func TestReset(t *testing.T) { const capacity = 10 const insert = 100 rnd := rand.New(rand.NewSource(98887)) - v := varopt.New(capacity, rnd) + v := varopt.New[testInt](capacity, rnd) sum := 0. for i := 1.; i <= insert; i++ { - v.Add(nil, i) + v.Add(testInt(i), i) sum += i } @@ -192,8 +194,8 @@ func TestEject(t *testing.T) { const rounds = 10000 const maxvalue = 10000 - entries := make([]int, capacity+1) - freelist := make([]*int, capacity+1) + entries := make([]testInt, capacity+1) + freelist := make([]*testInt, capacity+1) for i := range entries { freelist[i] = &entries[i] @@ -204,11 +206,11 @@ func TestEject(t *testing.T) { rnd2 := rand.New(rand.NewSource(98887)) vsrc := rand.New(rand.NewSource(98887)) - expected := varopt.New(capacity, rnd1) - ejector := varopt.New(capacity, rnd2) + expected := varopt.New[*testInt](capacity, rnd1) + ejector := varopt.New[*testInt](capacity, rnd2) for i := 0; i < rounds; i++ { - value := vsrc.Intn(maxvalue) + value := testInt(vsrc.Intn(maxvalue)) weight := vsrc.ExpFloat64() _, _ = expected.Add(&value, weight) @@ -221,7 +223,7 @@ func TestEject(t *testing.T) { eject, _ := ejector.Add(item, weight) if eject != nil { - freelist = append(freelist, eject.(*int)) + freelist = append(freelist, eject) } } @@ -234,7 +236,7 @@ func TestEject(t *testing.T) { expectItem, expectWeight := expected.Get(i) ejectItem, ejectWeight := expected.Get(i) - require.Equal(t, *expectItem.(*int), *ejectItem.(*int)) + require.Equal(t, *expectItem, *ejectItem) require.Equal(t, expectWeight, ejectWeight) } } diff --git a/weighted_test.go b/weighted_test.go index 1e82043..1518723 100644 --- a/weighted_test.go +++ b/weighted_test.go @@ -28,7 +28,7 @@ func ExampleNew() { trueTotalWeight := 0.0 rnd := rand.New(rand.NewSource(32491)) - sampler := varopt.New(totalPackets*sampleRatio, rnd) + sampler := varopt.New[packet](totalPackets*sampleRatio, rnd) for i := 0; i < totalPackets; i++ { packet := packet{ @@ -49,8 +49,7 @@ func ExampleNew() { estTotalWeight := 0.0 for i := 0; i < sampler.Size(); i++ { - sample, weight := sampler.Get(i) - packet := sample.(packet) + packet, weight := sampler.Get(i) estSizeByColor[packet.color] += weight estSizeByProtocol[packet.protocol] += weight estTotalWeight += weight From 4cad61273167340d54015896ba65c96ad5245dc4 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Mon, 20 Nov 2023 13:14:30 -0800 Subject: [PATCH 28/30] remove this --- .gitignore~ | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 .gitignore~ diff --git a/.gitignore~ b/.gitignore~ deleted file mode 100644 index 42e9f7c..0000000 --- a/.gitignore~ +++ /dev/null @@ -1,2 +0,0 @@ -go.work -go.work.sum From b627cc4df557edbcac650e7f3c701218cbd58705 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Thu, 23 Nov 2023 05:13:27 -0800 Subject: [PATCH 29/30] test copyfrom; go work sync --- go.mod | 8 ++++++-- go.sum | 11 ++--------- varopt.go | 18 ++++++++++++++++++ varopt_test.go | 23 +++++++++++++++++++++++ 4 files changed, 49 insertions(+), 11 deletions(-) diff --git a/go.mod b/go.mod index 6d6fe29..c60faa9 100644 --- a/go.mod +++ b/go.mod @@ -2,9 +2,13 @@ module github.com/lightstep/varopt go 1.21 +require github.com/stretchr/testify v1.8.4 + require ( + github.com/davecgh/go-spew v1.1.1 // indirect github.com/kr/pretty v0.3.1 // indirect - github.com/rogpeppe/go-internal v1.10.0 // indirect - github.com/stretchr/testify v1.8.4 + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/rogpeppe/go-internal v1.11.0 // indirect gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 519e76d..fa915d6 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,4 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= @@ -13,18 +12,12 @@ github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsK github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= -github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= -github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= -github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= -github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= +github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/varopt.go b/varopt.go index 179e689..e1fbd2a 100644 --- a/varopt.go +++ b/varopt.go @@ -48,6 +48,8 @@ func New[T any](capacity int, rnd *rand.Rand) *Varopt[T] { return v } +// Init initializes a Varopt[T] in-place, avoiding an allocation +// compared with New(). func (v *Varopt[T]) Init(capacity int, rnd *rand.Rand) { *v = Varopt[T]{ capacity: capacity, @@ -68,6 +70,22 @@ func (s *Varopt[T]) Reset() { s.totalWeight = 0 } +// CopyFrom copies the fields of `from` into this Varopt[T]. +func (s *Varopt[T]) CopyFrom(from *Varopt[T]) { + // Copy non-slice fields + cpy := *from + // Keep existing slices, reset + cpy.L = s.L[:0] + cpy.T = s.T[:0] + cpy.X = s.X[:0] + // Append to existing slices + cpy.L = append(cpy.L, from.L...) + cpy.T = append(cpy.T, from.T...) + cpy.X = append(cpy.X, from.X...) + // Assign back to `s` + *s = cpy +} + // Add considers a new observation for the sample with given weight. // If there is an item ejected from the sample as a result, the item // is returned to allow re-use of memory. diff --git a/varopt_test.go b/varopt_test.go index ded79e8..867b7f7 100644 --- a/varopt_test.go +++ b/varopt_test.go @@ -181,12 +181,35 @@ func TestReset(t *testing.T) { require.Equal(t, sum, v.TotalWeight()) require.Less(t, 0., v.Tau()) + var v2 varopt.Varopt[testInt] + v2.Init(capacity, rnd) + v2.CopyFrom(v) + + var expect []testInt + for i := 0; i < v.Size(); i++ { + got, _ := v.Get(i) + expect = append(expect, got) + } + v.Reset() require.Equal(t, 0, v.Size()) require.Equal(t, 0, v.TotalCount()) require.Equal(t, 0., v.TotalWeight()) require.Equal(t, 0., v.Tau()) + + require.Equal(t, capacity, v2.Size()) + require.Equal(t, insert, v2.TotalCount()) + require.Equal(t, sum, v2.TotalWeight()) + require.Less(t, 0., v2.Tau()) + + var have []testInt + for i := 0; i < v2.Size(); i++ { + got, _ := v2.Get(i) + have = append(have, got) + } + require.Equal(t, expect, have) + } func TestEject(t *testing.T) { From 8f3da1f92329c8d3a8b160c3c20e51eff2194d46 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Mon, 27 Nov 2023 08:34:27 -0800 Subject: [PATCH 30/30] add github dir --- .circleci/config.yml | 16 ---------- .github/ISSUE_TEMPLATE/bug_report.md | 20 ++++++++++++ .github/ISSUE_TEMPLATE/feature_request.md | 20 ++++++++++++ .github/pull_request_template.md | 9 ++++++ .github/workflows/build.yml | 32 +++++++++++++++++++ .github/workflows/build.yml~ | 38 +++++++++++++++++++++++ 6 files changed, 119 insertions(+), 16 deletions(-) delete mode 100644 .circleci/config.yml create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md create mode 100644 .github/pull_request_template.md create mode 100644 .github/workflows/build.yml create mode 100644 .github/workflows/build.yml~ diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index 3b06f3c..0000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,16 +0,0 @@ -version: 2 - -jobs: - test: - working_directory: ~/go/src/github.com/lightstep/varopt - docker: - - image: circleci/golang:1.15 - steps: - - checkout - - run: go test -v ./... - -workflows: - version: 2 - test: - jobs: - - test diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..4c0f8e8 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,20 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: '' +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior. Include code samples here when possible. + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..bbcbbe7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: '' +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..713fcc0 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,9 @@ +**Description:** + +**Link to tracking Issue:** + +**Testing:** < Describe what testing was performed and which tests were added.> + +**Documentation:** < Describe the documentation added.> \ No newline at end of file diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..48e9074 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,32 @@ +name: build + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + + build: + strategy: + matrix: + go-version: [1.21.3] + name: Build + runs-on: ubuntu-latest + steps: + + - name: Set up Go 1.x + uses: actions/setup-go@v2 + with: + go-version: ${{ matrix.go-version }} + id: go + + - name: Check out code into the Go module directory + uses: actions/checkout@v2 + + - name: Build + run: make ci + + - name: Code coverage + run: bash <(curl -s https://codecov.io/bash) diff --git a/.github/workflows/build.yml~ b/.github/workflows/build.yml~ new file mode 100644 index 0000000..f3d87dc --- /dev/null +++ b/.github/workflows/build.yml~ @@ -0,0 +1,38 @@ +name: build + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + + build: + strategy: + matrix: + go-version: [1.21.3] + name: Build + runs-on: ubuntu-latest + steps: + + - name: Set up Go 1.x + uses: actions/setup-go@v2 + with: + go-version: ${{ matrix.go-version }} + id: go + + - name: Check out code into the Go module directory + uses: actions/checkout@v2 + + - name: Build + run: make ci + + - name: Code coverage + run: bash <(curl -s https://codecov.io/bash) + + - name: Send telemetry to Lightstep + uses: codeboten/github-action-to-otlp@v1 + with: + endpoint: "ingest.lightstep.com:443" + headers: "lightstep-access-token=${{ secrets.ACCESS_TOKEN }}"