Skip to content

Commit

Permalink
Merge pull request #2 from lightstep/jmacd/godocex
Browse files Browse the repository at this point in the history
Clean up the godoc
  • Loading branch information
jmacd authored Nov 8, 2019
2 parents 0f1df4e + 68f1fab commit bef2465
Show file tree
Hide file tree
Showing 7 changed files with 62 additions and 14 deletions.
22 changes: 22 additions & 0 deletions doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Copyright 2019, LightStep Inc.

/*
Package varopt is an implementation of VarOpt, an unbiased weighted
sampling algorithm described in the paper "Stream sampling for
variance-optimal estimation of subset sums"
https://arxiv.org/pdf/0803.0473.pdf (2008), by Edith Cohen, Nick
Duffield, Haim Kaplan, Carsten Lund, and Mikkel Thorup.
VarOpt is a reservoir-type sampler that maintains a fixed-size sample
and provides a mechanism for merging unequal-weight samples.
This package also includes a simple reservoir sampling algorithm,
often useful in conjunction with weighted reservoir sampling, using
Algorithm R from "Random sampling with a
reservoir", https://en.wikipedia.org/wiki/Reservoir_sampling#Algorithm_R
(1985), by Jeffrey Vitter.
See https://github.com/lightstep/varopt/blob/master/README.md for
more detail.
*/
package varopt
2 changes: 1 addition & 1 deletion frequency_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ var colors = []curve{
// While the number of expected points per second is uniform, the
// output sample weights are expected to match the original
// frequencies.
func ExampleFrequency() {
func ExampleVaropt_GetOriginalWeight() {
// Number of points.
const totalCount = 1e6

Expand Down
21 changes: 14 additions & 7 deletions simple.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,20 @@ type Simple struct {
capacity int
observed int
buffer []Sample
rnd *rand.Rand
}

func NewSimple(capacity int) *Simple {
// NewSimple constructs a simple reservoir sampler. The capacity
// argument is the reservoir size and rnd is the random number
// generator the sampler draws from.
func NewSimple(capacity int, rnd *rand.Rand) *Simple {
	s := new(Simple)
	s.capacity = capacity
	s.rnd = rnd
	return s
}

// Add considers a new observation for the sample. Items have unit
// weight.
func (s *Simple) Add(span Sample) {
s.observed++

Expand All @@ -34,28 +40,29 @@ func (s *Simple) Add(span Sample) {
}

// Give this a capacity/observed chance of replacing an existing entry.
index := rand.Intn(s.observed)
index := s.rnd.Intn(s.observed)
if index < s.capacity {
s.buffer[index] = span
}
}

// Get looks up position i in the sample buffer and returns the item
// stored there.
func (s *Simple) Get(i int) Sample {
	item := s.buffer[i]
	return item
}

// Size returns the number of items currently held in the sample,
// i.e. the length of the sample buffer. When the reservoir is full,
// this equals the capacity that was passed to NewSimple.
func (s *Simple) Size() int {
	return len(s.buffer)
}

// Weight reports the adjusted weight shared by every item in the
// sample: the number of observed items divided by the sample size.
func (s *Simple) Weight() float64 {
	seen := float64(s.observed)
	kept := float64(s.Size())
	return seen / kept
}

// Prob returns the reciprocal of Weight().
func (s *Simple) Prob() float64 {
	w := s.Weight()
	return 1 / w
}

func (s *Simple) Observed() int {
// Count reports how many items have been observed so far.
func (s *Simple) Count() int {
	n := s.observed
	return n
}
5 changes: 4 additions & 1 deletion simple_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
package varopt_test

import (
"math/rand"
"testing"

"github.com/lightstep/varopt"
Expand All @@ -19,7 +20,9 @@ func TestSimple(t *testing.T) {
epsilon = 0.01
)

ss := varopt.NewSimple(sampleSize)
rnd := rand.New(rand.NewSource(17167))

ss := varopt.NewSimple(sampleSize, rnd)

psum := 0.
for i := 0; i < popSize; i++ {
Expand Down
18 changes: 18 additions & 0 deletions varopt.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ type Varopt struct {
totalWeight float64
}

// Sample is an empty interface that represents a sample item.
// Sampling algorithms treat these as opaque; any weight associated
// with an item is passed in separately (see Varopt.Add).
type Sample interface{}

type vsample struct {
Expand All @@ -45,13 +48,16 @@ type vsample struct {

// largeHeap is a slice of vsample values.
// NOTE(review): the name suggests it is kept in heap order for the
// large-weight items — confirm against its methods, which are not
// visible here.
type largeHeap []vsample

// New constructs a Varopt sampler. The capacity argument is the
// reservoir size and rnd is the random number generator stored on
// the sampler.
func New(capacity int, rnd *rand.Rand) *Varopt {
	s := new(Varopt)
	s.capacity = capacity
	s.rnd = rnd
	return s
}

// Add considers a new observation for the sample with given weight.
func (s *Varopt) Add(sample Sample, weight float64) {
individual := vsample{
sample: sample,
Expand Down Expand Up @@ -131,6 +137,9 @@ func (s *Varopt) Get(i int) (Sample, float64) {
return s.T[i-len(s.L)].sample, s.tau
}

// GetOriginalWeight returns the original input weight of the sample
// item that was passed to Add(). This can be useful for computing a
// frequency from the adjusted sample weight.
func (s *Varopt) GetOriginalWeight(i int) float64 {
if i < len(s.L) {
return s.L[i].weight
Expand All @@ -139,22 +148,31 @@ func (s *Varopt) GetOriginalWeight(i int) float64 {
return s.T[i-len(s.L)].weight
}

// Capacity reports the reservoir size, which is the upper bound on
// the number of items the sample can hold.
func (s *Varopt) Capacity() int {
	limit := s.capacity
	return limit
}

// Size reports how many items the sample currently holds, counting
// the entries of both internal lists. When the reservoir is full,
// the result equals Capacity().
func (s *Varopt) Size() int {
	nL, nT := len(s.L), len(s.T)
	return nL + nT
}

// TotalWeight reports the accumulated sum of all weights supplied to
// Add().
func (s *Varopt) TotalWeight() float64 {
	sum := s.totalWeight
	return sum
}

// TotalCount reports how many times Add() has been called.
func (s *Varopt) TotalCount() int {
	calls := s.totalCount
	return calls
}

// Tau returns the current large-weight threshold. Weights larger
// than Tau() carry their exact weight in the sample. See the VarOpt
// paper for details.
func (s *Varopt) Tau() float64 {
	return s.tau
}
Expand Down
6 changes: 2 additions & 4 deletions varopt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@ const (
sampleProb = 0.001
sampleSize int = popSize * sampleProb

// TODO epsilon is somewhat variable b/c we're using the
// static rand w/o a fixed seed for the test.
epsilon = 0.06
epsilon = 0.08
)

func TestUnbiased(t *testing.T) {
Expand Down Expand Up @@ -108,7 +106,7 @@ func testUnbiased(t *testing.T, bbr, bsr float64) {

for _, blockList := range blockLists {
for _, block := range blockList {
simple := varopt.NewSimple(sampleSize)
simple := varopt.NewSimple(sampleSize, rnd)

for _, s := range block {
simple.Add(s)
Expand Down
2 changes: 1 addition & 1 deletion weighted_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ type packet struct {
protocol string
}

func ExampleWeighted() {
func ExampleNew() {
const totalPackets = 1e6
const sampleRatio = 0.01

Expand Down

0 comments on commit bef2465

Please sign in to comment.