From 1c6e77307c77b3d8cfcc172c439fe5ec16dd177e Mon Sep 17 00:00:00 2001 From: Seif Lotfy Date: Thu, 6 Jul 2017 10:45:54 +0200 Subject: [PATCH] update to recent axiomhq/hyperloglog due to error in merging and beta --- Gopkg.lock | 2 +- Gopkg.toml | 1 - samplers/samplers.go | 4 +- .../github.com/axiomhq/hyperloglog/README.md | 2 +- .../axiomhq/hyperloglog/hyperloglog.go | 45 ++++++++++++------- .../axiomhq/hyperloglog/registers.go | 2 +- .../github.com/axiomhq/hyperloglog/sparse.go | 5 --- .../github.com/axiomhq/hyperloglog/utils.go | 7 +++ 8 files changed, 42 insertions(+), 26 deletions(-) diff --git a/Gopkg.lock b/Gopkg.lock index f06e97ec2..d3828e3e7 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -22,7 +22,7 @@ branch = "master" name = "github.com/axiomhq/hyperloglog" packages = ["."] - revision = "f93c8603a35921d1f4d742cf063ff5d4c4092f21" + revision = "67c63c1769669ed114425d82cc7a91af4ce24f8b" [[projects]] name = "github.com/certifi/gocertifi" diff --git a/Gopkg.toml b/Gopkg.toml index 9ce725e5e..ae297c382 100644 --- a/Gopkg.toml +++ b/Gopkg.toml @@ -1,4 +1,3 @@ - # Gopkg.toml example # # Refer to https://github.com/golang/dep/blob/master/docs/Gopkg.toml.md diff --git a/samplers/samplers.go b/samplers/samplers.go index b2580e5c5..019256938 100644 --- a/samplers/samplers.go +++ b/samplers/samplers.go @@ -225,7 +225,7 @@ func (s *Set) Sample(sample string, sampleRate float32) { func NewSet(Name string, Tags []string) *Set { // error is only returned if precision is outside the 4-18 range // TODO: this is the maximum precision, should it be configurable? 
- Hll, _ := hyperloglog.New(18) + Hll := hyperloglog.New() return &Set{ Name: Name, Tags: Tags, @@ -264,7 +264,7 @@ func (s *Set) Export() (JSONMetric, error) { // Combine merges the values seen with another set (marshalled as a byte slice) func (s *Set) Combine(other []byte) error { - otherHLL, _ := hyperloglog.New(18) + otherHLL := hyperloglog.New() if err := otherHLL.UnmarshalBinary(other); err != nil { return err } diff --git a/vendor/github.com/axiomhq/hyperloglog/README.md b/vendor/github.com/axiomhq/hyperloglog/README.md index c1c42b430..9e76a1f93 100644 --- a/vendor/github.com/axiomhq/hyperloglog/README.md +++ b/vendor/github.com/axiomhq/hyperloglog/README.md @@ -1,4 +1,4 @@ -# HyperLogLog [![GoDoc](https://godoc.org/github.com/axiomhq/hyperloglog?status.svg)](https://godoc.org/github.com/axiomhq/hyperloglog) +# HyperLogLog [![GoDoc](https://godoc.org/github.com/axiomhq/hyperloglog?status.svg)](https://godoc.org/github.com/axiomhq/hyperloglog) [![Go Report Card](https://goreportcard.com/badge/github.com/axiomhq/hyperloglog)](https://goreportcard.com/report/github.com/axiomhq/hyperloglog) ![cover.run go](https://cover.run/go/github.com/axiomhq/hyperloglog.svg) An improved version of [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) for the count-distinct problem, approximating the number of distinct elements in a multiset **using 20-50% less space** than other usual HyperLogLog implementations. This work is based on ["Better with fewer bits: Improving the performance of cardinality estimation of large data streams - Qingjun Xiao, You Zhou, Shigang Chen"](http://cse.seu.edu.cn/PersonalPage/csqjxiao/csqjxiao_files/papers/INFOCOM17.pdf). 
diff --git a/vendor/github.com/axiomhq/hyperloglog/hyperloglog.go b/vendor/github.com/axiomhq/hyperloglog/hyperloglog.go index 2666cf13c..ad4501d02 100644 --- a/vendor/github.com/axiomhq/hyperloglog/hyperloglog.go +++ b/vendor/github.com/axiomhq/hyperloglog/hyperloglog.go @@ -18,21 +18,36 @@ const ( // Sketch is a HyperLogLog data-structure for the count-distinct problem, // approximating the number of distinct elements in a multiset. type Sketch struct { - sparse bool - p uint8 - b uint8 - m uint32 - alpha float64 - - tmpSet set - hash func(e []byte) uint64 - + sparse bool + p uint8 + b uint8 + m uint32 + alpha float64 + tmpSet set sparseList *compressedList regs *registers } -// New ... -func New(precision uint8) (*Sketch, error) { +//New returns a HyperLogLog Sketch with 2^14 registers (precision 14) +func New() *Sketch { + return New14() +} + +//New14 returns a HyperLogLog Sketch with 2^14 registers (precision 14) +func New14() *Sketch { + sk, _ := new(14) + return sk +} + +//New16 returns a HyperLogLog Sketch with 2^16 registers (precision 16) +func New16() *Sketch { + sk, _ := new(16) + return sk +} + +// new returns a HyperLogLog Sketch with 2^precision registers +func new(precision uint8) (*Sketch, error) { + hash = hashFunc if precision < 4 || precision > 18 { return nil, fmt.Errorf("p has to be >= 4 and <= 18") } @@ -44,13 +59,13 @@ func New(precision uint8) (*Sketch, error) { sparse: true, tmpSet: set{}, sparseList: newCompressedList(int(m)), - hash: hash, }, nil } // Clone returns a deep copy of sk. 
func (sk *Sketch) Clone() *Sketch { return &Sketch{ + b: sk.b, p: sk.p, m: sk.m, alpha: sk.alpha, @@ -169,7 +184,7 @@ func (sk *Sketch) insert(i uint32, r uint8) { // Insert adds element e to sketch func (sk *Sketch) Insert(e []byte) { - x := sk.hash(e) + x := hash(e) if sk.sparse { sk.tmpSet.add(encodeHash(x, sk.p, pp)) if uint32(len(sk.tmpSet))*100 > sk.m { @@ -196,7 +211,7 @@ func (sk *Sketch) Estimate() uint64 { var est float64 var beta func(float64) float64 - if sk.m < 16 { + if sk.p < 16 { beta = beta14 } else { beta = beta16 @@ -308,7 +323,7 @@ func (sk *Sketch) UnmarshalBinary(data []byte) error { // Unmarshal p. p := data[1] - newh, err := New(p) + newh, err := new(p) if err != nil { return err } diff --git a/vendor/github.com/axiomhq/hyperloglog/registers.go b/vendor/github.com/axiomhq/hyperloglog/registers.go index 1c024b476..24bc67c26 100644 --- a/vendor/github.com/axiomhq/hyperloglog/registers.go +++ b/vendor/github.com/axiomhq/hyperloglog/registers.go @@ -44,7 +44,7 @@ func (rs *registers) clone() *registers { if rs == nil { return nil } - var tc []reg + tc := make([]reg, len(rs.tailcuts)) copy(tc, rs.tailcuts) return &registers{ tailcuts: tc, diff --git a/vendor/github.com/axiomhq/hyperloglog/sparse.go b/vendor/github.com/axiomhq/hyperloglog/sparse.go index 64ade0752..47a619ae5 100644 --- a/vendor/github.com/axiomhq/hyperloglog/sparse.go +++ b/vendor/github.com/axiomhq/hyperloglog/sparse.go @@ -2,7 +2,6 @@ package hyperloglog import ( bits "github.com/dgryski/go-bits" - metro "github.com/dgryski/go-metro" ) func getIndex(k uint32, p, pp uint8) uint32 { @@ -85,7 +84,3 @@ type uint64Slice []uint32 func (p uint64Slice) Len() int { return len(p) } func (p uint64Slice) Less(i, j int) bool { return p[i] < p[j] } func (p uint64Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } - -func hash(e []byte) uint64 { - return metro.Hash64(e, 1337) -} diff --git a/vendor/github.com/axiomhq/hyperloglog/utils.go b/vendor/github.com/axiomhq/hyperloglog/utils.go index 
6f0e1fa13..81ee4e714 100644 --- a/vendor/github.com/axiomhq/hyperloglog/utils.go +++ b/vendor/github.com/axiomhq/hyperloglog/utils.go @@ -4,6 +4,7 @@ import ( "math" bits "github.com/dgryski/go-bits" + metro "github.com/dgryski/go-metro" ) func beta14(ez float64) float64 { @@ -61,3 +62,9 @@ func bextr(v uint64, start, length uint8) uint64 { func bextr32(v uint32, start, length uint8) uint32 { return (v >> start) & ((1 << length) - 1) } + +func hashFunc(e []byte) uint64 { + return metro.Hash64(e, 1337) +} + +var hash func(buf []byte) uint64