-
Notifications
You must be signed in to change notification settings - Fork 174
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[wip] start maintaining an internal count of time series produced per metric #246
Changes from 6 commits
d81d132
d7affac
7a4bb30
a1d32d8
019e69c
8585da7
47aa894
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,8 +3,10 @@ package samplers | |
import ( | ||
"bytes" | ||
"encoding/binary" | ||
"encoding/gob" | ||
"fmt" | ||
"hash/fnv" | ||
"log" | ||
"math" | ||
"strings" | ||
"time" | ||
|
@@ -72,6 +74,101 @@ type JSONMetric struct { | |
Value []byte `json:"value"` | ||
} | ||
|
||
const CardinalityCountName = "cardinality.count" | ||
const CardinalityCountType = "cardinality_count" | ||
|
||
// CardinalityCount is a sampler that records an approximate cardinality count | ||
// by metric name. | ||
type CardinalityCount struct { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it's really cool that you implemented this as a sampler! |
||
MetricCardinality map[string]*hyperloglog.HyperLogLogPlus | ||
} | ||
|
||
func NewCardinalityCount() *CardinalityCount { | ||
return &CardinalityCount{make(map[string]*hyperloglog.HyperLogLogPlus)} | ||
} | ||
|
||
// Sample adds a measurement to the cardinality counter. It makes the | ||
// assumption that no two metrics with different types share a name (ie, name | ||
// is unique across all types) | ||
func (cc *CardinalityCount) Sample(name string, joinedTags string) { | ||
if _, present := cc.MetricCardinality[name]; !present { | ||
hll, _ := hyperloglog.NewPlus(18) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we extract 18 into a const? we can keep it unexported for now |
||
cc.MetricCardinality[name] = hll | ||
} | ||
hasher := fnv.New64a() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The |
||
hasher.Write([]byte(joinedTags)) | ||
cc.MetricCardinality[name].Add(hasher) | ||
} | ||
|
||
// Export groups the CardinalityCount struct into sub structs, grouped by what | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. comment about 'sub strict' |
||
// their metric consistently hashes to | ||
func (cc *CardinalityCount) ExportSets() ([]JSONMetric, error) { | ||
jsonMetrics := make([]JSONMetric, 0, len(cc.MetricCardinality)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes! Thank you for pre- |
||
for metricName, hll := range cc.MetricCardinality { | ||
|
||
buf := new(bytes.Buffer) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: I'm not much of a fan of In this case, the zero value would work if we then pass |
||
encoder := gob.NewEncoder(buf) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We recently had #220 show up which got us into a bit of trouble because the transmission of this sort of data isn't versioned or anything. I get a bit worried when I see new gob-encoded things. This is probably not actionable now but something we should consider for the future. Should we version the import endpoint or something? |
||
|
||
err := encoder.Encode(map[string]*hyperloglog.HyperLogLogPlus{metricName: hll}) | ||
if err != nil { | ||
// TODO (kiran): do we return an array of metrics, instead? | ||
log.Printf("failed to export cardinality count for metric name:%s, error:%v", metricName, err) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. At the moment, we don't use logrus (and therefore our Sentry hooks) here. Ideally, this should go to Sentry, though that'll require a little more work to configure, since this is in a subpackage. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. from IRL: we're going to make a custom error type to return to the caller here, and have the caller emit to Sentry via logrus, instead |
||
} | ||
|
||
jm := JSONMetric{ | ||
MetricKey: MetricKey{ | ||
Name: CardinalityCountName, | ||
Type: CardinalityCountType, | ||
JoinedTags: metricName, | ||
}, | ||
Tags: []string{metricName}, | ||
Value: buf.Bytes(), | ||
} | ||
jsonMetrics = append(jsonMetrics, jm) | ||
} | ||
// TODO (kiran): do we return an array of metrics, instead? | ||
return jsonMetrics, nil | ||
} | ||
|
||
// Combine merges cardinality count structs exported from locals | ||
func (cc *CardinalityCount) Combine(other []byte) error { | ||
var decodedMap map[string]*hyperloglog.HyperLogLogPlus | ||
buf := bytes.NewReader(other) | ||
decoder := gob.NewDecoder(buf) | ||
|
||
err := decoder.Decode(&decodedMap) | ||
if err != nil { | ||
panic(err) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. return instead of panic? |
||
} | ||
|
||
otherCC := CardinalityCount{ | ||
MetricCardinality: decodedMap, | ||
} | ||
for name, hll := range otherCC.MetricCardinality { | ||
if _, ok := cc.MetricCardinality[name]; ok { | ||
cc.MetricCardinality[name].Merge(hll) | ||
} else { | ||
cc.MetricCardinality[name] = hll | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
// Flush emits the names + cardinality of all the metrics in the map | ||
// TODO (kiran): investigate whether we'd want to emit only the top k metrics | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we also emit a counter here based on |
||
func (cc *CardinalityCount) Flush() []DDMetric { | ||
ddMetrics := make([]DDMetric, 0, len(cc.MetricCardinality)) | ||
for metricName, hll := range cc.MetricCardinality { | ||
ddMetric := DDMetric{ | ||
Name: CardinalityCountName, | ||
Value: [1][2]float64{{float64(time.Now().Unix()), float64(hll.Count())}}, | ||
Tags: []string{metricName}, | ||
} | ||
ddMetrics = append(ddMetrics, ddMetric) | ||
} | ||
return ddMetrics | ||
} | ||
|
||
// Counter is an accumulator | ||
type Counter struct { | ||
Name string | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -76,6 +76,58 @@ func TestCounterMerge(t *testing.T) { | |
assert.Equal(t, float64(3.8), metrics[0].Value[0][1]) | ||
} | ||
|
||
func TestCardinality(t *testing.T) { | ||
rand.Seed(time.Now().Unix()) | ||
c := NewCardinalityCount() | ||
c.Sample("abc", "tag1tag2tag3") | ||
|
||
for i := 0; i < 100; i++ { | ||
c.Sample("abc", strconv.Itoa(rand.Int())) | ||
} | ||
assert.Equal(t, uint64(101), c.MetricCardinality["abc"].Count(), "counts did not match") | ||
} | ||
|
||
func TestCardinalityMerge(t *testing.T) { | ||
rand.Seed(time.Now().Unix()) | ||
|
||
cc := NewCardinalityCount() | ||
for i := 0; i < 100; i++ { | ||
cc.Sample("abc", strconv.Itoa(rand.Int())) | ||
} | ||
for i := 0; i < 150; i++ { | ||
cc.Sample("cde", strconv.Itoa(rand.Int())) | ||
} | ||
assert.Equal(t, uint64(100), cc.MetricCardinality["abc"].Count(), "counts did not match") | ||
assert.Equal(t, uint64(150), cc.MetricCardinality["cde"].Count(), "counts did not match") | ||
|
||
jsonMetrics, err := cc.ExportSets() | ||
assert.NoError(t, err, "should have exported successfully") | ||
assert.NotEmpty(t, jsonMetrics, "should have returned a value") | ||
assert.NotNil(t, jsonMetrics[0], "Need a value") | ||
|
||
cc2 := NewCardinalityCount() | ||
for i := 0; i < 50; i++ { | ||
cc2.Sample("abc", strconv.Itoa(rand.Int())) | ||
} | ||
for i := 0; i < 50; i++ { | ||
cc2.Sample("cde", strconv.Itoa(rand.Int())) | ||
} | ||
|
||
for _, jm := range jsonMetrics { | ||
assert.NoError(t, cc2.Combine(jm.Value), "should have combined successfully") | ||
} | ||
|
||
// // HLLs are approximate, and we've seen error of +-1 here in the past, so | ||
// // we're giving the test some room for error to reduce flakes | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we extract the margin of error into a constant? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can use InDelta here! |
||
receivedCount := int(cc2.MetricCardinality["abc"].Count()) | ||
countDifference := 150 - int(receivedCount) | ||
assert.True(t, -2 <= countDifference && countDifference <= 2, "counts did not match after merging (%d and %d)", 150, receivedCount) | ||
|
||
receivedCount = int(cc2.MetricCardinality["cde"].Count()) | ||
countDifference = 200 - int(receivedCount) | ||
assert.True(t, -2 <= countDifference && countDifference <= 2, "counts did not match after merging (%d and %d)", 200, receivedCount) | ||
} | ||
|
||
func TestGauge(t *testing.T) { | ||
g := NewGauge("a.b.c", []string{"a:b"}) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,6 +36,9 @@ type WorkerMetrics struct { | |
sets map[samplers.MetricKey]*samplers.Set | ||
timers map[samplers.MetricKey]*samplers.Histo | ||
|
||
// metametrics -- these store information about the metrics themselves | ||
cardinality *samplers.CardinalityCount | ||
|
||
// this is for counters which are globally aggregated | ||
globalCounters map[samplers.MetricKey]*samplers.Counter | ||
|
||
|
@@ -54,6 +57,7 @@ func NewWorkerMetrics() WorkerMetrics { | |
histograms: make(map[samplers.MetricKey]*samplers.Histo), | ||
sets: make(map[samplers.MetricKey]*samplers.Set), | ||
timers: make(map[samplers.MetricKey]*samplers.Histo), | ||
cardinality: samplers.NewCardinalityCount(), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For posterity maybe note that we can't use a |
||
localHistograms: make(map[samplers.MetricKey]*samplers.Histo), | ||
localSets: make(map[samplers.MetricKey]*samplers.Set), | ||
localTimers: make(map[samplers.MetricKey]*samplers.Histo), | ||
|
@@ -200,6 +204,8 @@ func (w *Worker) ProcessMetric(m *samplers.UDPMetric) { | |
default: | ||
log.WithField("type", m.Type).Error("Unknown metric type for processing") | ||
} | ||
// record the metric's cardinality | ||
w.wm.cardinality.Sample(m.MetricKey.Name, m.MetricKey.JoinedTags) | ||
} | ||
|
||
// ImportMetric receives a metric from another veneur instance | ||
|
@@ -234,6 +240,10 @@ func (w *Worker) ImportMetric(other samplers.JSONMetric) { | |
if err := w.wm.timers[other.MetricKey].Combine(other.Value); err != nil { | ||
log.WithError(err).Error("Could not merge timers") | ||
} | ||
case samplers.CardinalityCountType: | ||
if err := w.wm.cardinality.Combine(other.Value); err != nil { | ||
log.WithError(err).Error("Could not merge cardinality") | ||
} | ||
default: | ||
log.WithField("type", other.Type).Error("Unknown metric type for importing") | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,8 @@ | ||
package veneur | ||
|
||
import ( | ||
"math/rand" | ||
"strconv" | ||
"testing" | ||
|
||
"github.com/Sirupsen/logrus" | ||
|
@@ -78,3 +80,25 @@ func TestWorkerImportHistogram(t *testing.T) { | |
wm := w.Flush() | ||
assert.Len(t, wm.histograms, 1, "number of flushed histograms") | ||
} | ||
|
||
func TestWorkerImportCardinality(t *testing.T) { | ||
w := NewWorker(1, nil, logrus.New()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we can actually do There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ooh yes |
||
|
||
cc := samplers.NewCardinalityCount() | ||
for i := 0; i < 100; i++ { | ||
cc.Sample("abc", strconv.Itoa(rand.Int())) | ||
} | ||
for i := 0; i < 150; i++ { | ||
cc.Sample("cde", strconv.Itoa(rand.Int())) | ||
} | ||
|
||
jsonMetrics, err := cc.ExportSets() | ||
assert.NoError(t, err, "should have exported successfully") | ||
|
||
for _, jm := range jsonMetrics { | ||
w.ImportMetric(jm) | ||
} | ||
|
||
wm := w.Flush() | ||
assert.Len(t, wm.cardinality.MetricCardinality, 2, "number of flushed outputs") | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd recommend adding comments explaining the difference here