sidecar: fix issue #396: split response into chunks no bigger than 2^16 samples #718
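For context, a minimal sketch (not the PR's actual code) of the splitting behaviour the title describes: the raw samples fetched for one series are cut into consecutive groups of at most math.MaxUint16 samples, and each group becomes its own chunk, which is what the regression test below asserts (65535 + 5 samples producing two chunks). The sample type and the splitSamples helper are assumptions made for this sketch only.

// Illustrative sketch only: split a flat slice of samples into groups of at
// most maxPerChunk entries, each group destined to become one chunk.
package main

import (
	"fmt"
	"math"
)

// sample is a stand-in for a timestamped value (e.g. prompb.Sample).
type sample struct {
	T int64
	V float64
}

// splitSamples copies the input into consecutive groups of at most maxPerChunk
// samples; the final group holds whatever remains.
func splitSamples(all []sample, maxPerChunk int) [][]sample {
	var groups [][]sample
	for start := 0; start < len(all); start += maxPerChunk {
		end := start + maxPerChunk
		if end > len(all) {
			end = len(all)
		}
		group := make([]sample, end-start)
		copy(group, all[start:end])
		groups = append(groups, group)
	}
	return groups
}

func main() {
	all := make([]sample, math.MaxUint16+5)
	groups := splitSamples(all, math.MaxUint16)
	// Mirrors the test below: 65535 + 5 samples -> 2 groups of 65535 and 5 samples.
	fmt.Println(len(groups), len(groups[0]), len(groups[1]))
}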
@@ -3,6 +3,7 @@ package store
 import (
 	"context"
 	"fmt"
+	"math"
 	"net/url"
 	"testing"
 	"time"
@@ -272,3 +273,68 @@ func TestPrometheusStore_Info(t *testing.T) {
 	testutil.Equals(t, int64(123), resp.MinTime)
 	testutil.Equals(t, int64(456), resp.MaxTime)
 }
+
+// Regression test for https://github.com/improbable-eng/thanos/issues/396.
+func TestPrometheusStore_Series_SplitSamplesIntoChunksWithMaxSizeOfUint16_e2e(t *testing.T) {
+	defer leaktest.CheckTimeout(t, 10*time.Second)()
Review comment: 👍
+
+	p, err := testutil.NewPrometheus()
+	testutil.Ok(t, err)
+
+	baseT := timestamp.FromTime(time.Now().AddDate(0, 0, -2)) / 1000 * 1000
+
+	a := p.Appender()
+
+	offset := int64(math.MaxUint16 + 5)
Review comment: Maybe worth making it 3 chunks, for a more solid test? (:
+
+	for i := int64(0); i < offset; i++ {
+		_, err = a.Add(labels.FromStrings("a", "b"), baseT+i, 1)
+		testutil.Ok(t, err)
+	}
+
+	testutil.Ok(t, a.Commit())
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	testutil.Ok(t, p.Start())
+	defer func() { testutil.Ok(t, p.Stop()) }()
+
+	u, err := url.Parse(fmt.Sprintf("http://%s", p.Addr()))
+	testutil.Ok(t, err)
+
+	proxy, err := NewPrometheusStore(nil, nil, u,
+		func() labels.Labels {
+			return labels.FromStrings("region", "eu-west")
+		}, nil)
+	testutil.Ok(t, err)
+	srv := newStoreSeriesServer(ctx)
+
+	err = proxy.Series(&storepb.SeriesRequest{
Review comment: You can inline this.
+		MinTime: baseT,
+		MaxTime: baseT + offset,
+		Matchers: []storepb.LabelMatcher{
+			{Type: storepb.LabelMatcher_EQ, Name: "a", Value: "b"},
+			{Type: storepb.LabelMatcher_EQ, Name: "region", Value: "eu-west"},
+		},
+	}, srv)
+	testutil.Ok(t, err)
+
+	testutil.Equals(t, 1, len(srv.SeriesSet))
+
+	firstSeries := srv.SeriesSet[0]
+
+	testutil.Equals(t, []storepb.Label{
+		{Name: "a", Value: "b"},
+		{Name: "region", Value: "eu-west"},
+	}, firstSeries.Labels)
+
+	testutil.Equals(t, 2, len(firstSeries.Chunks))
+
+	chunk, err := chunkenc.FromData(chunkenc.EncXOR, firstSeries.Chunks[0].Raw.Data)
+	testutil.Ok(t, err)
+	testutil.Equals(t, math.MaxUint16, chunk.NumSamples())
+
+	chunk, err = chunkenc.FromData(chunkenc.EncXOR, firstSeries.Chunks[1].Raw.Data)
+	testutil.Ok(t, err)
+	testutil.Equals(t, 5, chunk.NumSamples())
+}
Review comment: This name could be misleading, as proper chunking should be with max 120 samples (: I think we should name it a bit differently, so it is explicit to the reader that it is fine to produce the biggest possible chunks in our query case. Alternatively, keep the name as it is but add a maxSamples int64 argument.

Also, since we have lots of raw samples here, we could consider reusing e.Samples instead of recreating multiple arrays of prompb.Sample, to decrease memory consumption for the sidecar. Each prompb.Sample array with length 2^16 is 1MB worth of memory. Maybe not much to optimize, not sure; it depends on how many samples you have at the end. Plus, with lazy GC and multiple series, it might be quite a large number. Up to you: we can start with something readable like this and move to micro-optimizations later on, or move to slicing and operate on indexes only, e.g. (I wrote it on the bus, so I did not test this).

What do you think?
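The reviewer's example snippet is not preserved in this capture. Below is a hedged sketch of the "slicing and operate on indexes only" idea described above: each chunk is represented by an index range into the single existing samples slice, so no new per-chunk prompb.Sample arrays are allocated. chunkRange and chunkRanges are illustrative names, and float64 stands in for the real sample type.

// Hedged sketch of the reviewer's suggestion: describe each chunk by a
// half-open index window over the one samples slice instead of copying
// samples into fresh per-chunk arrays.
package main

import (
	"fmt"
	"math"
)

// chunkRange marks the [start, end) window of one chunk within the samples slice.
type chunkRange struct {
	start, end int
}

// chunkRanges computes the index windows without copying any samples.
func chunkRanges(n, maxPerChunk int) []chunkRange {
	var ranges []chunkRange
	for start := 0; start < n; start += maxPerChunk {
		end := start + maxPerChunk
		if end > n {
			end = n
		}
		ranges = append(ranges, chunkRange{start: start, end: end})
	}
	return ranges
}

func main() {
	// float64 stands in for the real sample type (e.g. prompb.Sample).
	samples := make([]float64, math.MaxUint16+5)
	for _, r := range chunkRanges(len(samples), math.MaxUint16) {
		// samples[r.start:r.end] shares the backing array; encode it into a chunk here.
		fmt.Println(len(samples[r.start:r.end]))
	}
}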