ruler,receive,sidecar: StoreAPI Series encodes chunks to 120 samples instead of a single big one.

This gives us a unified chunk size and should reduce the load on the querier. It will also make results much more comparable once the chunk iterator work is done.
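For illustration, below is a minimal sketch of the idea (not the actual Thanos implementation; the `sample` type, the `chunkSamples` helper, and the `main` driver are made up for this example), splitting a slice of samples into XOR-encoded chunks of at most 120 samples using Prometheus' `chunkenc` package. 120 samples per chunk is the same target Prometheus uses when cutting head chunks, which is what makes chunk sizes comparable across StoreAPI implementations.

```go
package main

import (
	"fmt"

	"github.com/prometheus/prometheus/tsdb/chunkenc"
)

// maxSamplesPerChunk mirrors the 120-sample target used in this change.
const maxSamplesPerChunk = 120

// sample is a simplified (t, v) pair; Thanos uses storepb/prompb types instead.
type sample struct {
	t int64
	v float64
}

// chunkSamples splits samples into XOR-encoded chunks holding at most
// maxPerChunk samples each, instead of one big chunk per series.
func chunkSamples(samples []sample, maxPerChunk int) ([]chunkenc.Chunk, error) {
	var chunks []chunkenc.Chunk
	for len(samples) > 0 {
		n := len(samples)
		if n > maxPerChunk {
			n = maxPerChunk
		}

		c := chunkenc.NewXORChunk()
		app, err := c.Appender()
		if err != nil {
			return nil, err
		}
		for _, s := range samples[:n] {
			app.Append(s.t, s.v)
		}

		chunks = append(chunks, c)
		samples = samples[n:]
	}
	return chunks, nil
}

func main() {
	// 275 samples -> chunks of 120, 120 and 35 samples.
	samples := make([]sample, 275)
	for i := range samples {
		samples[i] = sample{t: int64(i) * 1000, v: float64(i)}
	}

	chunks, err := chunkSamples(samples, maxSamplesPerChunk)
	if err != nil {
		panic(err)
	}
	for i, c := range chunks {
		fmt.Printf("chunk %d: %d samples, %d bytes\n", i, c.NumSamples(), len(c.Bytes()))
	}
}
```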

See the following benchmark results for Receive (multiTSDB):

```
 benchstat -delta-test none _dev/bench_outs/0-receiveseries/benchBenchmarkMultiTSDBSeries.out _dev/bench_outs/1-receiveseries/benchBenchmarkMultiTSDBSeries.out
name                                                                                                  old time/op    new time/op    delta
MultiTSDBSeries/1000000SeriesWith1Samples/headOnly/4_TSDBs_with_1_samples,_250000_series_each-12         6.41s ± 0%     6.16s ± 0%   -3.85%
MultiTSDBSeries/1000000SeriesWith1Samples/blocksOnly/4_TSDBs_with_1_samples,_250000_series_each-12       5.77s ± 0%     6.16s ± 0%   +6.61%
MultiTSDBSeries/100000SeriesWith100Samples/headOnly/4_TSDBs_with_25_samples,_25000_series_each-12        3.68s ± 0%     3.96s ± 0%   +7.43%
MultiTSDBSeries/100000SeriesWith100Samples/blocksOnly/4_TSDBs_with_25_samples,_25000_series_each-12      4.04s ± 0%     4.02s ± 0%   -0.46%
MultiTSDBSeries/1SeriesWith10000000Samples/headOnly/4_TSDBs_with_2500000_samples,_1_series_each-12       1.53s ± 0%     1.57s ± 0%   +2.08%
MultiTSDBSeries/1SeriesWith10000000Samples/blocksOnly/4_TSDBs_with_2500000_samples,_1_series_each-12     1.67s ± 0%     1.66s ± 0%   -1.12%

name                                                                                                  old alloc/op   new alloc/op   delta
MultiTSDBSeries/1000000SeriesWith1Samples/headOnly/4_TSDBs_with_1_samples,_250000_series_each-12        4.08GB ± 0%    4.08GB ± 0%   +0.03%
MultiTSDBSeries/1000000SeriesWith1Samples/blocksOnly/4_TSDBs_with_1_samples,_250000_series_each-12      4.08GB ± 0%    4.08GB ± 0%   -0.01%
MultiTSDBSeries/100000SeriesWith100Samples/headOnly/4_TSDBs_with_25_samples,_25000_series_each-12       1.73GB ± 0%    1.72GB ± 0%   -0.37%
MultiTSDBSeries/100000SeriesWith100Samples/blocksOnly/4_TSDBs_with_25_samples,_25000_series_each-12     1.66GB ± 0%    1.67GB ± 0%   +0.57%
MultiTSDBSeries/1SeriesWith10000000Samples/headOnly/4_TSDBs_with_2500000_samples,_1_series_each-12      2.71GB ± 0%    2.47GB ± 0%   -8.68%
MultiTSDBSeries/1SeriesWith10000000Samples/blocksOnly/4_TSDBs_with_2500000_samples,_1_series_each-12    2.68GB ± 0%    2.46GB ± 0%   -8.14%

name                                                                                                  old allocs/op  new allocs/op  delta
MultiTSDBSeries/1000000SeriesWith1Samples/headOnly/4_TSDBs_with_1_samples,_250000_series_each-12         44.9M ± 0%     44.9M ± 0%   +0.00%
MultiTSDBSeries/1000000SeriesWith1Samples/blocksOnly/4_TSDBs_with_1_samples,_250000_series_each-12       44.9M ± 0%     44.9M ± 0%   -0.00%
MultiTSDBSeries/100000SeriesWith100Samples/headOnly/4_TSDBs_with_25_samples,_25000_series_each-12        27.1M ± 0%     27.1M ± 0%   -0.01%
MultiTSDBSeries/100000SeriesWith100Samples/blocksOnly/4_TSDBs_with_25_samples,_25000_series_each-12      27.1M ± 0%     27.1M ± 0%   +0.01%
MultiTSDBSeries/1SeriesWith10000000Samples/headOnly/4_TSDBs_with_2500000_samples,_1_series_each-12       1.02M ± 0%     1.69M ± 0%  +65.15%
MultiTSDBSeries/1SeriesWith10000000Samples/blocksOnly/4_TSDBs_with_2500000_samples,_1_series_each-12     1.02M ± 0%     1.69M ± 0%  +65.34%
```
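As expected when encoding many small chunks instead of one big chunk per series, the main cost shows up in allocations for the long-series cases (1.02M to 1.69M allocs/op), while allocated bytes for those cases drop by roughly 8% and time/op stays within a few percent across the board.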

Signed-off-by: Bartlomiej Plotka <[email protected]>
bwplotka committed Jul 8, 2020
1 parent e6dcf08 commit 9d0bd29
Showing 5 changed files with 17 additions and 27 deletions.
pkg/store/multitsdb_test.go (1 addition, 1 deletion)
@@ -118,7 +118,7 @@ func benchMultiTSDBSeries(t testutil.TB, totalSamples, totalSeries int, flushToB
 
 tsdbs := map[string]*TSDBStore{}
 for i, db := range dbs {
-tsdbs[fmt.Sprintf("%v", i)] = &TSDBStore{db: db, logger: logger, maxSamplesPerChunk: 120} // On production we have math.MaxInt64
+tsdbs[fmt.Sprintf("%v", i)] = &TSDBStore{db: db, logger: logger}
 }
 
 store := NewMultiTSDBStore(logger, nil, component.Receive, func() map[string]*TSDBStore { return tsdbs })
pkg/store/prometheus.go (1 addition, 5 deletions)
@@ -10,7 +10,6 @@ import (
 "fmt"
 "io"
 "io/ioutil"
-"math"
 "net/http"
 "net/url"
 "path"
@@ -239,10 +238,7 @@ func (p *PrometheusStore) handleSampledPrometheusResponse(s storepb.Store_Series
 continue
 }
 
-// XOR encoding supports a max size of 2^16 - 1 samples, so we need
-// to chunk all samples into groups of no more than 2^16 - 1
-// See: https://github.com/thanos-io/thanos/pull/718.
-aggregatedChunks, err := p.chunkSamples(e, math.MaxUint16)
+aggregatedChunks, err := p.chunkSamples(e, maxSamplesPerChunk)
 if err != nil {
 return err
 }
pkg/store/prometheus_test.go (8 additions, 8 deletions)
@@ -545,7 +545,7 @@ func TestPrometheusStore_Info(t *testing.T) {
 testutil.Equals(t, int64(456), resp.MaxTime)
 }
 
-func testSeries_SplitSamplesIntoChunksWithMaxSizeOfUint16_e2e(t *testing.T, appender storage.Appender, newStore func() storepb.StoreServer) {
+func testSeries_SplitSamplesIntoChunksWithMaxSizeOf120(t *testing.T, appender storage.Appender, newStore func() storepb.StoreServer) {
 baseT := timestamp.FromTime(time.Now().AddDate(0, 0, -2)) / 1000 * 1000
 
 offset := int64(2*math.MaxUint16 + 5)
@@ -580,30 +580,30 @@ func testSeries_SplitSamplesIntoChunksWithMaxSizeOfUint16_e2e(t *testing.T, appe
 {Name: "region", Value: "eu-west"},
 }, firstSeries.Labels)
 
-testutil.Equals(t, 3, len(firstSeries.Chunks))
+testutil.Equals(t, 1093, len(firstSeries.Chunks))
 
 chunk, err := chunkenc.FromData(chunkenc.EncXOR, firstSeries.Chunks[0].Raw.Data)
 testutil.Ok(t, err)
-testutil.Equals(t, math.MaxUint16, chunk.NumSamples())
+testutil.Equals(t, 120, chunk.NumSamples())
 
 chunk, err = chunkenc.FromData(chunkenc.EncXOR, firstSeries.Chunks[1].Raw.Data)
 testutil.Ok(t, err)
-testutil.Equals(t, math.MaxUint16, chunk.NumSamples())
+testutil.Equals(t, 120, chunk.NumSamples())
 
-chunk, err = chunkenc.FromData(chunkenc.EncXOR, firstSeries.Chunks[2].Raw.Data)
+chunk, err = chunkenc.FromData(chunkenc.EncXOR, firstSeries.Chunks[len(firstSeries.Chunks)-1].Raw.Data)
 testutil.Ok(t, err)
-testutil.Equals(t, 5, chunk.NumSamples())
+testutil.Equals(t, 35, chunk.NumSamples())
 }
 
 // Regression test for https://github.com/thanos-io/thanos/issues/396.
-func TestPrometheusStore_Series_SplitSamplesIntoChunksWithMaxSizeOfUint16_e2e(t *testing.T) {
+func TestPrometheusStore_Series_SplitSamplesIntoChunksWithMaxSizeOf120(t *testing.T) {
 defer leaktest.CheckTimeout(t, 10*time.Second)()
 
 p, err := e2eutil.NewPrometheus()
 testutil.Ok(t, err)
 defer func() { testutil.Ok(t, p.Stop()) }()
 
-testSeries_SplitSamplesIntoChunksWithMaxSizeOfUint16_e2e(t, p.Appender(), func() storepb.StoreServer {
+testSeries_SplitSamplesIntoChunksWithMaxSizeOf120(t, p.Appender(), func() storepb.StoreServer {
 testutil.Ok(t, p.Start())
 
 u, err := url.Parse(fmt.Sprintf("http://%s", p.Addr()))
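(For reference, the new test expectations follow directly from the chunk size: the test appends 2*math.MaxUint16 + 5 = 131,075 samples, and at 120 samples per chunk that is 1,092 full chunks plus a final 35-sample chunk, i.e. 1,093 chunks in total.)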
pkg/store/tsdb.go (5 additions, 11 deletions)
@@ -32,11 +32,10 @@ type TSDBReader interface {
 // It attaches the provided external labels to all results. It only responds with raw data
 // and does not support downsampling.
 type TSDBStore struct {
-logger log.Logger
-db TSDBReader
-component component.StoreAPI
-externalLabels labels.Labels
-maxSamplesPerChunk int
+logger log.Logger
+db TSDBReader
+component component.StoreAPI
+externalLabels labels.Labels
 }
 
 // ReadWriteTSDBStore is a TSDBStore that can also be written to.
@@ -55,11 +54,6 @@ func NewTSDBStore(logger log.Logger, _ prometheus.Registerer, db TSDBReader, com
 db: db,
 component: component,
 externalLabels: externalLabels,
-// NOTE: XOR encoding supports a max size of 2^16 - 1 samples, so we need
-// to chunk all samples into groups of no more than 2^16 - 1
-// See: https://github.com/thanos-io/thanos/pull/1038.
-// TODO(bwplotka): Consider 120 samples?
-maxSamplesPerChunk: math.MaxUint16,
 }
 }
 
@@ -133,7 +127,7 @@ func (s *TSDBStore) Series(r *storepb.SeriesRequest, srv storepb.Store_SeriesSer
 if !r.SkipChunks {
 // TODO(fabxc): An improvement over this trivial approach would be to directly
 // use the chunks provided by TSDB in the response.
-c, err := s.encodeChunks(series.Iterator(), s.maxSamplesPerChunk)
+c, err := s.encodeChunks(series.Iterator(), maxSamplesPerChunk)
 if err != nil {
 return status.Errorf(codes.Internal, "encode chunk: %s", err)
 }
pkg/store/tsdb_test.go (2 additions, 2 deletions)
@@ -290,14 +290,14 @@ func TestTSDBStore_LabelValues(t *testing.T) {
 }
 
 // Regression test for https://github.com/thanos-io/thanos/issues/1038.
-func TestTSDBStore_Series_SplitSamplesIntoChunksWithMaxSizeOfUint16_e2e(t *testing.T) {
+func TestTSDBStore_Series_SplitSamplesIntoChunksWithMaxSizeOf120(t *testing.T) {
 defer leaktest.CheckTimeout(t, 10*time.Second)()
 
 db, err := e2eutil.NewTSDB()
 defer func() { testutil.Ok(t, db.Close()) }()
 testutil.Ok(t, err)
 
-testSeries_SplitSamplesIntoChunksWithMaxSizeOfUint16_e2e(t, db.Appender(), func() storepb.StoreServer {
+testSeries_SplitSamplesIntoChunksWithMaxSizeOf120(t, db.Appender(), func() storepb.StoreServer {
 tsdbStore := NewTSDBStore(nil, nil, db, component.Rule, labels.FromStrings("region", "eu-west"))
 
 return tsdbStore
