store: add ability to limit max num of samples / concurrent queries #798
@@ -36,6 +36,12 @@ func registerStore(m map[string]setupFunc, app *kingpin.Application, name string
 	chunkPoolSize := cmd.Flag("chunk-pool-size", "Maximum size of concurrently allocatable bytes for chunks.").
 		Default("2GB").Bytes()

+	maxSampleCount := cmd.Flag("grpc-sample-limit",
+		"Maximum amount of samples returned via a single Series call. 0 means no limit. NOTE: may overestimate the number of samples that would be needed to respond to a query.").
+		Default("50000000").Uint()
+
+	maxConcurrent := cmd.Flag("grpc-concurrent-limit", "Maximum number of concurrent Series calls. 0 means no limit.").Default("20").Int()
+
 	objStoreConfig := regCommonObjStoreFlags(cmd, "", true)

 	syncInterval := cmd.Flag("sync-block-duration", "Repeat interval for syncing the blocks between local and remote view.").

Reviewer: As above — see the flag naming suggestion at the bottom of this review.
Author: Will rename to …

@@ -63,6 +69,8 @@ func registerStore(m map[string]setupFunc, app *kingpin.Application, name string
 		peer,
 		uint64(*indexCacheSize),
 		uint64(*chunkPoolSize),
+		uint64(*maxSampleCount),
+		int(*maxConcurrent),
 		name,
 		debugLogging,
 		*syncInterval,

@@ -87,6 +95,8 @@ func runStore(
 	peer cluster.Peer,
 	indexCacheSizeBytes uint64,
 	chunkPoolSizeBytes uint64,
+	maxSampleCount uint64,
+	maxConcurrent int,
 	component string,
 	verbose bool,
 	syncInterval time.Duration,

@@ -117,6 +127,8 @@ func runStore(
 		dataDir,
 		indexCacheSizeBytes,
 		chunkPoolSizeBytes,
+		maxSampleCount,
+		maxConcurrent,
 		verbose,
 		blockSyncConcurrency,
 	)
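For context, a store node would enable these limits roughly like so (flag names as they stand in this revision — a rename is suggested at the bottom of this review; all other flags elided):

    thanos store \
      --grpc-sample-limit=50000000 \
      --grpc-concurrent-limit=20 \
      ...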
@@ -42,6 +42,14 @@ import (
 	"google.golang.org/grpc/status"
 )

+// maxSamplesPerChunk is approximately the maximum number of samples that we may have in any given chunk.
+// This is needed for precalculating the number of samples that we may have to retrieve and decode for any
+// given query without downloading them. Please take a look at https://github.com/prometheus/tsdb/pull/397
+// to see where this number comes from. Long story short: TSDB is made this way because you barely get any
+// improvement in compression beyond this number of samples per chunk; see Figure 6 in the Gorilla
+// whitepaper, http://www.vldb.org/pvldb/vol8/p1816-teller.pdf.
+const maxSamplesPerChunk uint64 = 120
+

Reviewer: Someone could say that's too verbose, but I actually like this 👍
Author: 👍 someone will definitely start wondering why 120 is here, just like I did at the beginning.
Reviewer: Think this is fine 👍
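To make the default concrete: with --grpc-sample-limit at its default of 50000000 and at most 120 samples per chunk, the worst-case estimate made in preload (further down) lets a single Series call touch about 50000000 / 120 ≈ 416666 chunks before the limiter rejects the query.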
@@ -57,6 +65,8 @@ type bucketStoreMetrics struct {
 	seriesMergeDuration prometheus.Histogram
 	resultSeriesCount   prometheus.Summary
 	chunkSizeBytes      prometheus.Histogram
+	queriesLimited      prometheus.Counter
+	queriesLimit        prometheus.Gauge
 }

 func newBucketStoreMetrics(reg prometheus.Registerer) *bucketStoreMetrics {

@@ -132,6 +142,15 @@ func newBucketStoreMetrics(reg prometheus.Registerer) *bucketStoreMetrics {
 		},
 	})

+	m.queriesLimited = prometheus.NewCounter(prometheus.CounterOpts{
+		Name: "thanos_bucket_store_queries_limited",
+		Help: "Number of queries that were dropped due to the sample limit.",
+	})
+	m.queriesLimit = prometheus.NewGauge(prometheus.GaugeOpts{
+		Name: "thanos_bucket_store_queries_limit",
+		Help: "Number of maximum concurrent queries.",
+	})
+
 	if reg != nil {
 		reg.MustRegister(
 			m.blockLoads,

Reviewer: I think this metric name is off. I think … would fit better.
Author: Yes, will rename :)

Reviewer: Prometheus' team use … See …
Author: Good point. Should rename this to …

@@ -148,6 +167,8 @@ func newBucketStoreMetrics(reg prometheus.Registerer) *bucketStoreMetrics {
 			m.seriesMergeDuration,
 			m.resultSeriesCount,
 			m.chunkSizeBytes,
+			m.queriesLimited,
+			m.queriesLimit,
 		)
 	}
 	return &m
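Once registered, these series can be watched directly. For instance (using the names as they stand in this revision — the renames discussed above would change them), rate(thanos_bucket_store_queries_limited[5m]) would show how often Series calls are being dropped by the sample limit, and thanos_bucket_store_queries_limit exposes the configured concurrency ceiling for dashboards.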
@@ -173,7 +194,12 @@ type BucketStore struct {
 	// Number of goroutines to use when syncing blocks from object storage.
 	blockSyncConcurrency int

-	partitioner partitioner
+	// Query gate which limits the maximum amount of concurrent queries.
+	queryGate *Gate
+
+	// Samples limiter which limits the number of samples per each Series() call.
+	samplesLimiter *Limiter
+	partitioner    partitioner
 }

 // NewBucketStore creates a new bucket backed store that implements the store API against
@@ -185,12 +211,19 @@ func NewBucketStore(
 	dir string,
 	indexCacheSizeBytes uint64,
 	maxChunkPoolBytes uint64,
+	maxSampleCount uint64,
+	maxConcurrent int,
 	debugLogging bool,
 	blockSyncConcurrency int,
 ) (*BucketStore, error) {
 	if logger == nil {
 		logger = log.NewNopLogger()
 	}

+	if maxConcurrent < 0 {
+		return nil, errors.Errorf("max concurrency value cannot be lower than 0 (got %v)", maxConcurrent)
+	}
+
 	indexCache, err := newIndexCache(reg, indexCacheSizeBytes)
 	if err != nil {
 		return nil, errors.Wrap(err, "create index cache")

@@ -202,6 +235,7 @@ func NewBucketStore(

 	const maxGapSize = 512 * 1024

+	metrics := newBucketStoreMetrics(reg)
 	s := &BucketStore{
 		logger: logger,
 		bucket: bucket,

@@ -212,14 +246,18 @@ func NewBucketStore(
 		blockSets:            map[uint64]*bucketBlockSet{},
 		debugLogging:         debugLogging,
 		blockSyncConcurrency: blockSyncConcurrency,
+		queryGate:            NewGate(maxConcurrent, reg),
+		samplesLimiter:       NewLimiter(maxSampleCount, &metrics.queriesLimited),
 		partitioner:          gapBasedPartitioner{maxGapSize: maxGapSize},
 	}
-	s.metrics = newBucketStoreMetrics(reg)
+	s.metrics = metrics

 	if err := os.MkdirAll(dir, 0777); err != nil {
 		return nil, errors.Wrap(err, "create dir")
 	}

+	s.metrics.queriesLimit.Set(float64(maxConcurrent))
+
 	return s, nil
 }
@@ -472,14 +510,15 @@ func (s *bucketSeriesSet) Err() error {
 	return s.err
 }

-func (s *BucketStore) blockSeries(
+func blockSeries(
 	ctx context.Context,
 	ulid ulid.ULID,
 	extLset map[string]string,
 	indexr *bucketIndexReader,
 	chunkr *bucketChunkReader,
 	matchers []labels.Matcher,
 	req *storepb.SeriesRequest,
+	samplesLimiter *Limiter,
 ) (storepb.SeriesSet, *queryStats, error) {
 	ps, err := indexr.ExpandedPostings(matchers)
 	if err != nil {

@@ -557,7 +596,7 @@ func (s *BucketStore) blockSeries(
 	}

 	// Preload all chunks that were marked in the previous stage.
-	if err := chunkr.preload(); err != nil {
+	if err := chunkr.preload(samplesLimiter); err != nil {
 		return nil, nil, errors.Wrap(err, "preload chunks")
 	}
@@ -661,10 +700,17 @@ func debugFoundBlockSetOverview(logger log.Logger, mint, maxt int64, lset labels
 }

 // Series implements the storepb.StoreServer interface.
 // TODO(bwplotka): It buffers all chunks in memory and only then streams to client.
 // 1. Either count chunk sizes and error out too big query.
 // 2. Stream posting -> series -> chunk all together.
 func (s *BucketStore) Series(req *storepb.SeriesRequest, srv storepb.Store_SeriesServer) error {
+	{
+		span, _ := tracing.StartSpan(srv.Context(), "bucket_store_gate")
+		err := s.queryGate.IsMyTurn(srv.Context())
+		span.Finish()
+		if err != nil {
+			return errors.Wrapf(err, "failed to wait for turn")
+		}
+		defer s.queryGate.Done()
+	}

 	matchers, err := translateMatchers(req.Matchers)
 	if err != nil {
 		return status.Error(codes.InvalidArgument, err.Error())

Reviewer: I think you want to actually do this — the defer will not run until the whole function is done?
Author: But then the counter would go down at exactly this point, and another query could come through even though the old one is still being parsed. Or am I wrong? Imagine this situation: we could start parsing another query if it came in just after the gate had been passed by another query. Perhaps I am missing something :?
Reviewer: ignore me, I confused this with …
Reviewer: still not resolved: …
Author: Will do, missed that one.
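For reference, a minimal standalone sketch of the semantics under discussion: defer runs when the enclosing function returns, not when the inner block ends, so Done() here is held until the whole Series call finishes.

    package main

    import "fmt"

    func main() {
    	{
    		defer fmt.Println("deferred") // registered inside an inner block
    		fmt.Println("inside block")
    	}
    	fmt.Println("after block") // the deferred call still has not run here
    }

    // Output:
    // inside block
    // after block
    // deferred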
@@ -703,13 +749,14 @@ func (s *BucketStore) Series(req *storepb.SeriesRequest, srv storepb.Store_Serie
 		defer runutil.CloseWithLogOnErr(s.logger, chunkr, "series block")

 		g.Add(func() error {
-			part, pstats, err := s.blockSeries(ctx,
+			part, pstats, err := blockSeries(ctx,
 				b.meta.ULID,
 				b.meta.Thanos.Labels,
 				indexr,
 				chunkr,
 				blockMatchers,
 				req,
+				s.samplesLimiter,
 			)
 			if err != nil {
 				return errors.Wrapf(err, "fetch series for block %s", b.meta.ULID)
@@ -1585,11 +1632,22 @@ func (r *bucketChunkReader) addPreload(id uint64) error {
 }

 // preload all added chunk IDs. Must be called before the first call to Chunk is made.
-func (r *bucketChunkReader) preload() error {
+func (r *bucketChunkReader) preload(samplesLimiter *Limiter) error {
 	const maxChunkSize = 16000

 	var g run.Group

+	numChunks := uint64(0)
+	for _, offsets := range r.preloads {
+		for range offsets {
+			numChunks++
+		}
+	}
+	numSamples := numChunks * maxSamplesPerChunk
+
+	if err := samplesLimiter.Check(numSamples); err != nil {
+		return errors.Wrapf(err, "exceeded samples limit")
+	}
+
 	for seq, offsets := range r.preloads {
 		sort.Slice(offsets, func(i, j int) bool {
 			return offsets[i] < offsets[j]

Reviewer: we could perhaps move this const to the top along with maxSamplesPerChunk.
Author: Not necessarily, as it is only used here.
@@ -0,0 +1,54 @@
+package store
+
+import (
+	"context"
+	"time"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/prometheus/pkg/gate"
+)
+
+// Gate wraps the Prometheus gate with extra metrics.
+type Gate struct {
+	g              *gate.Gate
+	currentQueries prometheus.Gauge
+	gateTiming     prometheus.Summary
+}
+
+// NewGate returns a new gate.
+func NewGate(maxConcurrent int, reg prometheus.Registerer) *Gate {
+	g := &Gate{
+		g: gate.New(maxConcurrent),
+	}
+	g.currentQueries = prometheus.NewGauge(prometheus.GaugeOpts{
+		Name: "thanos_bucket_store_queries_total",
+		Help: "Total number of currently executing queries.",
+	})
+	g.gateTiming = prometheus.NewSummary(prometheus.SummaryOpts{
+		Name: "thanos_bucket_store_gate_seconds",
+		Help: "How many seconds it took for a query to pass through the gate.",
+	})
+
+	if reg != nil {
+		reg.MustRegister(g.currentQueries, g.gateTiming)
+	}
+
+	return g
+}
+
+// IsMyTurn initiates a new query and waits until it's our turn to fulfill a query request.
+func (g *Gate) IsMyTurn(ctx context.Context) error {
+	g.currentQueries.Inc()
+	start := time.Now()
+	if err := g.g.Start(ctx); err != nil {
+		return err
+	}
+	g.gateTiming.Observe(float64(time.Now().Sub(start)))
+	return nil
+}
+
+// Done finishes a query.
+func (g *Gate) Done() {
+	g.currentQueries.Dec()
+	g.g.Done()
+}

Reviewer: Summaries should be banned. They are quite expensive and non-aggregatable. Can we use a histogram? (:
Author: Will do.
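A minimal usage sketch of this Gate; handleSeries is a hypothetical caller, not part of the PR:

    package store

    import "context"

    // handleSeries shows the intended pattern: acquire a slot before doing
    // bounded work, and release it only when the work is fully done.
    func handleSeries(ctx context.Context, g *Gate) error {
    	if err := g.IsMyTurn(ctx); err != nil { // blocks until a slot frees up, or ctx is cancelled
    		return err
    	}
    	defer g.Done() // releases the slot only when the whole call returns
    	// ... serve the request ...
    	return nil
    }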
@@ -0,0 +1,33 @@
+package store
+
+import (
+	"github.com/pkg/errors"
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+// Limiter is a simple mechanism for checking if something has passed a certain threshold.
+type Limiter struct {
+	limit uint64
+
+	// A pointer to a counter metric which we will increase if Check() fails.
+	failedCounter *prometheus.Counter
+}
+
+// NewLimiter returns a new limiter with a specified limit. 0 disables the limit.
+func NewLimiter(limit uint64, ctr *prometheus.Counter) *Limiter {
+	return &Limiter{limit: limit, failedCounter: ctr}
+}
+
+// Check checks if the passed number exceeds the limit or not.
+func (l *Limiter) Check(num uint64) error {
+	if l.limit == 0 {
+		return nil
+	}
+	if num > l.limit {
+		if l.failedCounter != nil {
+			(*l.failedCounter).Inc()
+		}
+		return errors.Errorf("limit %v violated (got %v)", l.limit, num)
+	}
+	return nil
+}

Reviewer: Why casting?
Author: Will remove.
Author (correction): failedCounter is a pointer, so it needs to be dereferenced before Inc() is called. We need a pointer here because this way we decouple the counter from the metrics registry, which makes it much easier to use in the future if the need arises.
Reviewer: still don't get this, will ask offline.
Reviewer: got it now — we hold a pointer to an interface. It's interesting, as this interface always holds a *counter, so I think we are good without this extra level of indirection. Also, can we get rid of the nil checking every time we do Check?
Author: 👍 yeah, we should just remove the nil checks since the caller ensures that. This happens in some other places too, will clean it up.
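A small sketch of the Limiter in use, mirroring the estimate made in preload above (the numbers are hypothetical, and the bucket store passes &metrics.queriesLimited rather than nil):

    package store

    import "fmt"

    func exampleLimiterCheck() {
    	l := NewLimiter(50000000, nil) // default --grpc-sample-limit; nil counter is tolerated by Check as written

    	numChunks := uint64(500000)                // hypothetical number of chunks one Series call selects
    	estimate := numChunks * maxSamplesPerChunk // worst case: 500000 * 120 = 60000000 samples
    	if err := l.Check(estimate); err != nil {
    		fmt.Println(err) // limit 50000000 violated (got 60000000)
    	}
    }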
Reviewer: Maybe align the flags with how Prometheus names its flags? grpc-sample-limit → {query|store|storage}.grpc.read-sample-limit? See storage.remote.read-sample-limit: https://github.com/prometheus/prometheus/blob/master/cmd/prometheus/main.go#L206