Skip to content

Commit

Permalink
Query Sample Statistics (#4708)
Browse files Browse the repository at this point in the history
* Implementing stats on results_cache

* Fix TestResultsCacheRecent

* adding some comments

* Update Changelog

* Add test case with 3 responses

* address feedback

* Doc update
  • Loading branch information
alanprot authored Apr 7, 2022
1 parent bb6b026 commit 34a541f
Show file tree
Hide file tree
Showing 13 changed files with 1,748 additions and 128 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
* [CHANGE] Distributor: Apply `max_fetched_series_per_query` limit for `/series` API. #4683
* [FEATURE] Ruler: Add `external_labels` option to tag all alerts with a given set of labels.
* [FEATURE] Compactor: Add `-compactor.skip-blocks-with-out-of-order-chunks-enabled` configuration to mark blocks containing index with out-of-order chunks for no compact instead of halting the compaction
* [FEATURE] Querier/Query-Frontend: Add `-querier.per-step-stats-enabled` and `-frontend.cache-queryable-samples-stats` configurations to enable query sample statistics

## 1.12.0 in progress

Expand Down
4 changes: 4 additions & 0 deletions docs/blocks-storage/querier.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,10 @@ querier:
# CLI flag: -querier.at-modifier-enabled
[at_modifier_enabled: <boolean> | default = false]

# Enable returning samples stats per steps in query response.
# CLI flag: -querier.per-step-stats-enabled
[per_step_stats_enabled: <boolean> | default = false]

# The time after which a metric should be queried from storage and not just
# ingesters. 0 means all queries are sent to store. When running the blocks
# storage, if this option is enabled, the time range of the query sent to the
Expand Down
8 changes: 8 additions & 0 deletions docs/configuration/config-file-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -889,6 +889,10 @@ The `querier_config` configures the Cortex querier.
# CLI flag: -querier.at-modifier-enabled
[at_modifier_enabled: <boolean> | default = false]
# Enable returning samples stats per steps in query response.
# CLI flag: -querier.per-step-stats-enabled
[per_step_stats_enabled: <boolean> | default = false]
# The time after which a metric should be queried from storage and not just
# ingesters. 0 means all queries are sent to store. When running the blocks
# storage, if this option is enabled, the time range of the query sent to the
Expand Down Expand Up @@ -1153,6 +1157,10 @@ results_cache:
# CLI flag: -frontend.compression
[compression: <string> | default = ""]
# Cache Statistics queryable samples on results cache.
# CLI flag: -frontend.cache-queryable-samples-stats
[cache_queryable_samples_stats: <boolean> | default = false]
# Cache query results.
# CLI flag: -querier.cache-results
[cache_results: <boolean> | default = false]
Expand Down
2 changes: 1 addition & 1 deletion pkg/cortex/cortex.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ func (c *Config) Validate(log log.Logger) error {
if err := c.Worker.Validate(log); err != nil {
return errors.Wrap(err, "invalid frontend_worker config")
}
if err := c.QueryRange.Validate(); err != nil {
if err := c.QueryRange.Validate(c.Querier); err != nil {
return errors.Wrap(err, "invalid query_range config")
}
if err := c.TableManager.Validate(); err != nil {
Expand Down
11 changes: 6 additions & 5 deletions pkg/cortex/modules.go
Original file line number Diff line number Diff line change
Expand Up @@ -523,11 +523,12 @@ func (t *Cortex) initQueryFrontendTripperware() (serv services.Service, err erro
queryrange.PrometheusResponseExtractor{},
t.Cfg.Schema,
promql.EngineOpts{
Logger: util_log.Logger,
Reg: prometheus.DefaultRegisterer,
MaxSamples: t.Cfg.Querier.MaxSamples,
Timeout: t.Cfg.Querier.Timeout,
EnableAtModifier: t.Cfg.Querier.AtModifierEnabled,
Logger: util_log.Logger,
Reg: prometheus.DefaultRegisterer,
MaxSamples: t.Cfg.Querier.MaxSamples,
Timeout: t.Cfg.Querier.Timeout,
EnableAtModifier: t.Cfg.Querier.AtModifierEnabled,
EnablePerStepStats: t.Cfg.Querier.EnablePerStepStats,
NoStepSubqueryIntervalFn: func(int64) int64 {
return t.Cfg.Querier.DefaultEvaluationInterval.Milliseconds()
},
Expand Down
3 changes: 3 additions & 0 deletions pkg/querier/querier.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ type Config struct {
QueryIngestersWithin time.Duration `yaml:"query_ingesters_within"`
QueryStoreForLabels bool `yaml:"query_store_for_labels_enabled"`
AtModifierEnabled bool `yaml:"at_modifier_enabled"`
EnablePerStepStats bool `yaml:"per_step_stats_enabled"`

// QueryStoreAfter the time after which queries should also be sent to the store and not just ingesters.
QueryStoreAfter time.Duration `yaml:"query_store_after"`
Expand Down Expand Up @@ -92,6 +93,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
f.DurationVar(&cfg.QueryIngestersWithin, "querier.query-ingesters-within", 0, "Maximum lookback beyond which queries are not sent to ingester. 0 means all queries are sent to ingester.")
f.BoolVar(&cfg.QueryStoreForLabels, "querier.query-store-for-labels-enabled", false, "Query long-term store for series, label values and label names APIs. Works only with blocks engine.")
f.BoolVar(&cfg.AtModifierEnabled, "querier.at-modifier-enabled", false, "Enable the @ modifier in PromQL.")
f.BoolVar(&cfg.EnablePerStepStats, "querier.per-step-stats-enabled", false, "Enable returning samples stats per steps in query response.")
f.DurationVar(&cfg.MaxQueryIntoFuture, "querier.max-query-into-future", 10*time.Minute, "Maximum duration into the future you can query. 0 to disable.")
f.DurationVar(&cfg.DefaultEvaluationInterval, "querier.default-evaluation-interval", time.Minute, "The default evaluation interval or step size for subqueries.")
f.DurationVar(&cfg.QueryStoreAfter, "querier.query-store-after", 0, "The time after which a metric should be queried from storage and not just ingesters. 0 means all queries are sent to store. When running the blocks storage, if this option is enabled, the time range of the query sent to the store will be manipulated to ensure the query end is not more recent than 'now - query-store-after'.")
Expand Down Expand Up @@ -174,6 +176,7 @@ func New(cfg Config, limits *validation.Overrides, distributor Distributor, stor
MaxSamples: cfg.MaxSamples,
Timeout: cfg.Timeout,
LookbackDelta: cfg.LookbackDelta,
EnablePerStepStats: cfg.EnablePerStepStats,
EnableAtModifier: cfg.AtModifierEnabled,
NoStepSubqueryIntervalFn: func(int64) int64 {
return cfg.DefaultEvaluationInterval.Milliseconds()
Expand Down
93 changes: 93 additions & 0 deletions pkg/querier/queryrange/query_range.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"strconv"
"strings"
"time"
"unsafe"

"github.com/gogo/protobuf/proto"
"github.com/gogo/status"
Expand Down Expand Up @@ -88,6 +89,10 @@ type Request interface {
proto.Message
// LogToSpan writes information about this request to an OpenTracing span
LogToSpan(opentracing.Span)
// GetStats returns the stats of the request.
GetStats() string
// WithStats clones the current `PrometheusRequest` with a new stats.
WithStats(stats string) Request
}

// Response represents a query range response.
Expand All @@ -114,6 +119,13 @@ func (q *PrometheusRequest) WithQuery(query string) Request {
return &new
}

// WithStats returns a copy of this `PrometheusRequest` whose Stats field is
// replaced with the provided value; the receiver is left untouched.
func (q *PrometheusRequest) WithStats(stats string) Request {
	clone := *q
	clone.Stats = stats
	return &clone
}

// LogToSpan logs the current `PrometheusRequest` parameters to the specified span.
func (q *PrometheusRequest) LogToSpan(sp opentracing.Span) {
sp.LogFields(
Expand Down Expand Up @@ -174,6 +186,7 @@ func (prometheusCodec) MergeResponse(responses ...Response) (Response, error) {
Data: PrometheusData{
ResultType: model.ValMatrix.String(),
Result: matrixMerge(promResponses),
Stats: statsMerge(promResponses),
},
}

Expand Down Expand Up @@ -220,6 +233,7 @@ func (prometheusCodec) DecodeRequest(_ context.Context, r *http.Request, forward
}

result.Query = r.FormValue("query")
result.Stats = r.FormValue("stats")
result.Path = r.URL.Path

// Include the specified headers from http request in prometheusRequest.
Expand Down Expand Up @@ -252,6 +266,7 @@ func (prometheusCodec) EncodeRequest(ctx context.Context, r Request) (*http.Requ
"end": []string{encodeTime(promReq.End)},
"step": []string{encodeDurationMs(promReq.Step)},
"query": []string{promReq.Query},
"stats": []string{promReq.Stats},
}
u := &url.URL{
Path: promReq.Path,
Expand Down Expand Up @@ -380,6 +395,46 @@ func (s *SampleStream) MarshalJSON() ([]byte, error) {
return json.Marshal(stream)
}

// statsMerge combines the per-step queryable-samples stats carried by the
// given partial responses into a single PrometheusResponseStats, mirroring
// how matrixMerge combines the sample streams. It returns nil when none of
// the responses carries stats at all.
func statsMerge(resps []*PrometheusResponse) *PrometheusResponseStats {
	perStep := map[int64]*PrometheusResponseQueryableSamplesStatsPerStep{}
	statsPresent := false

	for _, resp := range resps {
		stats := resp.Data.Stats
		if stats == nil {
			continue
		}

		statsPresent = true
		if stats.Samples == nil {
			continue
		}

		// On duplicate timestamps the later response wins, as in matrixMerge.
		for _, step := range stats.Samples.TotalQueryableSamplesPerStep {
			perStep[step.GetTimestampMs()] = step
		}
	}

	if !statsPresent {
		return nil
	}

	// Emit the merged steps in ascending timestamp order.
	timestamps := make([]int64, 0, len(perStep))
	for ts := range perStep {
		timestamps = append(timestamps, ts)
	}
	sort.Slice(timestamps, func(i, j int) bool { return timestamps[i] < timestamps[j] })

	merged := &PrometheusResponseStats{Samples: &PrometheusResponseSamplesStats{}}
	for _, ts := range timestamps {
		step := perStep[ts]
		merged.Samples.TotalQueryableSamplesPerStep = append(merged.Samples.TotalQueryableSamplesPerStep, step)
		merged.Samples.TotalQueryableSamples += step.Value
	}

	return merged
}

func matrixMerge(resps []*PrometheusResponse) []SampleStream {
output := map[string]*SampleStream{}
for _, resp := range resps {
Expand Down Expand Up @@ -473,3 +528,41 @@ func decorateWithParamName(err error, field string) error {
}
return fmt.Errorf(errTmpl, field, err)
}

// PrometheusResponseQueryableSamplesStatsPerStepJsoniterDecode decodes a
// `[timestamp, value]` JSON array into a
// PrometheusResponseQueryableSamplesStatsPerStep, converting the
// seconds-based timestamp back into milliseconds.
func PrometheusResponseQueryableSamplesStatsPerStepJsoniterDecode(ptr unsafe.Pointer, iter *jsoniter.Iterator) {
	if !iter.ReadArray() {
		iter.ReportError("queryrange.PrometheusResponseQueryableSamplesStatsPerStep", "expected [")
		return
	}

	// The timestamp is serialized as (possibly fractional) seconds.
	ts := model.Time(iter.ReadFloat64() * float64(time.Second/time.Millisecond))

	if !iter.ReadArray() {
		iter.ReportError("queryrange.PrometheusResponseQueryableSamplesStatsPerStep", "expected ,")
		return
	}
	value := iter.ReadInt64()

	// A third array element means the input is malformed.
	if iter.ReadArray() {
		iter.ReportError("queryrange.PrometheusResponseQueryableSamplesStatsPerStep", "expected ]")
	}

	*(*PrometheusResponseQueryableSamplesStatsPerStep)(ptr) = PrometheusResponseQueryableSamplesStatsPerStep{
		TimestampMs: int64(ts),
		Value:       value,
	}
}

// PrometheusResponseQueryableSamplesStatsPerStepJsoniterEncode writes a stats
// step as a `[timestamp, value]` JSON array, with the millisecond timestamp
// converted to (possibly fractional) seconds.
func PrometheusResponseQueryableSamplesStatsPerStepJsoniterEncode(ptr unsafe.Pointer, stream *jsoniter.Stream) {
	step := (*PrometheusResponseQueryableSamplesStatsPerStep)(ptr)
	stream.WriteArrayStart()
	stream.WriteFloat64(float64(step.TimestampMs) / float64(time.Second/time.Millisecond))
	stream.WriteMore()
	stream.WriteInt64(step.Value)
	stream.WriteArrayEnd()
}

// init registers the custom jsoniter codec so per-step stats marshal to and
// from compact `[timestamp, value]` pairs instead of the default struct form.
func init() {
	const statsPerStepType = "queryrange.PrometheusResponseQueryableSamplesStatsPerStep"
	// Steps are never considered "empty", so every entry is serialized.
	neverEmpty := func(unsafe.Pointer) bool { return false }
	jsoniter.RegisterTypeDecoderFunc(statsPerStepType, PrometheusResponseQueryableSamplesStatsPerStepJsoniterDecode)
	jsoniter.RegisterTypeEncoderFunc(statsPerStepType, PrometheusResponseQueryableSamplesStatsPerStepJsoniterEncode, neverEmpty)
}
Loading

0 comments on commit 34a541f

Please sign in to comment.