-
Notifications
You must be signed in to change notification settings - Fork 2.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Sidecar: Added matchers support #3940
Changes from 10 commits
00e9eb2
51a162d
f4096da
f4069b7
6c2ec33
a4073e4
fe5067a
92c3234
df6b6ef
aaf7906
ef797f6
fd708fe
d874bc1
e59208c
893e37c
92570d0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -209,14 +209,40 @@ func runSidecar( | |
cancel() | ||
}) | ||
} | ||
{ | ||
ctx, cancel := context.WithCancel(context.Background()) | ||
g.Add(func() error { | ||
// We retry infinitely until we reach and fetch BuildVersion from our Prometheus. | ||
err := runutil.Retry(2*time.Second, ctx.Done(), func() error { | ||
if err := m.BuildVersion(ctx); err != nil { | ||
level.Warn(logger).Log( | ||
"msg", "failed to fetch prometheus version. Is Prometheus running? Retrying", | ||
"err", err, | ||
) | ||
return err | ||
} | ||
|
||
level.Info(logger).Log( | ||
"msg", "successfully loaded prometheus version", | ||
) | ||
return nil | ||
}) | ||
if err != nil { | ||
return errors.Wrap(err, "failed to get prometheus version") | ||
} | ||
return nil | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If you run this in a goroutine and it ends, the sidecar program also stops running because of the run group. Please make sure your code doesn't break the functionality. You can see the tests failed. I think it is unnecessary to have a separate goroutine for checking the Prometheus version as it is unlikely to change. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've got a doubt that we already have more than one actor in this run group but why there is no such problem with them?
Do we add things in a separate goroutine which are likely to change? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
}, func(error) { | ||
cancel() | ||
}) | ||
} | ||
|
||
{ | ||
t := exthttp.NewTransport() | ||
t.MaxIdleConnsPerHost = conf.connection.maxIdleConnsPerHost | ||
t.MaxIdleConns = conf.connection.maxIdleConns | ||
c := promclient.NewClient(&http.Client{Transport: tracing.HTTPTripperware(logger, t)}, logger, thanoshttp.ThanosUserAgent) | ||
|
||
promStore, err := store.NewPrometheusStore(logger, reg, c, conf.prometheus.url, component.Sidecar, m.Labels, m.Timestamps) | ||
promStore, err := store.NewPrometheusStore(logger, reg, c, conf.prometheus.url, component.Sidecar, m.Labels, m.Timestamps, m.Version) | ||
if err != nil { | ||
return errors.Wrap(err, "create Prometheus store") | ||
} | ||
|
@@ -346,10 +372,11 @@ func validatePrometheus(ctx context.Context, client *promclient.Client, logger l | |
type promMetadata struct { | ||
promURL *url.URL | ||
|
||
mtx sync.Mutex | ||
mint int64 | ||
maxt int64 | ||
labels labels.Labels | ||
mtx sync.Mutex | ||
mint int64 | ||
maxt int64 | ||
labels labels.Labels | ||
promVersion string | ||
|
||
limitMinTime thanosmodel.TimeOrDurationValue | ||
|
||
|
@@ -395,6 +422,26 @@ func (s *promMetadata) Timestamps() (mint int64, maxt int64) { | |
return s.mint, s.maxt | ||
} | ||
|
||
func (s *promMetadata) BuildVersion(ctx context.Context) error { | ||
ver, err := s.client.BuildVersion(ctx, s.promURL) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
s.mtx.Lock() | ||
defer s.mtx.Unlock() | ||
|
||
s.promVersion = ver | ||
return nil | ||
} | ||
|
||
func (s *promMetadata) Version() string { | ||
s.mtx.Lock() | ||
defer s.mtx.Unlock() | ||
|
||
return s.promVersion | ||
} | ||
|
||
type sidecarConfig struct { | ||
http httpConfig | ||
grpc grpcConfig | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -50,6 +50,7 @@ type PrometheusStore struct { | |
component component.StoreAPI | ||
externalLabelsFn func() labels.Labels | ||
timestamps func() (mint int64, maxt int64) | ||
promVersion func() string | ||
|
||
remoteReadAcceptableResponses []prompb.ReadRequest_ResponseType | ||
|
||
|
@@ -69,6 +70,7 @@ func NewPrometheusStore( | |
component component.StoreAPI, | ||
externalLabelsFn func() labels.Labels, | ||
timestamps func() (mint int64, maxt int64), | ||
promVersion func() string, | ||
) (*PrometheusStore, error) { | ||
if logger == nil { | ||
logger = log.NewNopLogger() | ||
|
@@ -80,6 +82,7 @@ func NewPrometheusStore( | |
component: component, | ||
externalLabelsFn: externalLabelsFn, | ||
timestamps: timestamps, | ||
promVersion: promVersion, | ||
remoteReadAcceptableResponses: []prompb.ReadRequest_ResponseType{prompb.ReadRequest_STREAMED_XOR_CHUNKS, prompb.ReadRequest_SAMPLES}, | ||
buffers: sync.Pool{New: func() interface{} { | ||
b := make([]byte, 0, initialBufSize) | ||
|
@@ -496,9 +499,39 @@ func (p *PrometheusStore) LabelValues(ctx context.Context, r *storepb.LabelValue | |
return &storepb.LabelValuesResponse{Values: []string{l}}, nil | ||
} | ||
|
||
vals, err := p.client.LabelValuesInGRPC(ctx, p.base, r.Label, nil, r.Start, r.End) | ||
if err != nil { | ||
return nil, err | ||
var ( | ||
sers []map[string]string | ||
err error | ||
) | ||
|
||
vals := []string{} | ||
version := p.promVersion() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
if len(r.Matchers) == 0 || version > "2.24" { | ||
vals, err = p.client.LabelValuesInGRPC(ctx, p.base, r.Label, r.Matchers, r.Start, r.End) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are we making gRPC call? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. YES There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The naming makes me confused, too. We are making HTTP calls but return GRPC error. |
||
if err != nil { | ||
return nil, err | ||
} | ||
} else { | ||
matchers, err := storepb.MatchersToPromMatchers(r.Matchers...) | ||
if err != nil { | ||
return nil, status.Error(codes.Internal, err.Error()) | ||
} | ||
sers, err = p.client.SeriesInGRPC(ctx, p.base, matchers, r.Start, r.End) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
// using set to handle duplicate values. | ||
Namanl2001 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
labelValuesSet := make(map[string]struct{}) | ||
for _, s := range sers { | ||
if val, exists := s[r.Label]; exists { | ||
Namanl2001 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
labelValuesSet[val] = struct{}{} | ||
} | ||
} | ||
for key := range labelValuesSet { | ||
vals = append(vals, key) | ||
} | ||
} | ||
sort.Strings(vals) | ||
Namanl2001 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return &storepb.LabelValuesResponse{Values: vals}, nil | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am still thinking about this logic.
This doesn't work well when Prometheus version is < 2.14.0, where
/api/v1/status/buildinfo
doesn't exist. In this case, we just get 404s but we are still retrying indefinitely. This seems wrong to me.We can only retry for a short time or we check the return error to stop retrying.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Lets retry for lets say
N
times (maybeN=5
) and exit if retry fails?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also for Prometheus
<2.14.0
this function would fail, so we also need to decide how to fetch build version for<2.14.0
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As discussed in contributor hours:
Let’s check for 404, if it’s present, set 0 value for promVersion