From 87cf90e647518ce96426e116c01c6050ad5ce40b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Giedrius=20Statkevi=C4=8Dius?=
Date: Wed, 15 Dec 2021 11:13:13 +0200
Subject: [PATCH] sidecar: not ready when Prometheus is unavailable (#4939)

Signed-off-by: Philip Gough
---
Review notes (this section is not part of the commit message):
- TestSidecarNotReady now runs in its own e2e environment name and bounds
  its readiness polling with a context timeout.

 CHANGELOG.md           |  1 +
 cmd/thanos/sidecar.go  |  2 ++
 test/e2e/query_test.go | 35 +++++++++++++++++++++++++++++++++++
 3 files changed, 38 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1d1e4648eef..b309d85f9bc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -47,6 +47,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re
 
 ### Changed
 
+- [#4939](https://github.com/thanos-io/thanos/pull/4939) Sidecar: set Sidecar to NOT READY when it cannot establish a connection with Prometheus
 - [#4864](https://github.com/thanos-io/thanos/pull/4864) UI: Remove the old PromQL editor
 
 ## [v0.23.1](https://github.com/thanos-io/thanos/tree/release-0.23) - 2021.10.1
diff --git a/cmd/thanos/sidecar.go b/cmd/thanos/sidecar.go
index 56e0076f5d7..291782e9a3d 100644
--- a/cmd/thanos/sidecar.go
+++ b/cmd/thanos/sidecar.go
@@ -223,8 +223,10 @@ func runSidecar(
 				if err := m.UpdateLabels(iterCtx); err != nil {
 					level.Warn(logger).Log("msg", "heartbeat failed", "err", err)
 					promUp.Set(0)
+					statusProber.NotReady(err)
 				} else {
 					promUp.Set(1)
+					statusProber.Ready()
 				}
 
 				return nil
diff --git a/test/e2e/query_test.go b/test/e2e/query_test.go
index cd61e991abf..7f5e4b0f37b 100644
--- a/test/e2e/query_test.go
+++ b/test/e2e/query_test.go
@@ -6,6 +6,7 @@ package e2e_test
 import (
 	"context"
 	"fmt"
+	"net/http"
 	"net/http/httptest"
 	"net/url"
 	"os"
@@ -103,6 +104,40 @@ func sortResults(res model.Vector) {
 	})
 }
 
+func TestSidecarNotReady(t *testing.T) {
+	t.Parallel()
+
+	e, err := e2e.NewDockerEnvironment("e2e_test_sidecar_not_ready")
+	testutil.Ok(t, err)
+	t.Cleanup(e2ethanos.CleanScenario(t, e))
+
+	prom, sidecar, err := e2ethanos.NewPrometheusWithSidecar(e, "alone", defaultPromConfig("prom-alone", 0, "", ""), "", e2ethanos.DefaultPrometheusImage())
+	testutil.Ok(t, err)
+	testutil.Ok(t, e2e.StartAndWaitReady(prom, sidecar))
+	testutil.Ok(t, prom.Stop())
+
+	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
+	defer cancel()
+
+	// Sidecar should not be ready - it cannot accept traffic if Prometheus is down. Poll /-/ready until it stops answering 200; ctx bounds the wait so a regression fails the test instead of hanging it.
+	testutil.Ok(t, runutil.Retry(1*time.Second, ctx.Done(), func() (rerr error) {
+		req, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://"+sidecar.Endpoint("http")+"/-/ready", nil)
+		if err != nil {
+			return err
+		}
+		resp, err := http.DefaultClient.Do(req)
+		if err != nil {
+			return err
+		}
+		defer runutil.CloseWithErrCapture(&rerr, resp.Body, "closing resp body")
+
+		if resp.StatusCode == http.StatusOK {
+			return fmt.Errorf("got status code %d", resp.StatusCode)
+		}
+		return nil
+	}))
+}
+
 func TestQuery(t *testing.T) {
 	t.Parallel()
 