From 367e87813c2651b7fdbc32f1722ed747f31f7101 Mon Sep 17 00:00:00 2001 From: Michael Okoko Date: Wed, 14 Apr 2021 11:52:23 +0100 Subject: [PATCH 1/2] Re-enable e2e test for ruler E2E tests for ruler are currently marked as skipped. This re-enables the test so that we can catch the flakiness if it breaks again. Also, it removes the test for the Alertmanager HTTP client until we can improve the implementation (see https://github.com/thanos-io/thanos/issues/4056). Signed-off-by: Michael Okoko --- test/e2e/rule_test.go | 178 ------------------------------------------ 1 file changed, 178 deletions(-) diff --git a/test/e2e/rule_test.go b/test/e2e/rule_test.go index 5739b7b9f7..6fa4ae5980 100644 --- a/test/e2e/rule_test.go +++ b/test/e2e/rule_test.go @@ -7,19 +7,15 @@ import ( "bytes" "context" "encoding/json" - "encoding/pem" "fmt" "io/ioutil" "net/http" - "net/http/httptest" "os" "path/filepath" - "sync" "testing" "time" "github.com/cortexproject/cortex/integration/e2e" - "github.com/pkg/errors" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/discovery/targetgroup" "gopkg.in/yaml.v2" @@ -29,7 +25,6 @@ import ( "github.com/thanos-io/thanos/pkg/promclient" "github.com/thanos-io/thanos/pkg/query" "github.com/thanos-io/thanos/pkg/rules/rulespb" - "github.com/thanos-io/thanos/pkg/runutil" "github.com/thanos-io/thanos/pkg/testutil" "github.com/thanos-io/thanos/test/e2e/e2ethanos" ) @@ -148,180 +143,7 @@ func writeTargets(t *testing.T, path string, addrs ...string) { testutil.Ok(t, os.Rename(path+".tmp", path)) } -type mockAlertmanager struct { - path string - token string - mtx sync.Mutex - alerts []*model.Alert - lastError error -} - -func newMockAlertmanager(path string, token string) *mockAlertmanager { - return &mockAlertmanager{ - path: path, - token: token, - alerts: make([]*model.Alert, 0), - } -} - -func (m *mockAlertmanager) setLastError(err error) { - m.mtx.Lock() - defer m.mtx.Unlock() - m.lastError = err -} - -func (m 
*mockAlertmanager) LastError() error { - m.mtx.Lock() - defer m.mtx.Unlock() - return m.lastError -} - -func (m *mockAlertmanager) Alerts() []*model.Alert { - m.mtx.Lock() - defer m.mtx.Unlock() - return m.alerts -} - -func (m *mockAlertmanager) ServeHTTP(resp http.ResponseWriter, req *http.Request) { - if req.Method != "POST" { - m.setLastError(errors.Errorf("invalid method: %s", req.Method)) - resp.WriteHeader(http.StatusMethodNotAllowed) - return - } - - if req.URL.Path != m.path { - m.setLastError(errors.Errorf("invalid path: %s", req.URL.Path)) - resp.WriteHeader(http.StatusNotFound) - return - } - - if m.token != "" { - auth := req.Header.Get("Authorization") - if auth != fmt.Sprintf("Bearer %s", m.token) { - m.setLastError(errors.Errorf("invalid auth: %s", req.URL.Path)) - resp.WriteHeader(http.StatusForbidden) - return - } - } - - b, err := ioutil.ReadAll(req.Body) - if err != nil { - m.setLastError(err) - resp.WriteHeader(http.StatusInternalServerError) - return - } - - var alerts []*model.Alert - if err := json.Unmarshal(b, &alerts); err != nil { - m.setLastError(err) - resp.WriteHeader(http.StatusInternalServerError) - return - } - - m.mtx.Lock() - m.alerts = append(m.alerts, alerts...) - m.mtx.Unlock() -} - -// TestRule_AlertmanagerHTTPClient verifies that Thanos Ruler can send alerts to -// Alertmanager in various setups: -// * Plain HTTP. -// * HTTPS with custom CA. -// * API with a prefix. -// * API protected by bearer token authentication. -// -// Because Alertmanager supports HTTP only and no authentication, the test uses -// a mocked server instead of the "real" Alertmanager service. -// The other end-to-end tests exercise against the "real" Alertmanager -// implementation. 
-func TestRule_AlertmanagerHTTPClient(t *testing.T) { - t.Skip("TODO: Allow HTTP ports from binaries running on host to be accessible.") - - s, err := e2e.NewScenario("e2e_test_rule_am_http_client") - testutil.Ok(t, err) - t.Cleanup(e2ethanos.CleanScenario(t, s)) - - tlsSubDir := filepath.Join("tls") - testutil.Ok(t, os.MkdirAll(filepath.Join(s.SharedDir(), tlsSubDir), os.ModePerm)) - - // API v1 with plain HTTP and a prefix. - handler1 := newMockAlertmanager("/prefix/api/v1/alerts", "") - srv1 := httptest.NewServer(handler1) - t.Cleanup(srv1.Close) - - // API v2 with HTTPS and authentication. - handler2 := newMockAlertmanager("/api/v2/alerts", "secret") - srv2 := httptest.NewTLSServer(handler2) - t.Cleanup(srv2.Close) - - var out bytes.Buffer - testutil.Ok(t, pem.Encode(&out, &pem.Block{Type: "CERTIFICATE", Bytes: srv2.TLS.Certificates[0].Certificate[0]})) - caFile := filepath.Join(s.SharedDir(), tlsSubDir, "ca.crt") - testutil.Ok(t, ioutil.WriteFile(caFile, out.Bytes(), 0640)) - - rulesSubDir := filepath.Join("rules") - testutil.Ok(t, os.MkdirAll(filepath.Join(s.SharedDir(), rulesSubDir), os.ModePerm)) - createRuleFiles(t, filepath.Join(s.SharedDir(), rulesSubDir)) - - r, err := e2ethanos.NewRuler(s.SharedDir(), "1", rulesSubDir, []alert.AlertmanagerConfig{ - { - EndpointsConfig: http_util.EndpointsConfig{ - StaticAddresses: []string{srv1.Listener.Addr().String()}, - Scheme: "http", - PathPrefix: "/prefix/", - }, - Timeout: model.Duration(time.Second), - APIVersion: alert.APIv1, - }, - { - HTTPClientConfig: http_util.ClientConfig{ - TLSConfig: http_util.TLSConfig{ - CAFile: filepath.Join(e2e.ContainerSharedDir, tlsSubDir, "ca.crt"), - }, - BearerToken: "secret", - }, - EndpointsConfig: http_util.EndpointsConfig{ - StaticAddresses: []string{srv2.Listener.Addr().String()}, - Scheme: "https", - }, - Timeout: model.Duration(time.Second), - APIVersion: alert.APIv2, - }, - }, []query.Config{ - { - EndpointsConfig: http_util.EndpointsConfig{ - StaticAddresses: func() 
[]string { - q, err := e2ethanos.NewQuerierBuilder(s.SharedDir(), "1", nil).Build() - testutil.Ok(t, err) - return []string{q.NetworkHTTPEndpointFor(s.NetworkName())} - }(), - Scheme: "http", - }, - }, - }) - testutil.Ok(t, err) - testutil.Ok(t, s.StartAndWaitReady(r)) - - q, err := e2ethanos.NewQuerierBuilder(s.SharedDir(), "1", []string{r.GRPCNetworkEndpoint()}).Build() - testutil.Ok(t, err) - testutil.Ok(t, s.StartAndWaitReady(q)) - - ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) - t.Cleanup(cancel) - - testutil.Ok(t, runutil.Retry(5*time.Second, ctx.Done(), func() (err error) { - for i, am := range []*mockAlertmanager{handler1, handler2} { - if len(am.Alerts()) == 0 { - return errors.Errorf("no alert received from handler%d, last error: %v", i, am.LastError()) - } - } - - return nil - })) -} - func TestRule(t *testing.T) { - t.Skip("Flaky test. Fix it. See: https://github.com/thanos-io/thanos/issues/3425.") t.Parallel() s, err := e2e.NewScenario("e2e_test_rule") From ec9b04222f5a7b491700f994c9c33ce13d794c03 Mon Sep 17 00:00:00 2001 From: Michael Okoko Date: Mon, 7 Jun 2021 06:39:18 +0100 Subject: [PATCH 2/2] Bump rule group intervals and AM timeout Signed-off-by: Michael Okoko --- test/e2e/rule_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/e2e/rule_test.go b/test/e2e/rule_test.go index 6fa4ae5980..e2ee815132 100644 --- a/test/e2e/rule_test.go +++ b/test/e2e/rule_test.go @@ -33,7 +33,7 @@ const ( testAlertRuleAbortOnPartialResponse = ` groups: - name: example_abort - interval: 100ms + interval: 1s # Abort should be a default: partial_response_strategy: "ABORT" rules: - alert: TestAlert_AbortOnPartialResponse @@ -47,7 +47,7 @@ groups: testAlertRuleWarnOnPartialResponse = ` groups: - name: example_warn - interval: 100ms + interval: 1s partial_response_strategy: "WARN" rules: - alert: TestAlert_WarnOnPartialResponse @@ -61,7 +61,7 @@ groups: testAlertRuleAddedLaterWebHandler = ` groups: - name: 
example - interval: 100ms + interval: 1s partial_response_strategy: "WARN" rules: - alert: TestAlert_HasBeenLoadedViaWebHandler @@ -184,7 +184,7 @@ func TestRule(t *testing.T) { }, Scheme: "http", }, - Timeout: model.Duration(time.Second), + Timeout: model.Duration(10 * time.Second), APIVersion: alert.APIv1, }, }, []query.Config{