From 32392ed4f45d75ffab8589159b6b8575d881ad13 Mon Sep 17 00:00:00 2001 From: Abdul Qadeer Date: Tue, 2 Nov 2021 15:25:11 -0700 Subject: [PATCH] Add health check API (#41) * Uptick go version * Add test stage * Add health check for metric provider servers * Address review comments Signed-off-by: Abdul Qadeer --- .github/workflows/ci.yml | 3 +++ go.mod | 2 +- pkg/watcher/internal/metricsprovider/k8s.go | 11 ++++++++++ .../internal/metricsprovider/prometheus.go | 16 ++++++++++++++ .../internal/metricsprovider/signalfx.go | 21 +++++++++++++++++++ pkg/watcher/metricsprovider.go | 5 ++++- pkg/watcher/testserver.go | 4 ++++ pkg/watcher/watcher.go | 13 ++++++++++++ pkg/watcher/watcher_test.go | 11 ++++++++++ 9 files changed, 84 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1a1863c..8fffc07 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,3 +20,6 @@ jobs: - name: Build run: go build -o load-watcher main.go + + - name: Test + run: go test ./... diff --git a/go.mod b/go.mod index af882f0..999753c 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/paypal/load-watcher -go 1.15 +go 1.16 require ( github.com/francoispqt/gojay v1.2.13 diff --git a/pkg/watcher/internal/metricsprovider/k8s.go b/pkg/watcher/internal/metricsprovider/k8s.go index 0f74d29..59ab0ee 100644 --- a/pkg/watcher/internal/metricsprovider/k8s.go +++ b/pkg/watcher/internal/metricsprovider/k8s.go @@ -18,6 +18,8 @@ package metricsprovider import ( "context" + "fmt" + "net/http" "os" "github.com/paypal/load-watcher/pkg/watcher" @@ -153,3 +155,12 @@ func (m metricsServerClient) FetchAllHostsMetrics(window *watcher.Window) (map[s return metrics, nil } + +func (m metricsServerClient) Health() (int, error) { + var status int + m.metricsClientSet.RESTClient().Verb("HEAD").Do(context.Background()).StatusCode(&status) + if status != http.StatusOK { + return -1, fmt.Errorf("received response status code: %v", status) + } + return 0, nil +} diff --git a/pkg/watcher/internal/metricsprovider/prometheus.go b/pkg/watcher/internal/metricsprovider/prometheus.go index 1385cf4..91c5c25 100644 --- a/pkg/watcher/internal/metricsprovider/prometheus.go +++ b/pkg/watcher/internal/metricsprovider/prometheus.go @@ -19,6 +19,7 @@ package metricsprovider import ( "context" "fmt" + "net/http" "time" "github.com/paypal/load-watcher/pkg/watcher" @@ -133,6 +134,21 @@ func (s promClient) FetchAllHostsMetrics(window *watcher.Window) (map[string][]w return hostMetrics, anyerr } +func (s promClient) Health() (int, error) { + req, err := http.NewRequest("HEAD", DefaultPromAddress, nil) + if err != nil { + return -1, err + } + resp, _, err := s.client.Do(context.Background(), req) + if err != nil { + return -1, err + } + if resp.StatusCode != http.StatusOK { + return -1, fmt.Errorf("received response status code: %v", resp.StatusCode) + } + return 0, nil +} + func (s promClient) buildPromQuery(host string, metric string, method string, rollup string) string { var promQuery string diff --git a/pkg/watcher/internal/metricsprovider/signalfx.go b/pkg/watcher/internal/metricsprovider/signalfx.go index e2f9109..855543b 100644 --- a/pkg/watcher/internal/metricsprovider/signalfx.go +++ b/pkg/watcher/internal/metricsprovider/signalfx.go @@ -191,6 +191,27 @@ func (s signalFxClient) FetchAllHostsMetrics(window *watcher.Window) (map[string return metrics, nil } +func (s signalFxClient) Health() (int, error) { + return Ping(s.client, s.signalFxAddress) +} + +// Simple ping utility to a given URL +// Returns -1 if unhealthy, 0 if healthy along with error if any +func Ping(client http.Client, url string) (int, error) { + req, err := http.NewRequest("HEAD", url, nil) + if err != nil { + return -1, err + } + resp, err := client.Do(req) + if err != nil { + return -1, err + } + if resp.StatusCode != http.StatusOK { + return -1, fmt.Errorf("received response code: %v", resp.StatusCode) + } + return 0, nil +} + func addMetadata(metric *watcher.Metric, metricType string) { metric.Operator = watcher.Average if metricType == cpuUtilizationMetric { diff --git a/pkg/watcher/metricsprovider.go b/pkg/watcher/metricsprovider.go index 4638f0b..d739072 100644 --- a/pkg/watcher/metricsprovider.go +++ b/pkg/watcher/metricsprovider.go @@ -50,6 +50,9 @@ type MetricsProviderClient interface { FetchHostMetrics(host string, window *Window) ([]Metric, error) // Fetch metrics for all hosts FetchAllHostsMetrics(window *Window) (map[string][]Metric, error) + // Get metric provider server health status + // Returns 0 if healthy, -1 if unhealthy along with error if any + Health() (int, error) } // Generic metrics provider options @@ -57,4 +60,4 @@ type MetricsProviderOpts struct { Name string Address string AuthToken string -} \ No newline at end of file +} diff --git a/pkg/watcher/testserver.go b/pkg/watcher/testserver.go index 570604c..4910f6e 100644 --- a/pkg/watcher/testserver.go +++ b/pkg/watcher/testserver.go @@ -115,3 +115,7 @@ func (t testServerClient) FetchAllHostsMetrics(window *Window) (map[string][]Met return FifteenMinutesMetricsMap, nil } + +func (t testServerClient) Health() (int, error) { + return 0, nil +} diff --git a/pkg/watcher/watcher.go b/pkg/watcher/watcher.go index 2244134..4b7d6d3 100644 --- a/pkg/watcher/watcher.go +++ b/pkg/watcher/watcher.go @@ -37,6 +37,7 @@ import ( const ( BaseUrl = "/watcher" + HealthCheckUrl = "/watcher/health" FifteenMinutes = "15m" TenMinutes = "10m" FiveMinutes = "5m" @@ -153,6 +154,7 @@ func (w *Watcher) StartWatching() { } http.HandleFunc(BaseUrl, w.handler) + http.HandleFunc(HealthCheckUrl, w.healthCheckHandler) server := &http.Server{ Addr: ":2020", Handler: http.DefaultServeMux, @@ -176,6 +178,7 @@ func (w *Watcher) StartWatching() { w.mutex.Lock() w.isStarted = true w.mutex.Unlock() + log.Info("Started watching metrics") } // StartWatching() should be called before calling this. @@ -297,6 +300,16 @@ func (w *Watcher) handler(resp http.ResponseWriter, r *http.Request) { } } +// Simple server status handler +func (w *Watcher) healthCheckHandler(resp http.ResponseWriter, r *http.Request) { + if status, err := w.client.Health(); status != 0 { + log.Warnf("health check failed with: %v", err) + resp.WriteHeader(http.StatusServiceUnavailable) + return + } + resp.WriteHeader(http.StatusOK) +} + // Utility functions func metricMapToWatcherMetrics(metricMap map[string][]Metric, clientName string, window Window) WatcherMetrics { diff --git a/pkg/watcher/watcher_test.go b/pkg/watcher/watcher_test.go index 47373a5..1fdd6a2 100644 --- a/pkg/watcher/watcher_test.go +++ b/pkg/watcher/watcher_test.go @@ -120,6 +120,17 @@ func TestWatcherInternalServerError(t *testing.T) { assert.Equal(t, http.StatusInternalServerError, rr.Code) } +func TestWatcherHealthCheck(t *testing.T) { + req, err := http.NewRequest("GET", HealthCheckUrl, nil) + require.Nil(t, err) + + rr := httptest.NewRecorder() + handler := http.HandlerFunc(w.handler) + + handler.ServeHTTP(rr, req) + require.Equal(t, http.StatusOK, rr.Code) +} + func TestMain(m *testing.M) { client := NewTestMetricsServerClient() w = NewWatcher(client)