diff --git a/test/e2e/framework/framework.go b/test/e2e/framework/framework.go
index ffe208537a..d5bfcccb42 100644
--- a/test/e2e/framework/framework.go
+++ b/test/e2e/framework/framework.go
@@ -35,63 +35,81 @@ import (
 var namespaceName = "openshift-monitoring"
 
 type Framework struct {
-	OperatorClient       *client.Client
-	CRDClient            crdc.CustomResourceDefinitionInterface
-	KubeClient           kubernetes.Interface
-	OpenshiftRouteClient routev1.RouteV1Interface
+	OperatorClient      *client.Client
+	CRDClient           crdc.CustomResourceDefinitionInterface
+	KubeClient          kubernetes.Interface
+	PrometheusK8sClient *PrometheusClient
 
 	MonitoringClient *monClient.MonitoringV1Client
 	Ns               string
 }
 
-func New(kubeConfigPath string) (*Framework, error) {
+// New returns a new cluster monitoring operator end-to-end test framework and
+// triggers all the setup logic.
+//
+// The returned cleanUpFunc can be non-nil even if an error is returned. In that
+// case callers are still expected to run it, so that whatever the setup
+// managed to create gets removed again.
+func New(kubeConfigPath string) (*Framework, cleanUpFunc, error) {
 	config, err := clientcmd.BuildConfigFromFlags("", kubeConfigPath)
 	if err != nil {
-		return nil, err
+		return nil, nil, err
 	}
 
 	kubeClient, err := kubernetes.NewForConfig(config)
 	if err != nil {
-		return nil, errors.Wrap(err, "creating kubeClient failed")
+		return nil, nil, errors.Wrap(err, "creating kubeClient failed")
 	}
 
+	// So far only necessary for prometheusK8sClient.
 	openshiftRouteClient, err := routev1.NewForConfig(config)
 	if err != nil {
-		return nil, errors.Wrap(err, "creating openshiftClient failed")
+		return nil, nil, errors.Wrap(err, "creating openshiftClient failed")
 	}
 
 	mClient, err := monClient.NewForConfig(config)
 	if err != nil {
-		return nil, errors.Wrap(err, "creating monitoring client failed")
+		return nil, nil, errors.Wrap(err, "creating monitoring client failed")
 	}
 
 	eclient, err := apiextensionsclient.NewForConfig(config)
 	if err != nil {
-		return nil, errors.Wrap(err, "creating extensions client failed")
+		return nil, nil, errors.Wrap(err, "creating extensions client failed")
 	}
 	crdClient := eclient.ApiextensionsV1beta1().CustomResourceDefinitions()
 
 	operatorClient, err := client.New(config, "", namespaceName, "")
 	if err != nil {
-		return nil, errors.Wrap(err, "creating operator client failed")
+		return nil, nil, errors.Wrap(err, "creating operator client failed")
 	}
 
 	f := &Framework{
-		OperatorClient:       operatorClient,
-		KubeClient:           kubeClient,
-		OpenshiftRouteClient: openshiftRouteClient,
-		CRDClient:            crdClient,
-		MonitoringClient:     mClient,
-		Ns:                   namespaceName,
+		OperatorClient:   operatorClient,
+		KubeClient:       kubeClient,
+		CRDClient:        crdClient,
+		MonitoringClient: mClient,
+		Ns:               namespaceName,
+	}
+
+	cleanUp, err := f.setup()
+	if err != nil {
+		// Hand out cleanUp too (it may be nil), so the caller can still tear down.
+		return nil, cleanUp, errors.Wrap(err, "failed to setup test framework")
+	}
+
+	// Prometheus client depends on setup above.
+	f.PrometheusK8sClient, err = NewPrometheusClient(openshiftRouteClient, kubeClient)
+	if err != nil {
+		return nil, cleanUp, errors.Wrap(err, "creating prometheusK8sClient failed")
 	}
 
-	return f, nil
+	return f, cleanUp, nil
 }
 
 type cleanUpFunc func() error
 
-// Setup creates everything necessary to use the test framework.
-func (f *Framework) Setup() (cleanUpFunc, error) {
+// setup creates everything necessary to use the test framework.
+func (f *Framework) setup() (cleanUpFunc, error) {
 	cleanUpFuncs := []cleanUpFunc{}
 
 	cf, err := f.CreateServiceAccount()
diff --git a/test/e2e/framework/prometheus_client.go b/test/e2e/framework/prometheus_client.go
index 75cb39d36a..8889d15a66 100644
--- a/test/e2e/framework/prometheus_client.go
+++ b/test/e2e/framework/prometheus_client.go
@@ -16,11 +16,16 @@ package framework
 
 import (
 	"crypto/tls"
+	"fmt"
 	"io/ioutil"
 	"net/http"
+	"strconv"
 	"strings"
+	"testing"
+	"time"
 
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/client-go/kubernetes"
 
 	routev1 "github.com/openshift/client-go/route/clientset/versioned/typed/route/v1"
@@ -37,7 +42,7 @@ type PrometheusClient struct {
 	token string
 }
 
-// NewPrometheusClient returns creates and returns a new PrometheusClient.
+// NewPrometheusClient creates and returns a new PrometheusClient.
 func NewPrometheusClient(
 	routeClient routev1.RouteV1Interface,
 	kubeClient kubernetes.Interface,
@@ -68,8 +73,9 @@ func NewPrometheusClient(
 	}, nil
 }
 
-// Query makes a request against the Prometheus /api/v1/query endpoint.
-func (c *PrometheusClient) Query(query string) (int, error) {
+// Query runs an http get request against the Prometheus query api and returns
+// the response body.
+func (c *PrometheusClient) Query(query string) ([]byte, error) {
 	tr := &http.Transport{
 		TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
 	}
@@ -78,7 +84,7 @@ func (c *PrometheusClient) Query(query string) (int, error) {
 
 	req, err := http.NewRequest("GET", "https://"+c.host+"/api/v1/query", nil)
 	if err != nil {
-		return 0, err
+		return nil, err
 	}
 
 	q := req.URL.Query()
@@ -89,21 +95,114 @@ func (c *PrometheusClient) Query(query string) (int, error) {
 
 	resp, err := client.Do(req)
 	if err != nil {
-		return 0, err
+		return nil, err
 	}
 
 	defer resp.Body.Close()
 
 	body, err := ioutil.ReadAll(resp.Body)
 	if err != nil {
-		return 0, err
+		return nil, err
 	}
 
+	return body, nil
+}
+
+// GetFirstValueFromPromQuery takes a query api response body and returns the
+// value of the first timeseries as an integer. It errors if body cannot be
+// parsed, if it does not contain exactly one timeseries, or if the sample
+// value of that timeseries is not a string holding an integer.
+func GetFirstValueFromPromQuery(body []byte) (int, error) {
 	res, err := gabs.ParseJSON(body)
 	if err != nil {
 		return 0, err
 	}
 
-	n, err := res.ArrayCountP("data.result")
-	return n, err
+	count, err := res.ArrayCountP("data.result")
+	if err != nil {
+		return 0, err
+	}
+
+	if count != 1 {
+		return 0, fmt.Errorf("expected body to contain single timeseries but got %v", count)
+	}
+
+	timeseries, err := res.ArrayElementP(0, "data.result")
+	if err != nil {
+		return 0, err
+	}
+
+	value, err := timeseries.ArrayElementP(1, "value")
+	if err != nil {
+		return 0, err
+	}
+
+	// Checked type assertion, an unexpected payload should result in an
+	// error, not in a panic.
+	s, ok := value.Data().(string)
+	if !ok {
+		return 0, fmt.Errorf("expected query value to be a string but got %T", value.Data())
+	}
+
+	v, err := strconv.Atoi(s)
+	if err != nil {
+		return 0, fmt.Errorf("failed to parse query value: %v", err)
+	}
+
+	return v, nil
+}
+
+// WaitForQueryReturnGreaterEqualOne see WaitForQueryReturn.
+func (c *PrometheusClient) WaitForQueryReturnGreaterEqualOne(t *testing.T, timeout time.Duration, query string) {
+	t.Helper()
+	c.WaitForQueryReturn(t, timeout, query, func(v int) error {
+		if v >= 1 {
+			return nil
+		}
+
+		return fmt.Errorf("expected value to equal or greater than 1 but got %v", v)
+	})
+}
+
+// WaitForQueryReturnOne see WaitForQueryReturn.
+func (c *PrometheusClient) WaitForQueryReturnOne(t *testing.T, timeout time.Duration, query string) {
+	t.Helper()
+	c.WaitForQueryReturn(t, timeout, query, func(v int) error {
+		if v == 1 {
+			return nil
+		}
+
+		return fmt.Errorf("expected value to equal 1 but got %v", v)
+	})
+}
+
+// WaitForQueryReturn waits for a given PromQL query for a given time interval
+// and validates the **first and only** result with the given validate function.
+func (c *PrometheusClient) WaitForQueryReturn(t *testing.T, timeout time.Duration, query string, validate func(int) error) {
+	t.Helper()
+	err := wait.Poll(5*time.Second, timeout, func() (bool, error) {
+		defer t.Log("---------------------------\n")
+		body, err := c.Query(query)
+		if err != nil {
+			return false, err
+		}
+
+		v, err := GetFirstValueFromPromQuery(body)
+		if err != nil {
+			t.Logf("failed to extract first value from query response for query %q: %v", query, err)
+			return false, nil
+		}
+
+		if err := validate(v); err != nil {
+			t.Logf("unexpected value for query %q: %v", query, err)
+			return false, nil
+		}
+
+		t.Logf("query %q succeeded", query)
+		return true, nil
+	})
+
+	if err != nil {
+		t.Fatal(err)
+	}
 }
diff --git a/test/e2e/framework/prometheus_client_test.go b/test/e2e/framework/prometheus_client_test.go
new file mode 100644
index 0000000000..718add1a60
--- /dev/null
+++ b/test/e2e/framework/prometheus_client_test.go
@@ -0,0 +1,61 @@
+// Copyright 2019 The Cluster Monitoring Operator Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package framework
+
+import (
+	"testing"
+)
+
+func TestGetFirstValueFromPromQuery(t *testing.T) {
+	tests := []struct {
+		Name string
+		F    func(t *testing.T)
+	}{
+		{
+			Name: "should fail on multiple timeseries",
+			F: func(t *testing.T) {
+				body := `
+{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"ALERTS","alertname":"TargetDown","alertstate":"firing","job":"metrics","severity":"warning"},"value":[1551102571.196,"1"]},{"metric":{"__name__":"ALERTS","alertname":"Watchdog","alertstate":"firing","severity":"none"},"value":[1551102571.196,"1"]}]}}
+`
+
+				_, err := GetFirstValueFromPromQuery([]byte(body))
+				if err == nil || err.Error() != "expected body to contain single timeseries but got 2" {
+					t.Fatalf("expected GetFirstValueFromPromQuery to fail on multiple timeseries but got err %v instead", err)
+				}
+			},
+		},
+		{
+			Name: "should return first value",
+			F: func(t *testing.T) {
+				body := `
+{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"ALERTS","alertname":"Watchdog","alertstate":"firing","severity":"none"},"value":[1551102571.196,"1"]}]}}
+`
+
+				v, err := GetFirstValueFromPromQuery([]byte(body))
+				if err != nil {
+					t.Fatal(err)
+				}
+
+				if v != 1 {
+					t.Fatalf("expected query to return %v but got %v", 1, v)
+				}
+			},
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.Name, test.F)
+	}
+}
diff --git a/test/e2e/main_test.go b/test/e2e/main_test.go
index 548425578a..05be2f1cf1 100644
--- a/test/e2e/main_test.go
+++ b/test/e2e/main_test.go
@@ -16,8 +16,8 @@ package e2e
 
 import (
 	"flag"
+	"fmt"
 	"log"
-	"os"
 	"testing"
 	"time"
 
@@ -31,13 +31,15 @@ import (
 var f *framework.Framework
 
 func TestMain(m *testing.M) {
-	os.Exit(testMain(m))
+	if err := testMain(m); err != nil {
+		log.Fatal(err)
+	}
 }
 
 // testMain circumvents the issue, that one can not call `defer` in TestMain, as
 // `os.Exit` does not honor `defer` statements. For more details see:
 // http://blog.englund.nu/golang,/testing/2017/03/12/using-defer-in-testmain.html
-func testMain(m *testing.M) int {
+func testMain(m *testing.M) error {
 	kubeConfigPath := flag.String(
 		"kubeconfig",
 		clientcmd.RecommendedHomeFile,
@@ -46,19 +48,23 @@ func testMain(m *testing.M) int {
 
 	flag.Parse()
 
-	var err error
-	f, err = framework.New(*kubeConfigPath)
-	if err != nil {
-		log.Fatal(err)
-	}
-
-	cleanUp, err := f.Setup()
+	var (
+		err     error
+		cleanUp func() error
+	)
+	f, cleanUp, err = framework.New(*kubeConfigPath)
 	// Check cleanUp first, in case of an err, we still want to clean up.
 	if cleanUp != nil {
-		defer cleanUp()
+		// Report a failing clean up instead of silently dropping its error,
+		// left over resources would otherwise go unnoticed.
+		defer func() {
+			if cleanUpErr := cleanUp(); cleanUpErr != nil {
+				log.Printf("cleaning up test framework failed: %v", cleanUpErr)
+			}
+		}()
 	}
 	if err != nil {
-		log.Fatal(err)
+		return err
 	}
 
 	// Wait for Prometheus operator.
@@ -70,96 +76,79 @@ func testMain(m *testing.M) int {
 		return true, nil
 	})
 	if err != nil {
-		log.Fatal(err)
+		return err
 	}
 
-	return m.Run()
-}
+	// Wait for Prometheus.
+	// loopErr keeps the reason why the most recent attempt failed, it is part
+	// of the returned error in case the polling below gives up.
+	var loopErr error
+	err = wait.Poll(5*time.Second, 1*time.Minute, func() (bool, error) {
+		var (
+			body []byte
+			v    int
+		)
+		body, loopErr = f.PrometheusK8sClient.Query("count(up{job=\"prometheus-k8s\"})")
+		if loopErr != nil {
+			return false, nil
+		}
 
-type Query struct {
-	Query   string
-	ExpectN int
-}
+		v, loopErr = framework.GetFirstValueFromPromQuery(body)
+		if loopErr != nil {
+			return false, nil
+		}
+
+		if v != 2 {
+			loopErr = fmt.Errorf("expected 2 Prometheus instances but got: %v", v)
+			return false, nil
+		}
 
-func TestQueryPrometheus(t *testing.T) {
-	t.Parallel()
-
-	queries := []Query{
-		{
-			Query:   `up{job="node-exporter"} == 1`,
-			ExpectN: 1,
-		}, {
-			Query:   `up{job="kubelet"} == 1`,
-			ExpectN: 1,
-		}, {
-			Query:   `up{job="scheduler"} == 1`,
-			ExpectN: 1,
-		}, {
-			Query:   `up{job="kube-controller-manager"} == 1`,
-			ExpectN: 1,
-		}, {
-			Query:   `up{job="apiserver"} == 1`,
-			ExpectN: 1,
-		}, {
-			Query:   `up{job="kube-state-metrics"} == 1`,
-			ExpectN: 1,
-		}, {
-			Query:   `up{job="prometheus-k8s"} == 1`,
-			ExpectN: 1,
-		}, {
-			Query:   `up{job="prometheus-operator"} == 1`,
-			ExpectN: 1,
-		}, {
-			Query:   `up{job="alertmanager-main"} == 1`,
-			ExpectN: 2,
-		}, {
-			Query:   `up{job="crio"} == 1`,
-			ExpectN: 1,
-		}, {
-			Query:   `ALERTS{alertname="Watchdog"} == 1`,
-			ExpectN: 1,
-		}, {
-			Query:   `namespace:container_memory_usage_bytes:sum`,
-			ExpectN: 1,
-		},
+		return true, nil
+	})
+	if err != nil {
+		return errors.Wrapf(err, "wait for prometheus-k8s: %v", loopErr)
+	}
+
+	if m.Run() != 0 {
+		return errors.New("tests failed")
 	}
 
-	RunTestQueries(t, time.Minute, queries)
+	return nil
 }
 
-func RunTestQueries(t *testing.T, timeout time.Duration, queries []Query) {
-	promClient, err := framework.NewPrometheusClient(f.OpenshiftRouteClient, f.KubeClient)
-	if err != nil {
-		t.Fatal(err)
-	}
-	// Wait for pod to respond at queries at all. Then start verifying their results.
-	var loopErr error
-	err = wait.Poll(5*time.Second, 1*time.Minute, func() (bool, error) {
-		_, loopErr := promClient.Query("up")
-		return loopErr == nil, nil
-	})
-	if err != nil {
-		t.Fatal(errors.Wrapf(err, "wait for prometheus-k8s: %v", loopErr))
+func TestTargetsUp(t *testing.T) {
+	// Deliberately not calling t.Parallel(): the metrics queried below might be
+	// influenced by other tests running at the same time.
+
+	targets := []string{
+		"node-exporter",
+		"kubelet",
+		"scheduler",
+		"kube-controller-manager",
+		"apiserver",
+		"kube-state-metrics",
+		"prometheus-k8s",
+		"prometheus-operator",
+		"alertmanager-main",
+		"crio",
 	}
 
-	err = wait.Poll(5*time.Second, timeout, func() (bool, error) {
-		defer t.Log("---------------------------\n")
-
-		for _, q := range queries {
-			n, err := promClient.Query(q.Query)
-			if err != nil {
-				return false, err
-			}
-			if n < q.ExpectN {
-				// Don't return an error as targets may only become visible after a while.
-				t.Logf("expected at least %d results for %q but got %d", q.ExpectN, q.Query, n)
-				return false, nil
-			}
-			t.Logf("query %q succeeded", q.Query)
-		}
-		return true, nil
-	})
-	if err != nil {
-		t.Fatal(err)
+	for _, target := range targets {
+		f.PrometheusK8sClient.WaitForQueryReturnOne(
+			t,
+			time.Minute,
+			"max(up{job=\""+target+"\"})",
+		)
 	}
+
+}
+
+// TestMemoryUsageRecordingRule waits until the memory usage recording rule has at
+// least one timeseries. Once more recording rules need testing, unite them here.
+func TestMemoryUsageRecordingRule(t *testing.T) {
+	f.PrometheusK8sClient.WaitForQueryReturnGreaterEqualOne(
+		t,
+		time.Minute,
+		"count(namespace:container_memory_usage_bytes:sum)",
+	)
 }
diff --git a/test/e2e/multi_namespace_test.go b/test/e2e/multi_namespace_test.go
index a6d8352d36..ffbf0070e6 100644
--- a/test/e2e/multi_namespace_test.go
+++ b/test/e2e/multi_namespace_test.go
@@ -67,10 +67,9 @@ func TestMultinamespacePrometheusRule(t *testing.T) {
 		log.Fatal(err)
 	}
 
-	RunTestQueries(t, 10*time.Minute, []Query{
-		{
-			Query:   `ALERTS{alertname="AdditionalTestAlertRule"} == 1`,
-			ExpectN: 1,
-		},
-	})
+	f.PrometheusK8sClient.WaitForQueryReturnOne(
+		t,
+		10*time.Minute,
+		`count(ALERTS{alertname="AdditionalTestAlertRule"} == 1)`,
+	)
 }