From 96f9e2d1244be795ade6ce4f49f4f3e6cd8e348e Mon Sep 17 00:00:00 2001 From: Warren Fernandes Date: Fri, 8 Nov 2019 15:03:12 -0700 Subject: [PATCH] Add metrics reconcile step for machine and cluster controllers Signed-off-by: Warren Fernandes --- controllers/cluster_controller.go | 75 ++-- controllers/cluster_controller_phases.go | 19 +- controllers/cluster_controller_phases_test.go | 85 ++-- controllers/cluster_controller_test.go | 380 ++++++++++++------ controllers/machine_controller.go | 51 +-- controllers/machine_controller_noderef.go | 10 +- controllers/machine_controller_phases.go | 20 +- controllers/machine_controller_phases_test.go | 43 +- controllers/machine_controller_test.go | 95 +++++ controllers/metrics/metrics.go | 108 +++++ go.mod | 2 +- 11 files changed, 555 insertions(+), 333 deletions(-) create mode 100644 controllers/metrics/metrics.go diff --git a/controllers/cluster_controller.go b/controllers/cluster_controller.go index 20dbf4a980b1..1876892fee86 100644 --- a/controllers/cluster_controller.go +++ b/controllers/cluster_controller.go @@ -25,7 +25,6 @@ import ( "github.com/go-logr/logr" "github.com/pkg/errors" - "github.com/prometheus/client_golang/prometheus" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/runtime" @@ -34,14 +33,15 @@ import ( "k8s.io/client-go/tools/record" clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3" "sigs.k8s.io/cluster-api/controllers/external" + "sigs.k8s.io/cluster-api/controllers/metrics" capierrors "sigs.k8s.io/cluster-api/errors" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/patch" + "sigs.k8s.io/cluster-api/util/secret" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" "sigs.k8s.io/controller-runtime/pkg/handler" - "sigs.k8s.io/controller-runtime/pkg/metrics" "sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/controller-runtime/pkg/source" ) @@ -52,38 
+52,6 @@ const ( deleteRequeueAfter = 5 * time.Second ) -var ( - clusterControlPlaneReady = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Name: "capi_cluster_control_plane_ready", - Help: "Cluster control plane is ready if set to 1 and not if 0.", - }, - []string{"cluster", "namespace"}, - ) - clusterInfrastructureReady = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Name: "capi_cluster_infrastructure_ready", - Help: "Cluster infrastructure is ready if set to 1 and not if 0.", - }, - []string{"cluster", "namespace"}, - ) - clusterKubeconfigReady = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Name: "capi_cluster_kubeconfig_ready", - Help: "Cluster kubeconfig is ready if set to 1 and not if 0.", - }, - []string{"cluster", "namespace"}, - ) -) - -func init() { - metrics.Registry.MustRegister( - clusterControlPlaneReady, - clusterInfrastructureReady, - clusterKubeconfigReady, - ) -} - // +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;patch // +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;patch // +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch;create;update;patch;delete @@ -145,6 +113,7 @@ func (r *ClusterReconciler) Reconcile(req ctrl.Request) (_ ctrl.Result, reterr e defer func() { // Always reconcile the Status.Phase field. r.reconcilePhase(ctx, cluster) + r.reconcileMetrics(ctx, cluster) // Always attempt to Patch the Cluster object and status after each reconciliation. 
if err := patchHelper.Patch(ctx, cluster); err != nil { @@ -198,6 +167,35 @@ func (r *ClusterReconciler) reconcile(ctx context.Context, cluster *clusterv1.Cl return res, kerrors.NewAggregate(errs) } +func (r *ClusterReconciler) reconcileMetrics(_ context.Context, cluster *clusterv1.Cluster) { + + if cluster.Status.ControlPlaneInitialized { + metrics.ClusterControlPlaneReady.WithLabelValues(cluster.Name, cluster.Namespace).Set(1) + } else { + metrics.ClusterControlPlaneReady.WithLabelValues(cluster.Name, cluster.Namespace).Set(0) + } + + if cluster.Status.InfrastructureReady { + metrics.ClusterInfrastructureReady.WithLabelValues(cluster.Name, cluster.Namespace).Set(1) + } else { + metrics.ClusterInfrastructureReady.WithLabelValues(cluster.Name, cluster.Namespace).Set(0) + } + + // TODO: [wfernandes] pass context here + _, err := secret.Get(r.Client, cluster, secret.Kubeconfig) + if err != nil { + metrics.ClusterKubeconfigReady.WithLabelValues(cluster.Name, cluster.Namespace).Set(0) + } else { + metrics.ClusterKubeconfigReady.WithLabelValues(cluster.Name, cluster.Namespace).Set(1) + } + + if cluster.Status.ErrorReason != nil || cluster.Status.ErrorMessage != nil { + metrics.ClusterErrorSet.WithLabelValues(cluster.Name, cluster.Namespace).Set(1) + } else { + metrics.ClusterErrorSet.WithLabelValues(cluster.Name, cluster.Namespace).Set(0) + } +} + // reconcileDelete handles cluster deletion. 
func (r *ClusterReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster) (reconcile.Result, error) { logger := r.Log.WithValues("cluster", cluster.Name, "namespace", cluster.Namespace) @@ -372,14 +370,7 @@ func splitMachineList(list *clusterv1.MachineList) (*clusterv1.MachineList, *clu return controlplanes, nodes } -func (r *ClusterReconciler) reconcileControlPlaneInitialized(ctx context.Context, cluster *clusterv1.Cluster) (err error) { - defer func() { - if err != nil || !cluster.Status.ControlPlaneInitialized { - clusterControlPlaneReady.WithLabelValues(cluster.Name, cluster.Namespace).Set(0) - } else { - clusterControlPlaneReady.WithLabelValues(cluster.Name, cluster.Namespace).Set(1) - } - }() +func (r *ClusterReconciler) reconcileControlPlaneInitialized(ctx context.Context, cluster *clusterv1.Cluster) error { logger := r.Log.WithValues("cluster", cluster.Name, "namespace", cluster.Namespace) if cluster.Status.ControlPlaneInitialized { diff --git a/controllers/cluster_controller_phases.go b/controllers/cluster_controller_phases.go index 258ef103815f..362a579df729 100644 --- a/controllers/cluster_controller_phases.go +++ b/controllers/cluster_controller_phases.go @@ -143,14 +143,7 @@ func (r *ClusterReconciler) reconcileExternal(ctx context.Context, cluster *clus } // reconcileInfrastructure reconciles the Spec.InfrastructureRef object on a Cluster. 
-func (r *ClusterReconciler) reconcileInfrastructure(ctx context.Context, cluster *clusterv1.Cluster) (err error) { - defer func() { - if err != nil || !cluster.Status.InfrastructureReady { - clusterInfrastructureReady.WithLabelValues(cluster.Name, cluster.Namespace).Set(0) - } else { - clusterInfrastructureReady.WithLabelValues(cluster.Name, cluster.Namespace).Set(1) - } - }() +func (r *ClusterReconciler) reconcileInfrastructure(ctx context.Context, cluster *clusterv1.Cluster) error { logger := r.Log.WithValues("cluster", cluster.Name, "namespace", cluster.Namespace) if cluster.Spec.InfrastructureRef == nil { @@ -194,15 +187,7 @@ func (r *ClusterReconciler) reconcileInfrastructure(ctx context.Context, cluster return nil } -func (r *ClusterReconciler) reconcileKubeconfig(ctx context.Context, cluster *clusterv1.Cluster) (rerr error) { - defer func() { - if rerr != nil || len(cluster.Status.APIEndpoints) == 0 { - clusterKubeconfigReady.WithLabelValues(cluster.Name, cluster.Namespace).Set(0) - } else { - clusterKubeconfigReady.WithLabelValues(cluster.Name, cluster.Namespace).Set(1) - } - }() - +func (r *ClusterReconciler) reconcileKubeconfig(ctx context.Context, cluster *clusterv1.Cluster) error { if len(cluster.Status.APIEndpoints) == 0 { return nil } diff --git a/controllers/cluster_controller_phases_test.go b/controllers/cluster_controller_phases_test.go index ac793b829acc..097bcd85a34b 100644 --- a/controllers/cluster_controller_phases_test.go +++ b/controllers/cluster_controller_phases_test.go @@ -31,10 +31,9 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/controller-runtime/pkg/metrics" ) -func TestClusterReconciler(t *testing.T) { +func TestClusterReconcilePhases(t *testing.T) { t.Run("reconcile infrastructure", func(t *testing.T) { cluster := &clusterv1.Cluster{ ObjectMeta: v1.ObjectMeta{ @@ -58,23 +57,20 @@ func TestClusterReconciler(t *testing.T) 
{ } tests := []struct { - name string - cluster *clusterv1.Cluster - infraRef map[string]interface{} - expectErr bool - expectedMetric float64 + name string + cluster *clusterv1.Cluster + infraRef map[string]interface{} + expectErr bool }{ { - name: "returns no error if infrastructure ref is nil", - cluster: &clusterv1.Cluster{ObjectMeta: v1.ObjectMeta{Name: "test-cluster", Namespace: "test-namespace"}}, - expectErr: false, - expectedMetric: 0, + name: "returns no error if infrastructure ref is nil", + cluster: &clusterv1.Cluster{ObjectMeta: v1.ObjectMeta{Name: "test-cluster", Namespace: "test-namespace"}}, + expectErr: false, }, { - name: "returns error if unable to reconcile infrastructure ref", - cluster: cluster, - expectErr: true, - expectedMetric: 0, + name: "returns error if unable to reconcile infrastructure ref", + cluster: cluster, + expectErr: true, }, { name: "returns no error if infra config is marked for deletion", @@ -88,11 +84,10 @@ func TestClusterReconciler(t *testing.T) { "deletionTimestamp": "sometime", }, }, - expectErr: false, - expectedMetric: 1, + expectErr: false, }, { - name: "returns no error and sets metric to 1 if infrastructure is marked ready on cluster", + name: "returns no error if infrastructure is marked ready on cluster", cluster: cluster, infraRef: map[string]interface{}{ "kind": "InfrastructureConfig", @@ -103,8 +98,7 @@ func TestClusterReconciler(t *testing.T) { "deletionTimestamp": "sometime", }, }, - expectErr: false, - expectedMetric: 1, + expectErr: false, }, } @@ -134,12 +128,6 @@ func TestClusterReconciler(t *testing.T) { } else { Expect(err).ToNot(HaveOccurred()) } - - mr, err := metrics.Registry.Gather() - Expect(err).ToNot(HaveOccurred()) - mf := getMetricFamily(mr, "capi_cluster_infrastructure_ready") - Expect(mf).ToNot(BeNil()) - Expect(mf.GetMetric()[0].GetGauge().GetValue()).To(Equal(tt.expectedMetric)) }) } @@ -159,18 +147,16 @@ func TestClusterReconciler(t *testing.T) { } tests := []struct { - name string - 
cluster *clusterv1.Cluster - secret *corev1.Secret - wantErr bool - wantRequeue bool - expectedMetric float64 + name string + cluster *clusterv1.Cluster + secret *corev1.Secret + wantErr bool + wantRequeue bool }{ { - name: "cluster not provisioned, apiEndpoint is not set", - cluster: &clusterv1.Cluster{}, - wantErr: false, - expectedMetric: 0, + name: "cluster not provisioned, apiEndpoint is not set", + cluster: &clusterv1.Cluster{}, + wantErr: false, }, { name: "kubeconfig secret found", @@ -180,15 +166,13 @@ func TestClusterReconciler(t *testing.T) { Name: "test-cluster-kubeconfig", }, }, - wantErr: false, - expectedMetric: 1, + wantErr: false, }, { - name: "kubeconfig secret not found, should return RequeueAfterError", - cluster: cluster, - wantErr: true, - wantRequeue: true, - expectedMetric: 0, + name: "kubeconfig secret not found, should return RequeueAfterError", + cluster: cluster, + wantErr: true, + wantRequeue: true, }, { name: "invalid ca secret, should return error", @@ -198,8 +182,7 @@ func TestClusterReconciler(t *testing.T) { Name: "test-cluster-ca", }, }, - wantErr: true, - expectedMetric: 0, + wantErr: true, }, } for _, tt := range tests { @@ -226,18 +209,6 @@ func TestClusterReconciler(t *testing.T) { if tt.wantRequeue != hasRequeErr { t.Errorf("expected RequeAfterError = %v, got %v", tt.wantRequeue, hasRequeErr) } - - mr, err := metrics.Registry.Gather() - Expect(err).ToNot(HaveOccurred()) - mf := getMetricFamily(mr, "capi_cluster_kubeconfig_ready") - Expect(mf).ToNot(BeNil()) - for _, m := range mf.GetMetric() { - for _, lp := range m.GetLabel() { - if lp.GetName() == "cluster" && lp.GetValue() == "test-cluster" { - Expect(m.GetGauge().GetValue()).To(Equal(tt.expectedMetric)) - } - } - } }) } }) diff --git a/controllers/cluster_controller_test.go b/controllers/cluster_controller_test.go index 08fc6cbf07f9..187cbcf58870 100644 --- a/controllers/cluster_controller_test.go +++ b/controllers/cluster_controller_test.go @@ -17,16 +17,21 @@ 
limitations under the License. package controllers import ( + "context" "reflect" "testing" + "github.com/gogo/protobuf/proto" . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes/scheme" "k8s.io/utils/pointer" clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3" + capierrors "sigs.k8s.io/cluster-api/errors" "sigs.k8s.io/cluster-api/util/kubeconfig" "sigs.k8s.io/cluster-api/util/patch" ctrl "sigs.k8s.io/controller-runtime" @@ -322,147 +327,274 @@ var _ = Describe("Cluster Reconciler", func() { } return cluster.Status.ControlPlaneInitialized }, timeout).Should(BeTrue()) + }) +}) + +func TestClusterReconciler(t *testing.T) { + t.Run("metrics", func(t *testing.T) { + + errorReason := capierrors.ClusterStatusError("foo") + tests := []struct { + name string + cs clusterv1.ClusterStatus + secret *corev1.Secret + expectedMetrics map[string]float64 + }{ + { + name: "cluster control plane metric is 1 if cluster status is true ", + cs: clusterv1.ClusterStatus{ + ControlPlaneInitialized: true, + }, + expectedMetrics: map[string]float64{"capi_cluster_control_plane_ready": 1}, + }, + { + name: "cluster control plane metric is 0 if cluster status is false ", + cs: clusterv1.ClusterStatus{ + ControlPlaneInitialized: false, + }, + expectedMetrics: map[string]float64{"capi_cluster_control_plane_ready": 0}, + }, + { + name: "cluster infrastructure metric is 1 if cluster status is true ", + cs: clusterv1.ClusterStatus{ + InfrastructureReady: true, + }, + expectedMetrics: map[string]float64{"capi_cluster_infrastructure_ready": 1}, + }, + { + name: "cluster infrastructure metric is 0 if cluster status is false ", + cs: clusterv1.ClusterStatus{ + InfrastructureReady: false, + }, + expectedMetrics: map[string]float64{"capi_cluster_infrastructure_ready": 0}, + }, + { + name: "cluster kubeconfig metric is 0 if secret is 
unavailable", + cs: clusterv1.ClusterStatus{}, + expectedMetrics: map[string]float64{"capi_cluster_kubeconfig_ready": 0}, + }, + { + name: "cluster kubeconfig metric is 1 if secret is available and ready", + cs: clusterv1.ClusterStatus{}, + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster-kubeconfig", + }, + }, + expectedMetrics: map[string]float64{"capi_cluster_kubeconfig_ready": 1}, + }, + { + name: "cluster error metric is 1 if ErrorReason is set", + cs: clusterv1.ClusterStatus{ + ErrorReason: &errorReason, + }, + expectedMetrics: map[string]float64{"capi_cluster_error_set": 1}, + }, + { + name: "cluster error metric is 1 if ErrorMessage is set", + cs: clusterv1.ClusterStatus{ + ErrorMessage: proto.String("some-error"), + }, + expectedMetrics: map[string]float64{"capi_cluster_error_set": 1}, + }, + { + name: "cluster is ready", + cs: clusterv1.ClusterStatus{ + InfrastructureReady: true, + ControlPlaneInitialized: true, + }, + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster-kubeconfig", + }, + }, + expectedMetrics: map[string]float64{ + "capi_cluster_control_plane_ready": 1, + "capi_cluster_infrastructure_ready": 1, + "capi_cluster_kubeconfig_ready": 1, + "capi_cluster_error_set": 0, + }, + }, + } - mr, err := metrics.Registry.Gather() - Expect(err).ToNot(HaveOccurred()) - mf := getMetricFamily(mr, "capi_cluster_control_plane_ready") - Expect(mf).ToNot(BeNil()) - for _, m := range mf.GetMetric() { - for _, l := range m.GetLabel() { - if l.GetName() == "cluster" && l.GetValue() == cluster.Name { - Expect(m.GetGauge().GetValue()).To(Equal(float64(1))) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + RegisterTestingT(t) + err := clusterv1.AddToScheme(scheme.Scheme) + if err != nil { + t.Fatal(err) } - } + var objs []runtime.Object + + c := &clusterv1.Cluster{ + TypeMeta: metav1.TypeMeta{ + Kind: "Cluster", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + }, + Spec: 
clusterv1.ClusterSpec{}, + Status: tt.cs, + } + objs = append(objs, c) + + if tt.secret != nil { + objs = append(objs, tt.secret) + } + + r := &ClusterReconciler{ + Client: fake.NewFakeClient(objs...), + Log: log.Log, + } + + r.reconcileMetrics(context.TODO(), c) + + for em, ev := range tt.expectedMetrics { + mr, err := metrics.Registry.Gather() + Expect(err).ToNot(HaveOccurred()) + mf := getMetricFamily(mr, em) + Expect(mf).ToNot(BeNil()) + for _, m := range mf.GetMetric() { + for _, l := range m.GetLabel() { + // ensure that the metric has a matching label + if l.GetName() == "cluster" && l.GetValue() == c.Name { + Expect(m.GetGauge().GetValue()).To(Equal(ev)) + } + } + } + } + + }) } }) -}) -func TestClusterReconciler_machineToCluster(t *testing.T) { - cluster := &clusterv1.Cluster{ - TypeMeta: metav1.TypeMeta{ - Kind: "Cluster", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: "test-cluster", - Namespace: "test", - }, - Spec: clusterv1.ClusterSpec{}, - Status: clusterv1.ClusterStatus{}, - } + t.Run("machine to cluster", func(t *testing.T) { + cluster := &clusterv1.Cluster{ + TypeMeta: metav1.TypeMeta{ + Kind: "Cluster", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "test", + }, + Spec: clusterv1.ClusterSpec{}, + Status: clusterv1.ClusterStatus{}, + } - controlPlaneWithNoderef := &clusterv1.Machine{ - TypeMeta: metav1.TypeMeta{ - Kind: "Machine", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: "controlPlaneWithNoderef", - Labels: map[string]string{ - clusterv1.ClusterLabelName: cluster.Name, - clusterv1.MachineControlPlaneLabelName: "", + controlPlaneWithNoderef := &clusterv1.Machine{ + TypeMeta: metav1.TypeMeta{ + Kind: "Machine", }, - }, - Status: clusterv1.MachineStatus{ - NodeRef: &v1.ObjectReference{ - Kind: "Node", - Namespace: "test-node", + ObjectMeta: metav1.ObjectMeta{ + Name: "controlPlaneWithNoderef", + Labels: map[string]string{ + clusterv1.ClusterLabelName: cluster.Name, + clusterv1.MachineControlPlaneLabelName: "", + }, }, 
- }, - } - controlPlaneWithoutNoderef := &clusterv1.Machine{ - TypeMeta: metav1.TypeMeta{ - Kind: "Machine", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: "controlPlaneWithoutNoderef", - Labels: map[string]string{ - clusterv1.ClusterLabelName: cluster.Name, - clusterv1.MachineControlPlaneLabelName: "", + Status: clusterv1.MachineStatus{ + NodeRef: &v1.ObjectReference{ + Kind: "Node", + Namespace: "test-node", + }, }, - }, - } - nonControlPlaneWithNoderef := &clusterv1.Machine{ - TypeMeta: metav1.TypeMeta{ - Kind: "Machine", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: "nonControlPlaneWitNoderef", - Labels: map[string]string{ - clusterv1.ClusterLabelName: cluster.Name, + } + controlPlaneWithoutNoderef := &clusterv1.Machine{ + TypeMeta: metav1.TypeMeta{ + Kind: "Machine", }, - }, - Status: clusterv1.MachineStatus{ - NodeRef: &v1.ObjectReference{ - Kind: "Node", - Namespace: "test-node", + ObjectMeta: metav1.ObjectMeta{ + Name: "controlPlaneWithoutNoderef", + Labels: map[string]string{ + clusterv1.ClusterLabelName: cluster.Name, + clusterv1.MachineControlPlaneLabelName: "", + }, }, - }, - } - nonControlPlaneWithoutNoderef := &clusterv1.Machine{ - TypeMeta: metav1.TypeMeta{ - Kind: "Machine", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: "nonControlPlaneWithoutNoderef", - Labels: map[string]string{ - clusterv1.ClusterLabelName: cluster.Name, + } + nonControlPlaneWithNoderef := &clusterv1.Machine{ + TypeMeta: metav1.TypeMeta{ + Kind: "Machine", }, - }, - } + ObjectMeta: metav1.ObjectMeta{ + Name: "nonControlPlaneWitNoderef", + Labels: map[string]string{ + clusterv1.ClusterLabelName: cluster.Name, + }, + }, + Status: clusterv1.MachineStatus{ + NodeRef: &v1.ObjectReference{ + Kind: "Node", + Namespace: "test-node", + }, + }, + } + nonControlPlaneWithoutNoderef := &clusterv1.Machine{ + TypeMeta: metav1.TypeMeta{ + Kind: "Machine", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "nonControlPlaneWithoutNoderef", + Labels: map[string]string{ + clusterv1.ClusterLabelName: 
cluster.Name, + }, + }, + } - tests := []struct { - name string - o handler.MapObject - want []ctrl.Request - }{ - { - name: "controlplane machine, noderef is set, should return cluster", - o: handler.MapObject{ - Meta: controlPlaneWithNoderef.GetObjectMeta(), - Object: controlPlaneWithNoderef, - }, - want: []ctrl.Request{ - {NamespacedName: client.ObjectKey{ - Name: cluster.Name, - Namespace: cluster.Namespace, - }}, + tests := []struct { + name string + o handler.MapObject + want []ctrl.Request + }{ + { + name: "controlplane machine, noderef is set, should return cluster", + o: handler.MapObject{ + Meta: controlPlaneWithNoderef.GetObjectMeta(), + Object: controlPlaneWithNoderef, + }, + want: []ctrl.Request{ + {NamespacedName: client.ObjectKey{ + Name: cluster.Name, + Namespace: cluster.Namespace, + }}, + }, }, - }, - { - name: "controlplane machine, noderef is not set", - o: handler.MapObject{ - Meta: controlPlaneWithoutNoderef.GetObjectMeta(), - Object: controlPlaneWithoutNoderef, + { + name: "controlplane machine, noderef is not set", + o: handler.MapObject{ + Meta: controlPlaneWithoutNoderef.GetObjectMeta(), + Object: controlPlaneWithoutNoderef, + }, + want: nil, }, - want: nil, - }, - { - name: "not controlplane machine, noderef is set", - o: handler.MapObject{ - Meta: nonControlPlaneWithNoderef.GetObjectMeta(), - Object: nonControlPlaneWithNoderef, + { + name: "not controlplane machine, noderef is set", + o: handler.MapObject{ + Meta: nonControlPlaneWithNoderef.GetObjectMeta(), + Object: nonControlPlaneWithNoderef, + }, + want: nil, }, - want: nil, - }, - { - name: "not controlplane machine, noderef is not set", - o: handler.MapObject{ - Meta: nonControlPlaneWithoutNoderef.GetObjectMeta(), - Object: nonControlPlaneWithoutNoderef, + { + name: "not controlplane machine, noderef is not set", + o: handler.MapObject{ + Meta: nonControlPlaneWithoutNoderef.GetObjectMeta(), + Object: nonControlPlaneWithoutNoderef, + }, + want: nil, }, - want: nil, - }, - } - for _, 
tt := range tests { - t.Run(tt.name, func(t *testing.T) { - r := &ClusterReconciler{ - Client: fake.NewFakeClient(cluster, controlPlaneWithNoderef, controlPlaneWithoutNoderef, nonControlPlaneWithNoderef, nonControlPlaneWithoutNoderef), - Log: log.Log, - } - if got := r.controlPlaneMachineToCluster(tt.o); !reflect.DeepEqual(got, tt.want) { - t.Errorf("controlPlaneMachineToCluster() = %v, want %v", got, tt.want) - } - }) - } + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + r := &ClusterReconciler{ + Client: fake.NewFakeClient(cluster, controlPlaneWithNoderef, controlPlaneWithoutNoderef, nonControlPlaneWithNoderef, nonControlPlaneWithoutNoderef), + Log: log.Log, + } + if got := r.controlPlaneMachineToCluster(tt.o); !reflect.DeepEqual(got, tt.want) { + t.Errorf("controlPlaneMachineToCluster() = %v, want %v", got, tt.want) + } + }) + } + }) } type machineDeploymentBuilder struct { diff --git a/controllers/machine_controller.go b/controllers/machine_controller.go index 0f205755cf44..dc5cc26a60e1 100644 --- a/controllers/machine_controller.go +++ b/controllers/machine_controller.go @@ -24,7 +24,6 @@ import ( "github.com/go-logr/logr" "github.com/pkg/errors" - "github.com/prometheus/client_golang/prometheus" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -38,6 +37,7 @@ import ( "k8s.io/klog" clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3" "sigs.k8s.io/cluster-api/controllers/external" + "sigs.k8s.io/cluster-api/controllers/metrics" "sigs.k8s.io/cluster-api/controllers/remote" capierrors "sigs.k8s.io/cluster-api/errors" kubedrain "sigs.k8s.io/cluster-api/third_party/kubernetes-drain" @@ -46,44 +46,14 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" - "sigs.k8s.io/controller-runtime/pkg/metrics" ) var ( errNilNodeRef = errors.New("noderef is nil") errLastControlPlaneNode = 
errors.New("last control plane member") errNoControlPlaneNodes = errors.New("no control plane members") - machineBootstrapReady = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Name: "capi_machine_bootstrap_ready", - Help: "Machine Boostrap is ready if set to 1 and not if 0.", - }, - []string{"machine", "namespace", "cluster"}, - ) - machineInfrastructureReady = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Name: "capi_machine_infrastructure_ready", - Help: "Machine InfrastructureRef is ready if set to 1 and not if 0.", - }, - []string{"machine", "namespace", "cluster"}, - ) - machineNodeReady = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Name: "capi_machine_node_ready", - Help: "Machine NodeRef is ready if set to 1 and not if 0.", - }, - []string{"machine", "namespace", "cluster"}, - ) ) -func init() { - metrics.Registry.MustRegister( - machineBootstrapReady, - machineInfrastructureReady, - machineNodeReady, - ) -} - // +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;patch // +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch // +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch;create;update;patch;delete @@ -145,6 +115,7 @@ func (r *MachineReconciler) Reconcile(req ctrl.Request) (_ ctrl.Result, reterr e defer func() { // Always reconcile the Status.Phase field. r.reconcilePhase(ctx, m) + r.reconcileMetrics(ctx, m) // Always attempt to Patch the Machine object and status after each reconciliation. 
if err := patchHelper.Patch(ctx, m); err != nil { @@ -220,6 +191,24 @@ func (r *MachineReconciler) reconcile(ctx context.Context, cluster *clusterv1.Cl return res, kerrors.NewAggregate(errs) } +func (r *MachineReconciler) reconcileMetrics(_ context.Context, m *clusterv1.Machine) { + if m.Status.BootstrapReady { + metrics.MachineBootstrapReady.WithLabelValues(m.Name, m.Namespace, m.Spec.ClusterName).Set(1) + } else { + metrics.MachineBootstrapReady.WithLabelValues(m.Name, m.Namespace, m.Spec.ClusterName).Set(0) + } + if m.Status.InfrastructureReady { + metrics.MachineInfrastructureReady.WithLabelValues(m.Name, m.Namespace, m.Spec.ClusterName).Set(1) + } else { + metrics.MachineInfrastructureReady.WithLabelValues(m.Name, m.Namespace, m.Spec.ClusterName).Set(0) + } + if m.Status.NodeRef != nil { + metrics.MachineNodeReady.WithLabelValues(m.Name, m.Namespace, m.Spec.ClusterName).Set(1) + } else { + metrics.MachineNodeReady.WithLabelValues(m.Name, m.Namespace, m.Spec.ClusterName).Set(0) + } +} + func (r *MachineReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) (ctrl.Result, error) { logger := r.Log.WithValues("machine", m.Name, "namespace", m.Namespace) logger = logger.WithValues("cluster", cluster.Name) diff --git a/controllers/machine_controller_noderef.go b/controllers/machine_controller_noderef.go index e88f052a7be4..12052f250854 100644 --- a/controllers/machine_controller_noderef.go +++ b/controllers/machine_controller_noderef.go @@ -34,15 +34,7 @@ var ( ErrNodeNotFound = errors.New("cannot find node with matching ProviderID") ) -func (r *MachineReconciler) reconcileNodeRef(_ context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine) (err error) { - defer func() { - if err != nil || machine.Status.NodeRef == nil { - machineNodeReady.WithLabelValues(machine.Name, machine.Namespace, machine.Spec.ClusterName).Set(0) - } else { - machineNodeReady.WithLabelValues(machine.Name, machine.Namespace, 
machine.Spec.ClusterName).Set(1) - } - }() - +func (r *MachineReconciler) reconcileNodeRef(_ context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine) error { logger := r.Log.WithValues("machine", machine.Name, "namespace", machine.Namespace) // Check that the Machine hasn't been deleted or in the process. if !machine.DeletionTimestamp.IsZero() { diff --git a/controllers/machine_controller_phases.go b/controllers/machine_controller_phases.go index 5f6dc5103942..4ed9dc47eedb 100644 --- a/controllers/machine_controller_phases.go +++ b/controllers/machine_controller_phases.go @@ -151,15 +151,7 @@ func (r *MachineReconciler) reconcileExternal(ctx context.Context, m *clusterv1. } // reconcileBootstrap reconciles the Spec.Bootstrap.ConfigRef object on a Machine. -func (r *MachineReconciler) reconcileBootstrap(ctx context.Context, m *clusterv1.Machine) (err error) { - defer func() { - if err != nil || !m.Status.BootstrapReady { - machineBootstrapReady.WithLabelValues(m.Name, m.Namespace, m.Spec.ClusterName).Set(0) - } else { - machineBootstrapReady.WithLabelValues(m.Name, m.Namespace, m.Spec.ClusterName).Set(1) - } - }() - +func (r *MachineReconciler) reconcileBootstrap(ctx context.Context, m *clusterv1.Machine) error { // TODO(vincepri): Move this validation in kubebuilder / webhook. if m.Spec.Bootstrap.ConfigRef == nil && m.Spec.Bootstrap.Data == nil { return errors.Errorf( @@ -212,15 +204,7 @@ func (r *MachineReconciler) reconcileBootstrap(ctx context.Context, m *clusterv1 } // reconcileInfrastructure reconciles the Spec.InfrastructureRef object on a Machine. 
-func (r *MachineReconciler) reconcileInfrastructure(ctx context.Context, m *clusterv1.Machine) (err error) { - defer func() { - if err != nil || !m.Status.InfrastructureReady { - machineInfrastructureReady.WithLabelValues(m.Name, m.Namespace, m.Spec.ClusterName).Set(0) - } else { - machineInfrastructureReady.WithLabelValues(m.Name, m.Namespace, m.Spec.ClusterName).Set(1) - } - }() - +func (r *MachineReconciler) reconcileInfrastructure(ctx context.Context, m *clusterv1.Machine) error { // Call generic external reconciler. infraConfig, err := r.reconcileExternal(ctx, m, &m.Spec.InfrastructureRef) if infraConfig == nil && err == nil { diff --git a/controllers/machine_controller_phases_test.go b/controllers/machine_controller_phases_test.go index 6b626ce222cd..b788695ac542 100644 --- a/controllers/machine_controller_phases_test.go +++ b/controllers/machine_controller_phases_test.go @@ -34,7 +34,6 @@ import ( "sigs.k8s.io/cluster-api/util/kubeconfig" "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/log" - "sigs.k8s.io/controller-runtime/pkg/metrics" ) func init() { @@ -379,7 +378,6 @@ func TestReconcileBootstrap(t *testing.T) { machine *clusterv1.Machine expectError bool expected func(g *WithT, m *clusterv1.Machine) - expectedMetric float64 }{ { name: "new machine, bootstrap config ready with data", @@ -396,8 +394,7 @@ func TestReconcileBootstrap(t *testing.T) { "bootstrapData": "#!/bin/bash ... 
data", }, }, - expectError: false, - expectedMetric: 1, + expectError: false, expected: func(g *WithT, m *clusterv1.Machine) { g.Expect(m.Status.BootstrapReady).To(BeTrue()) g.Expect(m.Spec.Bootstrap.Data).ToNot(BeNil()) @@ -418,8 +415,7 @@ func TestReconcileBootstrap(t *testing.T) { "ready": true, }, }, - expectError: true, - expectedMetric: 0, + expectError: true, expected: func(g *WithT, m *clusterv1.Machine) { g.Expect(m.Status.BootstrapReady).To(BeFalse()) g.Expect(m.Spec.Bootstrap.Data).To(BeNil()) @@ -437,8 +433,7 @@ func TestReconcileBootstrap(t *testing.T) { "spec": map[string]interface{}{}, "status": map[string]interface{}{}, }, - expectError: true, - expectedMetric: 0, + expectError: true, expected: func(g *WithT, m *clusterv1.Machine) { g.Expect(m.Status.BootstrapReady).To(BeFalse()) }, @@ -455,8 +450,7 @@ func TestReconcileBootstrap(t *testing.T) { "spec": map[string]interface{}{}, "status": map[string]interface{}{}, }, - expectError: true, - expectedMetric: 0, + expectError: true, expected: func(g *WithT, m *clusterv1.Machine) { g.Expect(m.Status.BootstrapReady).To(BeFalse()) }, @@ -473,8 +467,7 @@ func TestReconcileBootstrap(t *testing.T) { "spec": map[string]interface{}{}, "status": map[string]interface{}{}, }, - expectedMetric: 0, - expectError: true, + expectError: true, }, { name: "existing machine, bootstrap data should not change", @@ -510,8 +503,7 @@ func TestReconcileBootstrap(t *testing.T) { BootstrapReady: true, }, }, - expectError: false, - expectedMetric: 1, + expectError: false, expected: func(g *WithT, m *clusterv1.Machine) { g.Expect(m.Status.BootstrapReady).To(BeTrue()) g.Expect(*m.Spec.Bootstrap.Data).To(Equal("#!/bin/bash ... 
data")) @@ -551,8 +543,7 @@ func TestReconcileBootstrap(t *testing.T) { BootstrapReady: true, }, }, - expectError: false, - expectedMetric: 1, + expectError: false, expected: func(g *WithT, m *clusterv1.Machine) { g.Expect(m.Status.BootstrapReady).To(BeTrue()) }, @@ -585,12 +576,6 @@ func TestReconcileBootstrap(t *testing.T) { if tc.expected != nil { tc.expected(g, tc.machine) } - - mt, err := metrics.Registry.Gather() - g.Expect(err).ToNot(HaveOccurred()) - mf := getMetricFamily(mt, "capi_machine_bootstrap_ready") - g.Expect(mf).ToNot(BeNil()) - g.Expect(mf.GetMetric()[0].GetGauge().GetValue()).To(Equal(tc.expectedMetric)) }) } @@ -630,7 +615,6 @@ func TestReconcileInfrastructure(t *testing.T) { expectError bool expectChanged bool expectRequeueAfter bool - expectedMetric float64 expected func(g *WithT, m *clusterv1.Machine) }{ { @@ -659,9 +643,8 @@ func TestReconcileInfrastructure(t *testing.T) { }, }, }, - expectError: false, - expectChanged: true, - expectedMetric: 1, + expectError: false, + expectChanged: true, expected: func(g *WithT, m *clusterv1.Machine) { g.Expect(m.Status.InfrastructureReady).To(BeTrue()) }, @@ -713,13 +696,11 @@ func TestReconcileInfrastructure(t *testing.T) { }, expectError: true, expectRequeueAfter: true, - expectedMetric: 0, expected: func(g *WithT, m *clusterv1.Machine) { g.Expect(m.Status.InfrastructureReady).To(BeTrue()) g.Expect(m.Status.ErrorMessage).ToNot(BeNil()) g.Expect(m.Status.ErrorReason).ToNot(BeNil()) g.Expect(m.Status.GetTypedPhase()).To(Equal(clusterv1.MachinePhaseFailed)) - }, }, } @@ -748,12 +729,6 @@ func TestReconcileInfrastructure(t *testing.T) { g.Expect(err).To(BeNil()) } - mt, err := metrics.Registry.Gather() - g.Expect(err).ToNot(HaveOccurred()) - mf := getMetricFamily(mt, "capi_machine_infrastructure_ready") - g.Expect(mf).ToNot(BeNil()) - g.Expect(mf.GetMetric()[0].GetGauge().GetValue()).To(Equal(tc.expectedMetric)) - if tc.expected != nil { tc.expected(g, tc.machine) } diff --git 
a/controllers/machine_controller_test.go b/controllers/machine_controller_test.go index 207036bebf68..d5777a13c018 100644 --- a/controllers/machine_controller_test.go +++ b/controllers/machine_controller_test.go @@ -17,6 +17,7 @@ limitations under the License. package controllers import ( + "context" "testing" . "github.com/onsi/gomega" @@ -31,6 +32,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/metrics" "sigs.k8s.io/controller-runtime/pkg/reconcile" ) @@ -553,3 +555,96 @@ func TestRemoveMachineFinalizerAfterDeleteReconcile(t *testing.T) { Expect(mr.Client.Get(ctx, key, m)).ToNot(HaveOccurred()) Expect(m.ObjectMeta.Finalizers).To(Equal([]string{metav1.FinalizerDeleteDependents})) } + +func TestReconcileMetrics(t *testing.T) { + RegisterTestingT(t) + + err := clusterv1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + + tests := []struct { + name string + ms clusterv1.MachineStatus + expectedMetrics map[string]float64 + }{ + { + name: "machine bootstrap metric is set to 1 if ready", + ms: clusterv1.MachineStatus{ + BootstrapReady: true, + }, + expectedMetrics: map[string]float64{"capi_machine_bootstrap_ready": 1}, + }, + { + name: "machine bootstrap metric is set to 0 if not ready", + ms: clusterv1.MachineStatus{ + BootstrapReady: false, + }, + expectedMetrics: map[string]float64{"capi_machine_bootstrap_ready": 0}, + }, + { + name: "machine infrastructure metric is set to 1 if ready", + ms: clusterv1.MachineStatus{ + InfrastructureReady: true, + }, + expectedMetrics: map[string]float64{"capi_machine_infrastructure_ready": 1}, + }, + { + name: "machine infrastructure metric is set to 0 if not ready", + ms: clusterv1.MachineStatus{ + InfrastructureReady: false, + }, + expectedMetrics: map[string]float64{"capi_machine_infrastructure_ready": 0}, + }, + { + name: "machine node metric is set to 1 if node ref exists", + 
ms: clusterv1.MachineStatus{ + NodeRef: &corev1.ObjectReference{ + Name: "test", + }, + }, + expectedMetrics: map[string]float64{"capi_machine_node_ready": 1}, + }, + { + name: "machine node metric is set to 0 if node ref does not exist", + ms: clusterv1.MachineStatus{}, + expectedMetrics: map[string]float64{"capi_machine_node_ready": 0}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var objs []runtime.Object + machine := &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-machine", + }, + Spec: clusterv1.MachineSpec{}, + Status: tt.ms, + } + objs = append(objs, machine) + + r := &MachineReconciler{ + Client: fake.NewFakeClient(objs...), + Log: log.Log, + } + + r.reconcileMetrics(context.TODO(), machine) + + for em, ev := range tt.expectedMetrics { + mr, err := metrics.Registry.Gather() + Expect(err).ToNot(HaveOccurred()) + mf := getMetricFamily(mr, em) + Expect(mf).ToNot(BeNil()) + for _, m := range mf.GetMetric() { + for _, l := range m.GetLabel() { + // ensure that the metric has a matching label + if l.GetName() == "machine" && l.GetValue() == machine.Name { + Expect(m.GetGauge().GetValue()).To(Equal(ev)) + } + } + } + } + }) + } + +} diff --git a/controllers/metrics/metrics.go b/controllers/metrics/metrics.go new file mode 100644 index 000000000000..e2895682459d --- /dev/null +++ b/controllers/metrics/metrics.go @@ -0,0 +1,108 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+*/ + +// Package metrics defines the metrics available for the cluster api +// controllers. +package metrics + +import ( + "github.com/prometheus/client_golang/prometheus" + "sigs.k8s.io/controller-runtime/pkg/metrics" +) + +var ( + // ClusterControlPlaneReady is a metric that is set to 1 if the cluster + // control plane is ready and 0 if it is not. + ClusterControlPlaneReady = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "capi_cluster_control_plane_ready", + Help: "Cluster control plane is ready if set to 1 and not if 0.", + }, + []string{"cluster", "namespace"}, + ) + + // ClusterInfrastructureReady is a metric that is set to 1 if the cluster + // infrastructure is ready and 0 if it is not. + ClusterInfrastructureReady = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "capi_cluster_infrastructure_ready", + Help: "Cluster infrastructure is ready if set to 1 and not if 0.", + }, + []string{"cluster", "namespace"}, + ) + + // ClusterKubeconfigReady is a metric that is set to 1 if the cluster + // kubeconfig secret has been created and 0 if it is not. + ClusterKubeconfigReady = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "capi_cluster_kubeconfig_ready", + Help: "Cluster kubeconfig is ready if set to 1 and not if 0.", + }, + []string{"cluster", "namespace"}, + ) + + // ClusterErrorSet is a metric that is set to 1 if the cluster ErrorReason + // or ErrorMessage is set and 0 if it is not. + ClusterErrorSet = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "capi_cluster_error_set", + Help: "Cluster error message or reason is set if metric is 1.", + }, + []string{"cluster", "namespace"}, + ) + + // MachineBootstrapReady is a metric that is set to 1 if machine bootstrap + // is ready and 0 if it is not.
+ MachineBootstrapReady = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "capi_machine_bootstrap_ready", + Help: "Machine Bootstrap is ready if set to 1 and not if 0.", + }, + []string{"machine", "namespace", "cluster"}, + ) + + // MachineInfrastructureReady is a metric that is set to 1 if machine + // infrastructure is ready and 0 if it is not. + MachineInfrastructureReady = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "capi_machine_infrastructure_ready", + Help: "Machine InfrastructureRef is ready if set to 1 and not if 0.", + }, + []string{"machine", "namespace", "cluster"}, + ) + + // MachineNodeReady is a metric that is set to 1 if machine node is ready + // and 0 if it is not. + MachineNodeReady = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "capi_machine_node_ready", + Help: "Machine NodeRef is ready if set to 1 and not if 0.", + }, + []string{"machine", "namespace", "cluster"}, + ) +) + +func init() { + metrics.Registry.MustRegister( + ClusterControlPlaneReady, + ClusterInfrastructureReady, + ClusterKubeconfigReady, + ClusterErrorSet, + MachineBootstrapReady, + MachineInfrastructureReady, + MachineNodeReady, + ) +} diff --git a/go.mod b/go.mod index a0b6952051aa..b5f2cc15bd37 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/davecgh/go-spew v1.1.1 github.com/go-logr/logr v0.1.0 - github.com/gogo/protobuf v1.2.1 // indirect + github.com/gogo/protobuf v1.2.1 github.com/google/go-cmp v0.3.1 // indirect github.com/gophercloud/gophercloud v0.3.0 // indirect github.com/hashicorp/golang-lru v0.5.3 // indirect