From 2b72a3a53a7d2ecdd85244f346122a3f63ebf59b Mon Sep 17 00:00:00 2001 From: Warren Fernandes Date: Wed, 6 Nov 2019 16:59:08 -0700 Subject: [PATCH] Add metrics to machine and cluster controller Signed-off-by: Warren Fernandes --- controllers/cluster_controller.go | 32 ++ controllers/cluster_controller_phases_test.go | 252 ++++++++---- controllers/cluster_controller_test.go | 374 ++++++++++++------ controllers/machine_controller.go | 20 + controllers/machine_controller_phases_test.go | 82 ++-- controllers/machine_controller_test.go | 95 +++++ controllers/metrics/metrics.go | 108 +++++ go.mod | 4 +- 8 files changed, 744 insertions(+), 223 deletions(-) create mode 100644 controllers/metrics/metrics.go diff --git a/controllers/cluster_controller.go b/controllers/cluster_controller.go index bf0ee6a40d02..1876892fee86 100644 --- a/controllers/cluster_controller.go +++ b/controllers/cluster_controller.go @@ -33,9 +33,11 @@ import ( "k8s.io/client-go/tools/record" clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3" "sigs.k8s.io/cluster-api/controllers/external" + "sigs.k8s.io/cluster-api/controllers/metrics" capierrors "sigs.k8s.io/cluster-api/errors" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/patch" + "sigs.k8s.io/cluster-api/util/secret" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" @@ -111,6 +113,7 @@ func (r *ClusterReconciler) Reconcile(req ctrl.Request) (_ ctrl.Result, reterr e defer func() { // Always reconcile the Status.Phase field. r.reconcilePhase(ctx, cluster) + r.reconcileMetrics(ctx, cluster) // Always attempt to Patch the Cluster object and status after each reconciliation. if err := patchHelper.Patch(ctx, cluster); err != nil { @@ -164,6 +167,35 @@ func (r *ClusterReconciler) reconcile(ctx context.Context, cluster *clusterv1.Cl return res, kerrors.NewAggregate(errs) } +func (r *ClusterReconciler) reconcileMetrics(_ context.Context, cluster *clusterv1.Cluster) { + + if cluster.Status.ControlPlaneInitialized { + metrics.ClusterControlPlaneReady.WithLabelValues(cluster.Name, cluster.Namespace).Set(1) + } else { + metrics.ClusterControlPlaneReady.WithLabelValues(cluster.Name, cluster.Namespace).Set(0) + } + + if cluster.Status.InfrastructureReady { + metrics.ClusterInfrastructureReady.WithLabelValues(cluster.Name, cluster.Namespace).Set(1) + } else { + metrics.ClusterInfrastructureReady.WithLabelValues(cluster.Name, cluster.Namespace).Set(0) + } + + // TODO: [wfernandes] pass context here + _, err := secret.Get(r.Client, cluster, secret.Kubeconfig) + if err != nil { + metrics.ClusterKubeconfigReady.WithLabelValues(cluster.Name, cluster.Namespace).Set(0) + } else { + metrics.ClusterKubeconfigReady.WithLabelValues(cluster.Name, cluster.Namespace).Set(1) + } + + if cluster.Status.ErrorReason != nil || cluster.Status.ErrorMessage != nil { + metrics.ClusterErrorSet.WithLabelValues(cluster.Name, cluster.Namespace).Set(1) + } else { + metrics.ClusterErrorSet.WithLabelValues(cluster.Name, cluster.Namespace).Set(0) + } +} + // reconcileDelete handles cluster deletion. 
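The reconcileMetrics hook above projects boolean status fields onto per-object gauge series: WithLabelValues selects (or lazily creates) the series keyed by cluster and namespace, and Set(1)/Set(0) overwrites it in place, so running it on every reconcile is idempotent. A minimal standalone sketch of that GaugeVec behaviour (the metric name and label values here are illustrative, not the registered capi_* collectors):

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

// Illustrative stand-in for the capi_cluster_* gauges defined in
// controllers/metrics; the real collectors are registered in init().
var ready = prometheus.NewGaugeVec(
	prometheus.GaugeOpts{
		Name: "example_cluster_ready",
		Help: "1 if ready, 0 otherwise.",
	},
	[]string{"cluster", "namespace"},
)

func main() {
	// Each (cluster, namespace) pair owns exactly one series; Set overwrites
	// the current value, so repeated reconciles never grow the series set.
	ready.WithLabelValues("test-cluster", "default").Set(1)
	ready.WithLabelValues("test-cluster", "default").Set(0)

	// Read the child gauge back, as the tests below do via Registry.Gather().
	fmt.Println(testutil.ToFloat64(ready.WithLabelValues("test-cluster", "default"))) // 0
}

One thing the pattern leaves open: nothing removes a series once its cluster is deleted; prometheus.GaugeVec provides DeleteLabelValues for that if stale series become a concern.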
func (r *ClusterReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster) (reconcile.Result, error) { logger := r.Log.WithValues("cluster", cluster.Name, "namespace", cluster.Namespace) diff --git a/controllers/cluster_controller_phases_test.go b/controllers/cluster_controller_phases_test.go index 5a9a6a8257f7..097bcd85a34b 100644 --- a/controllers/cluster_controller_phases_test.go +++ b/controllers/cluster_controller_phases_test.go @@ -20,90 +20,196 @@ import ( "context" "testing" + . "github.com/onsi/gomega" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/client-go/kubernetes/scheme" clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3" capierrors "sigs.k8s.io/cluster-api/errors" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/log" ) -func TestClusterReconciler_reconcileKubeconfig(t *testing.T) { - cluster := &clusterv1.Cluster{ - ObjectMeta: v1.ObjectMeta{ - Name: "test-cluster", - }, - Status: clusterv1.ClusterStatus{ - APIEndpoints: []clusterv1.APIEndpoint{{ - Host: "1.2.3.4", - Port: 0, - }}, - }, - } - - tests := []struct { - name string - cluster *clusterv1.Cluster - secret *corev1.Secret - wantErr bool - wantRequeue bool - }{ - { - name: "cluster not provisioned, apiEndpoint is not set", - cluster: &clusterv1.Cluster{}, - wantErr: false, - }, - { - name: "kubeconfig secret found", - cluster: cluster, - secret: &corev1.Secret{ - ObjectMeta: v1.ObjectMeta{ - Name: "test-cluster-kubeconfig", +func TestClusterReconcilePhases(t *testing.T) { + t.Run("reconcile infrastructure", func(t *testing.T) { + cluster := &clusterv1.Cluster{ + ObjectMeta: v1.ObjectMeta{ + Name: "test-cluster", + Namespace: "test-namespace", + }, + Status: clusterv1.ClusterStatus{ + APIEndpoints: []clusterv1.APIEndpoint{{ + Host: "1.2.3.4", + Port: 0, + }}, + InfrastructureReady: true, + }, + Spec: clusterv1.ClusterSpec{ + InfrastructureRef: &corev1.ObjectReference{ + APIVersion: "infrastructure.cluster.x-k8s.io/v1alpha2", + Kind: "InfrastructureConfig", + Name: "test", + }, + }, + } + + tests := []struct { + name string + cluster *clusterv1.Cluster + infraRef map[string]interface{} + expectErr bool + }{ + { + name: "returns no error if infrastructure ref is nil", + cluster: &clusterv1.Cluster{ObjectMeta: v1.ObjectMeta{Name: "test-cluster", Namespace: "test-namespace"}}, + expectErr: false, + }, + { + name: "returns error if unable to reconcile infrastructure ref", + cluster: cluster, + expectErr: true, + }, + { + name: "returns no error if infra config is marked for deletion", + cluster: cluster, + infraRef: map[string]interface{}{ + "kind": "InfrastructureConfig", + "apiVersion": "infrastructure.cluster.x-k8s.io/v1alpha2", + "metadata": map[string]interface{}{ + "name": "test", + "namespace": "test-namespace", + "deletionTimestamp": "sometime", + }, }, + expectErr: false, }, - wantErr: false, - }, - { - name: "kubeconfig secret not found, should return RequeueAfterError", - cluster: cluster, - wantErr: true, - wantRequeue: true, - }, - { - name: "invalid ca secret, should return error", - cluster: cluster, - secret: &corev1.Secret{ - ObjectMeta: v1.ObjectMeta{ - Name: "test-cluster-ca", + { + name: "returns no error if infrastructure is marked ready on cluster", + cluster: cluster, + infraRef: map[string]interface{}{ + "kind": "InfrastructureConfig", + "apiVersion": "infrastructure.cluster.x-k8s.io/v1alpha2", 
+ "metadata": map[string]interface{}{ + "name": "test", + "namespace": "test-namespace", + "deletionTimestamp": "sometime", + }, }, + expectErr: false, }, - wantErr: true, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - err := clusterv1.AddToScheme(scheme.Scheme) - if err != nil { - t.Fatal(err) - } - - c := fake.NewFakeClient(tt.cluster) - if tt.secret != nil { - c = fake.NewFakeClient(tt.cluster, tt.secret) - } - r := &ClusterReconciler{ - Client: c, - } - err = r.reconcileKubeconfig(context.Background(), tt.cluster) - if (err != nil) != tt.wantErr { - t.Errorf("reconcileKubeconfig() error = %v, wantErr %v", err, tt.wantErr) - } - - _, hasRequeErr := errors.Cause(err).(capierrors.HasRequeueAfterError) - if tt.wantRequeue != hasRequeErr { - t.Errorf("expected RequeAfterError = %v, got %v", tt.wantRequeue, hasRequeErr) - } - }) - } + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + RegisterTestingT(t) + err := clusterv1.AddToScheme(scheme.Scheme) + if err != nil { + t.Fatal(err) + } + + var c client.Client + if tt.infraRef != nil { + infraConfig := &unstructured.Unstructured{Object: tt.infraRef} + c = fake.NewFakeClient(tt.cluster, infraConfig) + } else { + c = fake.NewFakeClient(tt.cluster) + } + r := &ClusterReconciler{ + Client: c, + Log: log.Log, + } + + err = r.reconcileInfrastructure(context.Background(), tt.cluster) + if tt.expectErr { + Expect(err).To(HaveOccurred()) + } else { + Expect(err).ToNot(HaveOccurred()) + } + }) + } + + }) + + t.Run("reconcile kubeconfig", func(t *testing.T) { + cluster := &clusterv1.Cluster{ + ObjectMeta: v1.ObjectMeta{ + Name: "test-cluster", + }, + Status: clusterv1.ClusterStatus{ + APIEndpoints: []clusterv1.APIEndpoint{{ + Host: "1.2.3.4", + Port: 0, + }}, + }, + } + + tests := []struct { + name string + cluster *clusterv1.Cluster + secret *corev1.Secret + wantErr bool + wantRequeue bool + }{ + { + name: "cluster not provisioned, apiEndpoint is not set", + cluster: &clusterv1.Cluster{}, + wantErr: false, + }, + { + name: "kubeconfig secret found", + cluster: cluster, + secret: &corev1.Secret{ + ObjectMeta: v1.ObjectMeta{ + Name: "test-cluster-kubeconfig", + }, + }, + wantErr: false, + }, + { + name: "kubeconfig secret not found, should return RequeueAfterError", + cluster: cluster, + wantErr: true, + wantRequeue: true, + }, + { + name: "invalid ca secret, should return error", + cluster: cluster, + secret: &corev1.Secret{ + ObjectMeta: v1.ObjectMeta{ + Name: "test-cluster-ca", + }, + }, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + RegisterTestingT(t) + err := clusterv1.AddToScheme(scheme.Scheme) + if err != nil { + t.Fatal(err) + } + + c := fake.NewFakeClient(tt.cluster) + if tt.secret != nil { + c = fake.NewFakeClient(tt.cluster, tt.secret) + } + r := &ClusterReconciler{ + Client: c, + } + err = r.reconcileKubeconfig(context.Background(), tt.cluster) + if (err != nil) != tt.wantErr { + t.Errorf("reconcileKubeconfig() error = %v, wantErr %v", err, tt.wantErr) + } + + _, hasRequeErr := errors.Cause(err).(capierrors.HasRequeueAfterError) + if tt.wantRequeue != hasRequeErr { + t.Errorf("expected RequeAfterError = %v, got %v", tt.wantRequeue, hasRequeErr) + } + }) + } + }) } diff --git a/controllers/cluster_controller_test.go b/controllers/cluster_controller_test.go index 79fa3d157212..187cbcf58870 100644 --- a/controllers/cluster_controller_test.go +++ b/controllers/cluster_controller_test.go @@ -17,16 +17,21 @@ limitations under the License. 
package controllers import ( + "context" "reflect" "testing" + "github.com/gogo/protobuf/proto" . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes/scheme" "k8s.io/utils/pointer" clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3" + capierrors "sigs.k8s.io/cluster-api/errors" "sigs.k8s.io/cluster-api/util/kubeconfig" "sigs.k8s.io/cluster-api/util/patch" ctrl "sigs.k8s.io/controller-runtime" @@ -34,6 +39,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/metrics" ) var _ = Describe("Cluster Reconciler", func() { @@ -193,6 +199,7 @@ var _ = Describe("Cluster Reconciler", func() { instance.Spec.InfrastructureRef.Name == "test" }, timeout).Should(BeTrue()) }) + It("Should successfully patch a cluster object if only removing finalizers", func() { // Setup cluster := &clusterv1.Cluster{ @@ -323,132 +330,271 @@ var _ = Describe("Cluster Reconciler", func() { }) }) -func TestClusterReconciler_machineToCluster(t *testing.T) { - cluster := &clusterv1.Cluster{ - TypeMeta: metav1.TypeMeta{ - Kind: "Cluster", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: "test-cluster", - Namespace: "test", - }, - Spec: clusterv1.ClusterSpec{}, - Status: clusterv1.ClusterStatus{}, - } +func TestClusterReconciler(t *testing.T) { + t.Run("metrics", func(t *testing.T) { + + errorReason := capierrors.ClusterStatusError("foo") + tests := []struct { + name string + cs clusterv1.ClusterStatus + secret *corev1.Secret + expectedMetrics map[string]float64 + }{ + { + name: "cluster control plane metric is 1 if cluster status is true ", + cs: clusterv1.ClusterStatus{ + ControlPlaneInitialized: true, + }, + expectedMetrics: map[string]float64{"capi_cluster_control_plane_ready": 1}, + }, + { + name: "cluster control plane metric is 0 if cluster status is false ", + cs: clusterv1.ClusterStatus{ + ControlPlaneInitialized: false, + }, + expectedMetrics: map[string]float64{"capi_cluster_control_plane_ready": 0}, + }, + { + name: "cluster infrastructure metric is 1 if cluster status is true ", + cs: clusterv1.ClusterStatus{ + InfrastructureReady: true, + }, + expectedMetrics: map[string]float64{"capi_cluster_infrastructure_ready": 1}, + }, + { + name: "cluster infrastructure metric is 0 if cluster status is false ", + cs: clusterv1.ClusterStatus{ + InfrastructureReady: false, + }, + expectedMetrics: map[string]float64{"capi_cluster_infrastructure_ready": 0}, + }, + { + name: "cluster kubeconfig metric is 0 if secret is unavailable", + cs: clusterv1.ClusterStatus{}, + expectedMetrics: map[string]float64{"capi_cluster_kubeconfig_ready": 0}, + }, + { + name: "cluster kubeconfig metric is 1 if secret is available and ready", + cs: clusterv1.ClusterStatus{}, + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster-kubeconfig", + }, + }, + expectedMetrics: map[string]float64{"capi_cluster_kubeconfig_ready": 1}, + }, + { + name: "cluster error metric is 1 if ErrorReason is set", + cs: clusterv1.ClusterStatus{ + ErrorReason: &errorReason, + }, + expectedMetrics: map[string]float64{"capi_cluster_error_set": 1}, + }, + { + name: "cluster error metric is 1 if ErrorMessage is set", + cs: clusterv1.ClusterStatus{ + ErrorMessage: proto.String("some-error"), + }, + expectedMetrics: map[string]float64{"capi_cluster_error_set": 1}, + }, 
+ { + name: "cluster is ready", + cs: clusterv1.ClusterStatus{ + InfrastructureReady: true, + ControlPlaneInitialized: true, + }, + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster-kubeconfig", + }, + }, + expectedMetrics: map[string]float64{ + "capi_cluster_control_plane_ready": 1, + "capi_cluster_infrastructure_ready": 1, + "capi_cluster_kubeconfig_ready": 1, + "capi_cluster_error_set": 0, + }, + }, + } - controlPlaneWithNoderef := &clusterv1.Machine{ - TypeMeta: metav1.TypeMeta{ - Kind: "Machine", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: "controlPlaneWithNoderef", - Labels: map[string]string{ - clusterv1.ClusterLabelName: cluster.Name, - clusterv1.MachineControlPlaneLabelName: "", + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + RegisterTestingT(t) + err := clusterv1.AddToScheme(scheme.Scheme) + if err != nil { + t.Fatal(err) + } + var objs []runtime.Object + + c := &clusterv1.Cluster{ + TypeMeta: metav1.TypeMeta{ + Kind: "Cluster", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + }, + Spec: clusterv1.ClusterSpec{}, + Status: tt.cs, + } + objs = append(objs, c) + + if tt.secret != nil { + objs = append(objs, tt.secret) + } + + r := &ClusterReconciler{ + Client: fake.NewFakeClient(objs...), + Log: log.Log, + } + + r.reconcileMetrics(context.TODO(), c) + + for em, ev := range tt.expectedMetrics { + mr, err := metrics.Registry.Gather() + Expect(err).ToNot(HaveOccurred()) + mf := getMetricFamily(mr, em) + Expect(mf).ToNot(BeNil()) + for _, m := range mf.GetMetric() { + for _, l := range m.GetLabel() { + // ensure that the metric has a matching label + if l.GetName() == "cluster" && l.GetValue() == c.Name { + Expect(m.GetGauge().GetValue()).To(Equal(ev)) + } + } + } + } + + }) + } + }) + + t.Run("machine to cluster", func(t *testing.T) { + cluster := &clusterv1.Cluster{ + TypeMeta: metav1.TypeMeta{ + Kind: "Cluster", }, - }, - Status: clusterv1.MachineStatus{ - NodeRef: &v1.ObjectReference{ - Kind: "Node", - Namespace: "test-node", + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "test", }, - }, - } - controlPlaneWithoutNoderef := &clusterv1.Machine{ - TypeMeta: metav1.TypeMeta{ - Kind: "Machine", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: "controlPlaneWithoutNoderef", - Labels: map[string]string{ - clusterv1.ClusterLabelName: cluster.Name, - clusterv1.MachineControlPlaneLabelName: "", + Spec: clusterv1.ClusterSpec{}, + Status: clusterv1.ClusterStatus{}, + } + + controlPlaneWithNoderef := &clusterv1.Machine{ + TypeMeta: metav1.TypeMeta{ + Kind: "Machine", }, - }, - } - nonControlPlaneWithNoderef := &clusterv1.Machine{ - TypeMeta: metav1.TypeMeta{ - Kind: "Machine", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: "nonControlPlaneWitNoderef", - Labels: map[string]string{ - clusterv1.ClusterLabelName: cluster.Name, + ObjectMeta: metav1.ObjectMeta{ + Name: "controlPlaneWithNoderef", + Labels: map[string]string{ + clusterv1.ClusterLabelName: cluster.Name, + clusterv1.MachineControlPlaneLabelName: "", + }, }, - }, - Status: clusterv1.MachineStatus{ - NodeRef: &v1.ObjectReference{ - Kind: "Node", - Namespace: "test-node", + Status: clusterv1.MachineStatus{ + NodeRef: &v1.ObjectReference{ + Kind: "Node", + Namespace: "test-node", + }, }, - }, - } - nonControlPlaneWithoutNoderef := &clusterv1.Machine{ - TypeMeta: metav1.TypeMeta{ - Kind: "Machine", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: "nonControlPlaneWithoutNoderef", - Labels: map[string]string{ - clusterv1.ClusterLabelName: cluster.Name, + } + 
controlPlaneWithoutNoderef := &clusterv1.Machine{ + TypeMeta: metav1.TypeMeta{ + Kind: "Machine", }, - }, - } + ObjectMeta: metav1.ObjectMeta{ + Name: "controlPlaneWithoutNoderef", + Labels: map[string]string{ + clusterv1.ClusterLabelName: cluster.Name, + clusterv1.MachineControlPlaneLabelName: "", + }, + }, + } + nonControlPlaneWithNoderef := &clusterv1.Machine{ + TypeMeta: metav1.TypeMeta{ + Kind: "Machine", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "nonControlPlaneWitNoderef", + Labels: map[string]string{ + clusterv1.ClusterLabelName: cluster.Name, + }, + }, + Status: clusterv1.MachineStatus{ + NodeRef: &v1.ObjectReference{ + Kind: "Node", + Namespace: "test-node", + }, + }, + } + nonControlPlaneWithoutNoderef := &clusterv1.Machine{ + TypeMeta: metav1.TypeMeta{ + Kind: "Machine", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "nonControlPlaneWithoutNoderef", + Labels: map[string]string{ + clusterv1.ClusterLabelName: cluster.Name, + }, + }, + } - tests := []struct { - name string - o handler.MapObject - want []ctrl.Request - }{ - { - name: "controlplane machine, noderef is set, should return cluster", - o: handler.MapObject{ - Meta: controlPlaneWithNoderef.GetObjectMeta(), - Object: controlPlaneWithNoderef, - }, - want: []ctrl.Request{ - {NamespacedName: client.ObjectKey{ - Name: cluster.Name, - Namespace: cluster.Namespace, - }}, + tests := []struct { + name string + o handler.MapObject + want []ctrl.Request + }{ + { + name: "controlplane machine, noderef is set, should return cluster", + o: handler.MapObject{ + Meta: controlPlaneWithNoderef.GetObjectMeta(), + Object: controlPlaneWithNoderef, + }, + want: []ctrl.Request{ + {NamespacedName: client.ObjectKey{ + Name: cluster.Name, + Namespace: cluster.Namespace, + }}, + }, }, - }, - { - name: "controlplane machine, noderef is not set", - o: handler.MapObject{ - Meta: controlPlaneWithoutNoderef.GetObjectMeta(), - Object: controlPlaneWithoutNoderef, + { + name: "controlplane machine, noderef is not set", + o: handler.MapObject{ + Meta: controlPlaneWithoutNoderef.GetObjectMeta(), + Object: controlPlaneWithoutNoderef, + }, + want: nil, }, - want: nil, - }, - { - name: "not controlplane machine, noderef is set", - o: handler.MapObject{ - Meta: nonControlPlaneWithNoderef.GetObjectMeta(), - Object: nonControlPlaneWithNoderef, + { + name: "not controlplane machine, noderef is set", + o: handler.MapObject{ + Meta: nonControlPlaneWithNoderef.GetObjectMeta(), + Object: nonControlPlaneWithNoderef, + }, + want: nil, }, - want: nil, - }, - { - name: "not controlplane machine, noderef is not set", - o: handler.MapObject{ - Meta: nonControlPlaneWithoutNoderef.GetObjectMeta(), - Object: nonControlPlaneWithoutNoderef, + { + name: "not controlplane machine, noderef is not set", + o: handler.MapObject{ + Meta: nonControlPlaneWithoutNoderef.GetObjectMeta(), + Object: nonControlPlaneWithoutNoderef, + }, + want: nil, }, - want: nil, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - r := &ClusterReconciler{ - Client: fake.NewFakeClient(cluster, controlPlaneWithNoderef, controlPlaneWithoutNoderef, nonControlPlaneWithNoderef, nonControlPlaneWithoutNoderef), - Log: log.Log, - } - if got := r.controlPlaneMachineToCluster(tt.o); !reflect.DeepEqual(got, tt.want) { - t.Errorf("controlPlaneMachineToCluster() = %v, want %v", got, tt.want) - } - }) - } + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + r := &ClusterReconciler{ + Client: fake.NewFakeClient(cluster, controlPlaneWithNoderef, controlPlaneWithoutNoderef, 
nonControlPlaneWithNoderef, nonControlPlaneWithoutNoderef), + Log: log.Log, + } + if got := r.controlPlaneMachineToCluster(tt.o); !reflect.DeepEqual(got, tt.want) { + t.Errorf("controlPlaneMachineToCluster() = %v, want %v", got, tt.want) + } + }) + } + }) } type machineDeploymentBuilder struct { diff --git a/controllers/machine_controller.go b/controllers/machine_controller.go index a7d99004aaa1..dc5cc26a60e1 100644 --- a/controllers/machine_controller.go +++ b/controllers/machine_controller.go @@ -37,6 +37,7 @@ import ( "k8s.io/klog" clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3" "sigs.k8s.io/cluster-api/controllers/external" + "sigs.k8s.io/cluster-api/controllers/metrics" "sigs.k8s.io/cluster-api/controllers/remote" capierrors "sigs.k8s.io/cluster-api/errors" kubedrain "sigs.k8s.io/cluster-api/third_party/kubernetes-drain" @@ -114,6 +115,7 @@ func (r *MachineReconciler) Reconcile(req ctrl.Request) (_ ctrl.Result, reterr e defer func() { // Always reconcile the Status.Phase field. r.reconcilePhase(ctx, m) + r.reconcileMetrics(ctx, m) // Always attempt to Patch the Machine object and status after each reconciliation. if err := patchHelper.Patch(ctx, m); err != nil { @@ -189,6 +191,24 @@ func (r *MachineReconciler) reconcile(ctx context.Context, cluster *clusterv1.Cl return res, kerrors.NewAggregate(errs) } +func (r *MachineReconciler) reconcileMetrics(_ context.Context, m *clusterv1.Machine) { + if m.Status.BootstrapReady { + metrics.MachineBootstrapReady.WithLabelValues(m.Name, m.Namespace, m.Spec.ClusterName).Set(1) + } else { + metrics.MachineBootstrapReady.WithLabelValues(m.Name, m.Namespace, m.Spec.ClusterName).Set(0) + } + if m.Status.InfrastructureReady { + metrics.MachineInfrastructureReady.WithLabelValues(m.Name, m.Namespace, m.Spec.ClusterName).Set(1) + } else { + metrics.MachineInfrastructureReady.WithLabelValues(m.Name, m.Namespace, m.Spec.ClusterName).Set(0) + } + if m.Status.NodeRef != nil { + metrics.MachineNodeReady.WithLabelValues(m.Name, m.Namespace, m.Spec.ClusterName).Set(1) + } else { + metrics.MachineNodeReady.WithLabelValues(m.Name, m.Namespace, m.Spec.ClusterName).Set(0) + } +} + func (r *MachineReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) (ctrl.Result, error) { logger := r.Log.WithValues("machine", m.Name, "namespace", m.Namespace) logger = logger.WithValues("cluster", cluster.Name) diff --git a/controllers/machine_controller_phases_test.go b/controllers/machine_controller_phases_test.go index a4735795a298..b788695ac542 100644 --- a/controllers/machine_controller_phases_test.go +++ b/controllers/machine_controller_phases_test.go @@ -22,12 +22,13 @@ import ( "time" . "github.com/onsi/ginkgo" - "github.com/onsi/gomega" . 
"github.com/onsi/gomega" + dto "github.com/prometheus/client_model/go" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes/scheme" "k8s.io/utils/pointer" clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3" "sigs.k8s.io/cluster-api/util/kubeconfig" @@ -376,7 +377,7 @@ func TestReconcileBootstrap(t *testing.T) { bootstrapConfig map[string]interface{} machine *clusterv1.Machine expectError bool - expected func(g *gomega.WithT, m *clusterv1.Machine) + expected func(g *WithT, m *clusterv1.Machine) }{ { name: "new machine, bootstrap config ready with data", @@ -394,10 +395,10 @@ func TestReconcileBootstrap(t *testing.T) { }, }, expectError: false, - expected: func(g *gomega.WithT, m *clusterv1.Machine) { - g.Expect(m.Status.BootstrapReady).To(gomega.BeTrue()) - g.Expect(m.Spec.Bootstrap.Data).ToNot(gomega.BeNil()) - g.Expect(*m.Spec.Bootstrap.Data).To(gomega.ContainSubstring("#!/bin/bash")) + expected: func(g *WithT, m *clusterv1.Machine) { + g.Expect(m.Status.BootstrapReady).To(BeTrue()) + g.Expect(m.Spec.Bootstrap.Data).ToNot(BeNil()) + g.Expect(*m.Spec.Bootstrap.Data).To(ContainSubstring("#!/bin/bash")) }, }, { @@ -415,9 +416,9 @@ func TestReconcileBootstrap(t *testing.T) { }, }, expectError: true, - expected: func(g *gomega.WithT, m *clusterv1.Machine) { - g.Expect(m.Status.BootstrapReady).To(gomega.BeFalse()) - g.Expect(m.Spec.Bootstrap.Data).To(gomega.BeNil()) + expected: func(g *WithT, m *clusterv1.Machine) { + g.Expect(m.Status.BootstrapReady).To(BeFalse()) + g.Expect(m.Spec.Bootstrap.Data).To(BeNil()) }, }, { @@ -433,8 +434,8 @@ func TestReconcileBootstrap(t *testing.T) { "status": map[string]interface{}{}, }, expectError: true, - expected: func(g *gomega.WithT, m *clusterv1.Machine) { - g.Expect(m.Status.BootstrapReady).To(gomega.BeFalse()) + expected: func(g *WithT, m *clusterv1.Machine) { + g.Expect(m.Status.BootstrapReady).To(BeFalse()) }, }, { @@ -450,8 +451,8 @@ func TestReconcileBootstrap(t *testing.T) { "status": map[string]interface{}{}, }, expectError: true, - expected: func(g *gomega.WithT, m *clusterv1.Machine) { - g.Expect(m.Status.BootstrapReady).To(gomega.BeFalse()) + expected: func(g *WithT, m *clusterv1.Machine) { + g.Expect(m.Status.BootstrapReady).To(BeFalse()) }, }, { @@ -503,9 +504,9 @@ func TestReconcileBootstrap(t *testing.T) { }, }, expectError: false, - expected: func(g *gomega.WithT, m *clusterv1.Machine) { - g.Expect(m.Status.BootstrapReady).To(gomega.BeTrue()) - g.Expect(*m.Spec.Bootstrap.Data).To(gomega.Equal("#!/bin/bash ... data")) + expected: func(g *WithT, m *clusterv1.Machine) { + g.Expect(m.Status.BootstrapReady).To(BeTrue()) + g.Expect(*m.Spec.Bootstrap.Data).To(Equal("#!/bin/bash ... 
data")) }, }, { @@ -543,15 +544,17 @@ func TestReconcileBootstrap(t *testing.T) { }, }, expectError: false, - expected: func(g *gomega.WithT, m *clusterv1.Machine) { - g.Expect(m.Status.BootstrapReady).To(gomega.BeTrue()) + expected: func(g *WithT, m *clusterv1.Machine) { + g.Expect(m.Status.BootstrapReady).To(BeTrue()) }, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - g := gomega.NewGomegaWithT(t) + g := NewGomegaWithT(t) + err := clusterv1.AddToScheme(scheme.Scheme) + g.Expect(err).NotTo(HaveOccurred()) if tc.machine == nil { tc.machine = defaultMachine.DeepCopy() @@ -563,11 +566,11 @@ func TestReconcileBootstrap(t *testing.T) { Log: log.Log, } - err := r.reconcileBootstrap(context.Background(), tc.machine) + err = r.reconcileBootstrap(context.Background(), tc.machine) if tc.expectError { - g.Expect(err).ToNot(gomega.BeNil()) + g.Expect(err).ToNot(BeNil()) } else { - g.Expect(err).To(gomega.BeNil()) + g.Expect(err).To(BeNil()) } if tc.expected != nil { @@ -612,7 +615,7 @@ func TestReconcileInfrastructure(t *testing.T) { expectError bool expectChanged bool expectRequeueAfter bool - expected func(g *gomega.WithT, m *clusterv1.Machine) + expected func(g *WithT, m *clusterv1.Machine) }{ { name: "new machine, infrastructure config ready", @@ -642,8 +645,8 @@ func TestReconcileInfrastructure(t *testing.T) { }, expectError: false, expectChanged: true, - expected: func(g *gomega.WithT, m *clusterv1.Machine) { - g.Expect(m.Status.InfrastructureReady).To(gomega.BeTrue()) + expected: func(g *WithT, m *clusterv1.Machine) { + g.Expect(m.Status.InfrastructureReady).To(BeTrue()) }, }, { @@ -693,18 +696,20 @@ func TestReconcileInfrastructure(t *testing.T) { }, expectError: true, expectRequeueAfter: true, - expected: func(g *gomega.WithT, m *clusterv1.Machine) { - g.Expect(m.Status.InfrastructureReady).To(gomega.BeTrue()) - g.Expect(m.Status.ErrorMessage).ToNot(gomega.BeNil()) - g.Expect(m.Status.ErrorReason).ToNot(gomega.BeNil()) - g.Expect(m.Status.GetTypedPhase()).To(gomega.Equal(clusterv1.MachinePhaseFailed)) + expected: func(g *WithT, m *clusterv1.Machine) { + g.Expect(m.Status.InfrastructureReady).To(BeTrue()) + g.Expect(m.Status.ErrorMessage).ToNot(BeNil()) + g.Expect(m.Status.ErrorReason).ToNot(BeNil()) + g.Expect(m.Status.GetTypedPhase()).To(Equal(clusterv1.MachinePhaseFailed)) }, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - g := gomega.NewGomegaWithT(t) + g := NewGomegaWithT(t) + err := clusterv1.AddToScheme(scheme.Scheme) + g.Expect(err).NotTo(HaveOccurred()) if tc.machine == nil { tc.machine = defaultMachine.DeepCopy() @@ -716,19 +721,26 @@ func TestReconcileInfrastructure(t *testing.T) { Log: log.Log, } - err := r.reconcileInfrastructure(context.Background(), tc.machine) + err = r.reconcileInfrastructure(context.Background(), tc.machine) r.reconcilePhase(context.Background(), tc.machine) if tc.expectError { - g.Expect(err).ToNot(gomega.BeNil()) + g.Expect(err).ToNot(BeNil()) } else { - g.Expect(err).To(gomega.BeNil()) + g.Expect(err).To(BeNil()) } if tc.expected != nil { tc.expected(g, tc.machine) } }) - } +} +func getMetricFamily(list []*dto.MetricFamily, metricName string) *dto.MetricFamily { + for _, mf := range list { + if mf.GetName() == metricName { + return mf + } + } + return nil } diff --git a/controllers/machine_controller_test.go b/controllers/machine_controller_test.go index 207036bebf68..d5777a13c018 100644 --- a/controllers/machine_controller_test.go +++ b/controllers/machine_controller_test.go @@ -17,6 +17,7 @@ 
 limitations under the License.
 */

 package controllers

 import (
+	"context"
 	"testing"

 	. "github.com/onsi/gomega"
@@ -31,6 +32,7 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/client/fake"
 	"sigs.k8s.io/controller-runtime/pkg/log"
+	"sigs.k8s.io/controller-runtime/pkg/metrics"
 	"sigs.k8s.io/controller-runtime/pkg/reconcile"
 )

@@ -553,3 +555,96 @@
 	Expect(mr.Client.Get(ctx, key, m)).ToNot(HaveOccurred())
 	Expect(m.ObjectMeta.Finalizers).To(Equal([]string{metav1.FinalizerDeleteDependents}))
 }
+
+func TestReconcileMetrics(t *testing.T) {
+	RegisterTestingT(t)
+
+	err := clusterv1.AddToScheme(scheme.Scheme)
+	Expect(err).NotTo(HaveOccurred())
+
+	tests := []struct {
+		name            string
+		ms              clusterv1.MachineStatus
+		expectedMetrics map[string]float64
+	}{
+		{
+			name: "machine bootstrap metric is set to 1 if ready",
+			ms: clusterv1.MachineStatus{
+				BootstrapReady: true,
+			},
+			expectedMetrics: map[string]float64{"capi_machine_bootstrap_ready": 1},
+		},
+		{
+			name: "machine bootstrap metric is set to 0 if not ready",
+			ms: clusterv1.MachineStatus{
+				BootstrapReady: false,
+			},
+			expectedMetrics: map[string]float64{"capi_machine_bootstrap_ready": 0},
+		},
+		{
+			name: "machine infrastructure metric is set to 1 if ready",
+			ms: clusterv1.MachineStatus{
+				InfrastructureReady: true,
+			},
+			expectedMetrics: map[string]float64{"capi_machine_infrastructure_ready": 1},
+		},
+		{
+			name: "machine infrastructure metric is set to 0 if not ready",
+			ms: clusterv1.MachineStatus{
+				InfrastructureReady: false,
+			},
+			expectedMetrics: map[string]float64{"capi_machine_infrastructure_ready": 0},
+		},
+		{
+			name: "machine node metric is set to 1 if node ref exists",
+			ms: clusterv1.MachineStatus{
+				NodeRef: &corev1.ObjectReference{
+					Name: "test",
+				},
+			},
+			expectedMetrics: map[string]float64{"capi_machine_node_ready": 1},
+		},
+		{
+			name:            "machine node metric is set to 0 if node ref does not exist",
+			ms:              clusterv1.MachineStatus{},
+			expectedMetrics: map[string]float64{"capi_machine_node_ready": 0},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var objs []runtime.Object
+			machine := &clusterv1.Machine{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: "test-machine",
+				},
+				Spec:   clusterv1.MachineSpec{},
+				Status: tt.ms,
+			}
+			objs = append(objs, machine)

+			r := &MachineReconciler{
+				Client: fake.NewFakeClient(objs...),
+				Log:    log.Log,
+			}
+
+			r.reconcileMetrics(context.TODO(), machine)
+
+			for em, ev := range tt.expectedMetrics {
+				mr, err := metrics.Registry.Gather()
+				Expect(err).ToNot(HaveOccurred())
+				mf := getMetricFamily(mr, em)
+				Expect(mf).ToNot(BeNil())
+				for _, m := range mf.GetMetric() {
+					for _, l := range m.GetLabel() {
+						// ensure that the metric has a matching label
+						if l.GetName() == "machine" && l.GetValue() == machine.Name {
+							Expect(m.GetGauge().GetValue()).To(Equal(ev))
+						}
+					}
+				}
+			}
+		})
+	}
+
+}
diff --git a/controllers/metrics/metrics.go b/controllers/metrics/metrics.go
new file mode 100644
index 000000000000..e2895682459d
--- /dev/null
+++ b/controllers/metrics/metrics.go
@@ -0,0 +1,108 @@
+/*
+Copyright 2019 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package metrics defines the metrics available for the Cluster API
+// controllers.
+package metrics
+
+import (
+	"github.com/prometheus/client_golang/prometheus"
+	"sigs.k8s.io/controller-runtime/pkg/metrics"
+)
+
+var (
+	// ClusterControlPlaneReady is a metric that is set to 1 if the cluster
+	// control plane is ready and 0 if it is not.
+	ClusterControlPlaneReady = prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Name: "capi_cluster_control_plane_ready",
+			Help: "Cluster control plane is ready if set to 1 and not if 0.",
+		},
+		[]string{"cluster", "namespace"},
+	)
+
+	// ClusterInfrastructureReady is a metric that is set to 1 if the cluster
+	// infrastructure is ready and 0 if it is not.
+	ClusterInfrastructureReady = prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Name: "capi_cluster_infrastructure_ready",
+			Help: "Cluster infrastructure is ready if set to 1 and not if 0.",
+		},
+		[]string{"cluster", "namespace"},
+	)
+
+	// ClusterKubeconfigReady is a metric that is set to 1 if the cluster
+	// kubeconfig secret has been created and 0 if it is not.
+	ClusterKubeconfigReady = prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Name: "capi_cluster_kubeconfig_ready",
+			Help: "Cluster kubeconfig is ready if set to 1 and not if 0.",
+		},
+		[]string{"cluster", "namespace"},
+	)
+
+	// ClusterErrorSet is a metric that is set to 1 if the cluster ErrorReason
+	// or ErrorMessage is set and 0 if it is not.
+	ClusterErrorSet = prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Name: "capi_cluster_error_set",
+			Help: "Cluster error message or reason is set if metric is 1.",
+		},
+		[]string{"cluster", "namespace"},
+	)
+
+	// MachineBootstrapReady is a metric that is set to 1 if machine bootstrap
+	// is ready and 0 if it is not.
+	MachineBootstrapReady = prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Name: "capi_machine_bootstrap_ready",
+			Help: "Machine Bootstrap is ready if set to 1 and not if 0.",
+		},
+		[]string{"machine", "namespace", "cluster"},
+	)
+
+	// MachineInfrastructureReady is a metric that is set to 1 if machine
+	// infrastructure is ready and 0 if it is not.
+	MachineInfrastructureReady = prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Name: "capi_machine_infrastructure_ready",
+			Help: "Machine InfrastructureRef is ready if set to 1 and not if 0.",
+		},
+		[]string{"machine", "namespace", "cluster"},
+	)
+
+	// MachineNodeReady is a metric that is set to 1 if machine node is ready
+	// and 0 if it is not.
+ MachineNodeReady = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "capi_machine_node_ready", + Help: "Machine NodeRef is ready if set to 1 and not if 0.", + }, + []string{"machine", "namespace", "cluster"}, + ) +) + +func init() { + metrics.Registry.MustRegister( + ClusterControlPlaneReady, + ClusterInfrastructureReady, + ClusterKubeconfigReady, + ClusterErrorSet, + MachineBootstrapReady, + MachineInfrastructureReady, + MachineNodeReady, + ) +} diff --git a/go.mod b/go.mod index 08a09247b3df..b5f2cc15bd37 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/davecgh/go-spew v1.1.1 github.com/go-logr/logr v0.1.0 - github.com/gogo/protobuf v1.2.1 // indirect + github.com/gogo/protobuf v1.2.1 github.com/google/go-cmp v0.3.1 // indirect github.com/gophercloud/gophercloud v0.3.0 // indirect github.com/hashicorp/golang-lru v0.5.3 // indirect @@ -15,6 +15,8 @@ require ( github.com/onsi/ginkgo v1.10.1 github.com/onsi/gomega v1.7.0 github.com/pkg/errors v0.8.1 + github.com/prometheus/client_golang v1.0.0 + github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90 github.com/prometheus/procfs v0.0.5 // indirect github.com/sergi/go-diff v1.0.0 github.com/spf13/cobra v0.0.5
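These collectors only appear on a scrape because the package's init() registers them with controller-runtime's shared registry, the same registry the controller manager serves on its metrics endpoint. A minimal, self-contained sketch of exposing that registry by hand; the HTTP wiring and port are illustrative assumptions, while the blank import path is the package this patch adds:

package main

import (
	"log"
	"net/http"

	"github.com/prometheus/client_golang/prometheus/promhttp"
	ctrlmetrics "sigs.k8s.io/controller-runtime/pkg/metrics"

	// Blank import runs init(), which registers the capi_* gauges above
	// with controller-runtime's shared registry.
	_ "sigs.k8s.io/cluster-api/controllers/metrics"
)

func main() {
	// Serve the shared registry. A real controller manager does this
	// automatically on its configured metrics address (MetricsBindAddress),
	// so this handler is only for standalone experimentation.
	http.Handle("/metrics", promhttp.HandlerFor(ctrlmetrics.Registry, promhttp.HandlerOpts{}))
	log.Fatal(http.ListenAndServe(":8080", nil))
}

Once a reconcile has run, `curl localhost:8080/metrics | grep capi_` shows one series per object, along the lines of capi_machine_node_ready{cluster="my-cluster",machine="test-machine",namespace="default"} 1 (label values illustrative).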