Skip to content

Commit

Permalink
Add metrics reconcile step for machine and cluster controllers
Browse files Browse the repository at this point in the history
Signed-off-by: Warren Fernandes <[email protected]>
  • Loading branch information
Warren Fernandes committed Nov 12, 2019
1 parent c821586 commit 96f9e2d
Show file tree
Hide file tree
Showing 11 changed files with 555 additions and 333 deletions.
75 changes: 33 additions & 42 deletions controllers/cluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ import (

"github.com/go-logr/logr"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/runtime"
Expand All @@ -34,14 +33,15 @@ import (
"k8s.io/client-go/tools/record"
clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
"sigs.k8s.io/cluster-api/controllers/external"
"sigs.k8s.io/cluster-api/controllers/metrics"
capierrors "sigs.k8s.io/cluster-api/errors"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/patch"
"sigs.k8s.io/cluster-api/util/secret"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/metrics"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/controller-runtime/pkg/source"
)
Expand All @@ -52,38 +52,6 @@ const (
deleteRequeueAfter = 5 * time.Second
)

// newClusterReadinessGauge builds a gauge vector with the given metric name
// and help text, partitioned by the "cluster" and "namespace" labels.
func newClusterReadinessGauge(name, help string) *prometheus.GaugeVec {
	opts := prometheus.GaugeOpts{Name: name, Help: help}
	return prometheus.NewGaugeVec(opts, []string{"cluster", "namespace"})
}

// Per-cluster readiness gauges. Each one is set to 1 when the corresponding
// aspect of the cluster is ready and to 0 otherwise.
var (
	// clusterControlPlaneReady reports control plane readiness.
	clusterControlPlaneReady = newClusterReadinessGauge(
		"capi_cluster_control_plane_ready",
		"Cluster control plane is ready if set to 1 and not if 0.",
	)

	// clusterInfrastructureReady reports infrastructure readiness.
	clusterInfrastructureReady = newClusterReadinessGauge(
		"capi_cluster_infrastructure_ready",
		"Cluster infrastructure is ready if set to 1 and not if 0.",
	)

	// clusterKubeconfigReady reports kubeconfig readiness.
	clusterKubeconfigReady = newClusterReadinessGauge(
		"capi_cluster_kubeconfig_ready",
		"Cluster kubeconfig is ready if set to 1 and not if 0.",
	)
)

func init() {
metrics.Registry.MustRegister(
clusterControlPlaneReady,
clusterInfrastructureReady,
clusterKubeconfigReady,
)
}

// +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;patch
// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;patch
// +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch;create;update;patch;delete
Expand Down Expand Up @@ -145,6 +113,7 @@ func (r *ClusterReconciler) Reconcile(req ctrl.Request) (_ ctrl.Result, reterr e
defer func() {
// Always reconcile the Status.Phase field.
r.reconcilePhase(ctx, cluster)
r.reconcileMetrics(ctx, cluster)

// Always attempt to Patch the Cluster object and status after each reconciliation.
if err := patchHelper.Patch(ctx, cluster); err != nil {
Expand Down Expand Up @@ -198,6 +167,35 @@ func (r *ClusterReconciler) reconcile(ctx context.Context, cluster *clusterv1.Cl
return res, kerrors.NewAggregate(errs)
}

// reconcileMetrics refreshes the per-cluster gauges from the current state of
// the given Cluster. Every gauge is labelled with the cluster's name and
// namespace and is written as 1 when its condition holds and 0 otherwise:
//
//   - ClusterControlPlaneReady:   Status.ControlPlaneInitialized is true
//   - ClusterInfrastructureReady: Status.InfrastructureReady is true
//   - ClusterKubeconfigReady:     the kubeconfig secret lookup returned no error
//   - ClusterErrorSet:            Status.ErrorReason or Status.ErrorMessage is set
//
// The context parameter is currently unused.
func (r *ClusterReconciler) reconcileMetrics(_ context.Context, cluster *clusterv1.Cluster) {
	// asGauge maps a condition onto the 1/0 convention used by all gauges here.
	asGauge := func(ok bool) float64 {
		if ok {
			return 1
		}
		return 0
	}
	name, namespace := cluster.Name, cluster.Namespace

	metrics.ClusterControlPlaneReady.
		WithLabelValues(name, namespace).
		Set(asGauge(cluster.Status.ControlPlaneInitialized))

	metrics.ClusterInfrastructureReady.
		WithLabelValues(name, namespace).
		Set(asGauge(cluster.Status.InfrastructureReady))

	// Any lookup error (not only "not found") is reported as kubeconfig not ready.
	// TODO: [wfernandes] pass context here
	_, err := secret.Get(r.Client, cluster, secret.Kubeconfig)
	metrics.ClusterKubeconfigReady.
		WithLabelValues(name, namespace).
		Set(asGauge(err == nil))

	hasError := cluster.Status.ErrorReason != nil || cluster.Status.ErrorMessage != nil
	metrics.ClusterErrorSet.
		WithLabelValues(name, namespace).
		Set(asGauge(hasError))
}

// reconcileDelete handles cluster deletion.
func (r *ClusterReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster) (reconcile.Result, error) {
logger := r.Log.WithValues("cluster", cluster.Name, "namespace", cluster.Namespace)
Expand Down Expand Up @@ -372,14 +370,7 @@ func splitMachineList(list *clusterv1.MachineList) (*clusterv1.MachineList, *clu
return controlplanes, nodes
}

func (r *ClusterReconciler) reconcileControlPlaneInitialized(ctx context.Context, cluster *clusterv1.Cluster) (err error) {
defer func() {
if err != nil || !cluster.Status.ControlPlaneInitialized {
clusterControlPlaneReady.WithLabelValues(cluster.Name, cluster.Namespace).Set(0)
} else {
clusterControlPlaneReady.WithLabelValues(cluster.Name, cluster.Namespace).Set(1)
}
}()
func (r *ClusterReconciler) reconcileControlPlaneInitialized(ctx context.Context, cluster *clusterv1.Cluster) error {
logger := r.Log.WithValues("cluster", cluster.Name, "namespace", cluster.Namespace)

if cluster.Status.ControlPlaneInitialized {
Expand Down
19 changes: 2 additions & 17 deletions controllers/cluster_controller_phases.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,14 +143,7 @@ func (r *ClusterReconciler) reconcileExternal(ctx context.Context, cluster *clus
}

// reconcileInfrastructure reconciles the Spec.InfrastructureRef object on a Cluster.
func (r *ClusterReconciler) reconcileInfrastructure(ctx context.Context, cluster *clusterv1.Cluster) (err error) {
defer func() {
if err != nil || !cluster.Status.InfrastructureReady {
clusterInfrastructureReady.WithLabelValues(cluster.Name, cluster.Namespace).Set(0)
} else {
clusterInfrastructureReady.WithLabelValues(cluster.Name, cluster.Namespace).Set(1)
}
}()
func (r *ClusterReconciler) reconcileInfrastructure(ctx context.Context, cluster *clusterv1.Cluster) error {
logger := r.Log.WithValues("cluster", cluster.Name, "namespace", cluster.Namespace)

if cluster.Spec.InfrastructureRef == nil {
Expand Down Expand Up @@ -194,15 +187,7 @@ func (r *ClusterReconciler) reconcileInfrastructure(ctx context.Context, cluster
return nil
}

func (r *ClusterReconciler) reconcileKubeconfig(ctx context.Context, cluster *clusterv1.Cluster) (rerr error) {
defer func() {
if rerr != nil || len(cluster.Status.APIEndpoints) == 0 {
clusterKubeconfigReady.WithLabelValues(cluster.Name, cluster.Namespace).Set(0)
} else {
clusterKubeconfigReady.WithLabelValues(cluster.Name, cluster.Namespace).Set(1)
}
}()

func (r *ClusterReconciler) reconcileKubeconfig(ctx context.Context, cluster *clusterv1.Cluster) error {
if len(cluster.Status.APIEndpoints) == 0 {
return nil
}
Expand Down
85 changes: 28 additions & 57 deletions controllers/cluster_controller_phases_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,9 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/metrics"
)

func TestClusterReconciler(t *testing.T) {
func TestClusterReconcilePhases(t *testing.T) {
t.Run("reconcile infrastructure", func(t *testing.T) {
cluster := &clusterv1.Cluster{
ObjectMeta: v1.ObjectMeta{
Expand All @@ -58,23 +57,20 @@ func TestClusterReconciler(t *testing.T) {
}

tests := []struct {
name string
cluster *clusterv1.Cluster
infraRef map[string]interface{}
expectErr bool
expectedMetric float64
name string
cluster *clusterv1.Cluster
infraRef map[string]interface{}
expectErr bool
}{
{
name: "returns no error if infrastructure ref is nil",
cluster: &clusterv1.Cluster{ObjectMeta: v1.ObjectMeta{Name: "test-cluster", Namespace: "test-namespace"}},
expectErr: false,
expectedMetric: 0,
name: "returns no error if infrastructure ref is nil",
cluster: &clusterv1.Cluster{ObjectMeta: v1.ObjectMeta{Name: "test-cluster", Namespace: "test-namespace"}},
expectErr: false,
},
{
name: "returns error if unable to reconcile infrastructure ref",
cluster: cluster,
expectErr: true,
expectedMetric: 0,
name: "returns error if unable to reconcile infrastructure ref",
cluster: cluster,
expectErr: true,
},
{
name: "returns no error if infra config is marked for deletion",
Expand All @@ -88,11 +84,10 @@ func TestClusterReconciler(t *testing.T) {
"deletionTimestamp": "sometime",
},
},
expectErr: false,
expectedMetric: 1,
expectErr: false,
},
{
name: "returns no error and sets metric to 1 if infrastructure is marked ready on cluster",
name: "returns no error if infrastructure is marked ready on cluster",
cluster: cluster,
infraRef: map[string]interface{}{
"kind": "InfrastructureConfig",
Expand All @@ -103,8 +98,7 @@ func TestClusterReconciler(t *testing.T) {
"deletionTimestamp": "sometime",
},
},
expectErr: false,
expectedMetric: 1,
expectErr: false,
},
}

Expand Down Expand Up @@ -134,12 +128,6 @@ func TestClusterReconciler(t *testing.T) {
} else {
Expect(err).ToNot(HaveOccurred())
}

mr, err := metrics.Registry.Gather()
Expect(err).ToNot(HaveOccurred())
mf := getMetricFamily(mr, "capi_cluster_infrastructure_ready")
Expect(mf).ToNot(BeNil())
Expect(mf.GetMetric()[0].GetGauge().GetValue()).To(Equal(tt.expectedMetric))
})
}

Expand All @@ -159,18 +147,16 @@ func TestClusterReconciler(t *testing.T) {
}

tests := []struct {
name string
cluster *clusterv1.Cluster
secret *corev1.Secret
wantErr bool
wantRequeue bool
expectedMetric float64
name string
cluster *clusterv1.Cluster
secret *corev1.Secret
wantErr bool
wantRequeue bool
}{
{
name: "cluster not provisioned, apiEndpoint is not set",
cluster: &clusterv1.Cluster{},
wantErr: false,
expectedMetric: 0,
name: "cluster not provisioned, apiEndpoint is not set",
cluster: &clusterv1.Cluster{},
wantErr: false,
},
{
name: "kubeconfig secret found",
Expand All @@ -180,15 +166,13 @@ func TestClusterReconciler(t *testing.T) {
Name: "test-cluster-kubeconfig",
},
},
wantErr: false,
expectedMetric: 1,
wantErr: false,
},
{
name: "kubeconfig secret not found, should return RequeueAfterError",
cluster: cluster,
wantErr: true,
wantRequeue: true,
expectedMetric: 0,
name: "kubeconfig secret not found, should return RequeueAfterError",
cluster: cluster,
wantErr: true,
wantRequeue: true,
},
{
name: "invalid ca secret, should return error",
Expand All @@ -198,8 +182,7 @@ func TestClusterReconciler(t *testing.T) {
Name: "test-cluster-ca",
},
},
wantErr: true,
expectedMetric: 0,
wantErr: true,
},
}
for _, tt := range tests {
Expand All @@ -226,18 +209,6 @@ func TestClusterReconciler(t *testing.T) {
if tt.wantRequeue != hasRequeErr {
t.Errorf("expected RequeAfterError = %v, got %v", tt.wantRequeue, hasRequeErr)
}

mr, err := metrics.Registry.Gather()
Expect(err).ToNot(HaveOccurred())
mf := getMetricFamily(mr, "capi_cluster_kubeconfig_ready")
Expect(mf).ToNot(BeNil())
for _, m := range mf.GetMetric() {
for _, lp := range m.GetLabel() {
if lp.GetName() == "cluster" && lp.GetValue() == "test-cluster" {
Expect(m.GetGauge().GetValue()).To(Equal(tt.expectedMetric))
}
}
}
})
}
})
Expand Down
Loading

0 comments on commit 96f9e2d

Please sign in to comment.