From cc6f5a9dc9b3064f1c4ba4fa10f36a66f54d4151 Mon Sep 17 00:00:00 2001 From: Raphael Fonseca Date: Mon, 15 Jul 2024 11:30:48 -0300 Subject: [PATCH 1/3] chore: add custom metrics to sgController --- go.mod | 6 +- go.sum | 4 +- .../ec2.aws/securitygroup_controller.go | 56 +++++- .../ec2.aws/securitygroup_controller_test.go | 175 ++++++++++++++++++ metrics/metrics.go | 22 +++ pkg/kops/kops.go | 5 - 6 files changed, 250 insertions(+), 18 deletions(-) create mode 100644 metrics/metrics.go diff --git a/go.mod b/go.mod index 3f48326..20802c8 100644 --- a/go.mod +++ b/go.mod @@ -18,7 +18,7 @@ require ( github.com/onsi/ginkgo/v2 v2.11.0 github.com/onsi/gomega v1.27.10 github.com/pkg/errors v0.9.1 - github.com/topfreegames/kubernetes-kops-operator v0.11.0-rc + github.com/topfreegames/kubernetes-kops-operator v0.13.1-alpha go.uber.org/zap v1.25.0 k8s.io/api v0.28.1 k8s.io/apimachinery v0.28.1 @@ -95,8 +95,8 @@ require ( github.com/modern-go/reflect2 v1.0.2 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pkg/sftp v1.13.6 // indirect - github.com/prometheus/client_golang v1.16.0 // indirect - github.com/prometheus/client_model v0.4.0 // indirect + github.com/prometheus/client_golang v1.16.0 + github.com/prometheus/client_model v0.4.0 github.com/prometheus/common v0.44.0 // indirect github.com/prometheus/procfs v0.10.1 // indirect github.com/spf13/afero v1.9.5 // indirect diff --git a/go.sum b/go.sum index 3f182e8..2a5ade4 100644 --- a/go.sum +++ b/go.sum @@ -360,8 +360,8 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -github.com/topfreegames/kubernetes-kops-operator v0.11.0-rc h1:L4WaAXgb+FM+D+oVJnJgxutOiwWTbCS+1uhpIhLzTqA= -github.com/topfreegames/kubernetes-kops-operator v0.11.0-rc/go.mod h1:PnaRBjLXOYvuTRDZEBppcE0xLAVvz8NH/5gwi//SUIc= +github.com/topfreegames/kubernetes-kops-operator v0.13.1-alpha h1:j2a0J3owdH5gB2c0AW42JcjA/Fi5a1L1mJBz0yAknE4= +github.com/topfreegames/kubernetes-kops-operator v0.13.1-alpha/go.mod h1:+FwqSNyOaJ3bRMI07c2DrVAJD+Ky8rIE1yaVsw5Du9E= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= diff --git a/internal/controller/ec2.aws/securitygroup_controller.go b/internal/controller/ec2.aws/securitygroup_controller.go index 44c7cd2..cbc7ea6 100644 --- a/internal/controller/ec2.aws/securitygroup_controller.go +++ b/internal/controller/ec2.aws/securitygroup_controller.go @@ -43,6 +43,7 @@ import ( crossec2v1beta1 "github.com/crossplane-contrib/provider-aws/apis/ec2/v1beta1" "github.com/go-logr/logr" "github.com/hashicorp/go-multierror" + custommetrics "github.com/topfreegames/kubernetes-crossplane-infrastructure-operator/metrics" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" @@ -116,7 +117,7 @@ func DefaultReconciler(mgr manager.Manager) *SecurityGroupReconciler { //+kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;patch //+kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch;create;update -func (c *SecurityGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, rerr error) { +func (c *SecurityGroupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reconciliationErr error) { r := &SecurityGroupReconciliation{ SecurityGroupReconciler: *c, start: time.Now(), @@ -141,11 +142,13 @@ func (c *SecurityGroupReconciler) Reconcile(ctx context.Context, req ctrl.Reques r.log.Info(fmt.Sprintf("starting reconcile loop for %s", r.sg.ObjectMeta.GetName())) defer func() { + environment := r.getEnvironment(ctx) + securityGroupHelper := r.sg.DeepCopy() if err := r.Update(ctx, r.sg); err != nil { r.Recorder.Eventf(r.sg, corev1.EventTypeWarning, "FailedToUpdate", "failed to update security group %s: %s", r.sg.Name, err) - if rerr == nil { - rerr = err + if reconciliationErr == nil { + reconciliationErr = err } } @@ -153,13 +156,17 @@ func (c *SecurityGroupReconciler) Reconcile(ctx context.Context, req ctrl.Reques r.sg.Status = securityGroupHelper.Status if err := r.Status().Update(ctx, r.sg); err != nil { r.Recorder.Eventf(r.sg, corev1.EventTypeWarning, "FailedToUpdateStatus", "failed to update security group status %s: %s", r.sg.Name, err) - if rerr == nil { - rerr = err + if reconciliationErr == nil { + reconciliationErr = err } } } - - r.log.Info(fmt.Sprintf("finished reconcile loop for %s", r.sg.ObjectMeta.GetName())) + if reconciliationErr != nil { + custommetrics.ReconciliationConsecutiveErrorsTotal.WithLabelValues("securitygroup", r.sg.Name, environment).Inc() + } else { + custommetrics.ReconciliationConsecutiveErrorsTotal.WithLabelValues("securitygroup", r.sg.Name, environment).Set(0) + } + r.log.Info(fmt.Sprintf("finished reconcile loop for %s", r.sg.Name)) }() err := r.retrieveInfraRefInfo(ctx) @@ -181,7 +188,7 @@ func (r *SecurityGroupReconciliation) retrieveKCPNetworkInfo(ctx context.Context Name: name, Namespace: namespace, } - if err := r.Client.Get(ctx, key, kcp); err != nil { + if err := r.Get(ctx, key, kcp); err != nil { return nil, nil, nil, err } @@ -967,3 +974,36 @@ func (r *SecurityGroupReconciliation) addKCPFinalizer(ctx context.Context, kcp k } } } + +func (r *SecurityGroupReconciliation) getEnvironment(ctx context.Context) string { + ref := r.sg.Spec.InfrastructureRef[0] + var clusterObject clusterv1beta1.Cluster + switch ref.Kind { + case "KopsMachinePool": + kmp := kinfrastructurev1alpha1.KopsMachinePool{} + key := client.ObjectKey{ + Name: ref.Name, + Namespace: ref.Namespace, + } + if err := r.Client.Get(ctx, key, &kmp); err != nil { + return "" + } + clusterName := &kmp.Spec.ClusterName + key = client.ObjectKey{ + Name: *clusterName, + Namespace: kmp.ObjectMeta.Namespace, + } + if err := r.Client.Get(ctx, key, &clusterObject); err != nil { + return "" + } + case "KopsControlPlane": + key := client.ObjectKey{ + Name: ref.Name, + Namespace: ref.Namespace, + } + if err := r.Client.Get(ctx, key, &clusterObject); err != nil { + return "" + } + } + return clusterObject.Labels["environment"] +} diff --git a/internal/controller/ec2.aws/securitygroup_controller_test.go b/internal/controller/ec2.aws/securitygroup_controller_test.go index 5967481..3488fcb 100644 --- a/internal/controller/ec2.aws/securitygroup_controller_test.go +++ b/internal/controller/ec2.aws/securitygroup_controller_test.go @@ -31,9 +31,11 @@ import ( crossec2v1beta1 "github.com/crossplane-contrib/provider-aws/apis/ec2/v1beta1" crossplanev1 "github.com/crossplane/crossplane-runtime/apis/common/v1" "github.com/hashicorp/go-multierror" + dto "github.com/prometheus/client_model/go" oceanaws "github.com/spotinst/spotinst-sdk-go/service/ocean/providers/aws" clustermeshv1alpha1 "github.com/topfreegames/kubernetes-crossplane-infrastructure-operator/api/clustermesh.infrastructure/v1alpha1" securitygroupv1alpha2 "github.com/topfreegames/kubernetes-crossplane-infrastructure-operator/api/ec2.aws/v1alpha2" + custommetrics "github.com/topfreegames/kubernetes-crossplane-infrastructure-operator/metrics" "github.com/topfreegames/kubernetes-crossplane-infrastructure-operator/pkg/aws/autoscaling" fakeasg "github.com/topfreegames/kubernetes-crossplane-infrastructure-operator/pkg/aws/autoscaling/fake" "github.com/topfreegames/kubernetes-crossplane-infrastructure-operator/pkg/aws/ec2" @@ -50,6 +52,7 @@ import ( "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/record" kopsapi "k8s.io/kops/pkg/apis/kops" + "k8s.io/kops/util/pkg/vfs" clusterv1beta1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/util/conditions" ctrl "sigs.k8s.io/controller-runtime" @@ -304,6 +307,9 @@ var ( ObjectMeta: metav1.ObjectMeta{ Namespace: metav1.NamespaceDefault, Name: "test-cluster", + Labels: map[string]string{ + "environment": "testing", + }, }, Spec: clusterv1beta1.ClusterSpec{ ControlPlaneRef: &corev1.ObjectReference{ @@ -4463,6 +4469,175 @@ func TestSecurityGroupStatus(t *testing.T) { } } +func TestCustomMetrics(t *testing.T) { + testCases := []struct { + description string + expectedResult float64 + reconciliationStatusResult []string + }{ + { + description: "should be zero on successful reconciliation", + expectedResult: 0.0, + reconciliationStatusResult: []string{"succeed"}, + }, + { + description: "should get incremented on reconcile errors", + expectedResult: 1.0, + reconciliationStatusResult: []string{"fail"}, + }, + { + description: "should get incremented on consecutive reconcile errors", + expectedResult: 2.0, + reconciliationStatusResult: []string{"fail", "fail"}, + }, + { + description: "should be zero after a successful reconciliation", + expectedResult: 0.0, + reconciliationStatusResult: []string{"fail", "succeed"}, + }, + } + RegisterFailHandler(Fail) + g := NewWithT(t) + + custommetrics.ReconciliationConsecutiveErrorsTotal.Reset() + vfs.Context.ResetMemfsContext(true) + err := clusterv1beta1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + + err = crossec2v1beta1.SchemeBuilder.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + + err = securitygroupv1alpha2.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + + err = kinfrastructurev1alpha1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + + err = kcontrolplanev1alpha1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + + fakeASGClient := &fakeasg.MockAutoScalingClient{} + fakeASGClient.MockDescribeAutoScalingGroups = func(ctx context.Context, params *awsautoscaling.DescribeAutoScalingGroupsInput, optFns []func(*awsautoscaling.Options)) (*awsautoscaling.DescribeAutoScalingGroupsOutput, error) { + return &awsautoscaling.DescribeAutoScalingGroupsOutput{ + AutoScalingGroups: []autoscalingtypes.AutoScalingGroup{ + { + AutoScalingGroupName: aws.String("test-asg"), + LaunchTemplate: &autoscalingtypes.LaunchTemplateSpecification{ + LaunchTemplateId: aws.String("lt-xxxx"), + Version: aws.String("1"), + }, + }, + }, + }, nil + } + fakeEC2Client := &fakeec2.MockEC2Client{} + fakeEC2Client.MockDescribeSecurityGroups = func(ctx context.Context, params *awsec2.DescribeSecurityGroupsInput, optFns []func(*awsec2.Options)) (*awsec2.DescribeSecurityGroupsOutput, error) { + return &awsec2.DescribeSecurityGroupsOutput{}, nil + } + fakeEC2Client.MockDescribeVpcs = func(ctx context.Context, input *awsec2.DescribeVpcsInput, opts []func(*awsec2.Options)) (*awsec2.DescribeVpcsOutput, error) { + return &awsec2.DescribeVpcsOutput{ + Vpcs: []ec2types.Vpc{ + { + VpcId: aws.String("x.x.x.x"), + }, + }, + }, nil + } + fakeEC2Client.MockDescribeLaunchTemplateVersions = func(ctx context.Context, params *awsec2.DescribeLaunchTemplateVersionsInput, optFns []func(*awsec2.Options)) (*awsec2.DescribeLaunchTemplateVersionsOutput, error) { + return &awsec2.DescribeLaunchTemplateVersionsOutput{ + LaunchTemplateVersions: []ec2types.LaunchTemplateVersion{ + { + LaunchTemplateId: params.LaunchTemplateId, + LaunchTemplateData: &ec2types.ResponseLaunchTemplateData{ + NetworkInterfaces: []ec2types.LaunchTemplateInstanceNetworkInterfaceSpecification{ + { + Groups: []string{ + "sg-xxxx", + }, + }, + }, + }, + }, + }, + }, nil + } + fakeEC2Client.MockCreateLaunchTemplateVersion = func(ctx context.Context, params *awsec2.CreateLaunchTemplateVersionInput, optFns []func(*awsec2.Options)) (*awsec2.CreateLaunchTemplateVersionOutput, error) { + return &awsec2.CreateLaunchTemplateVersionOutput{ + LaunchTemplateVersion: &ec2types.LaunchTemplateVersion{ + VersionNumber: aws.Int64(1), + }, + }, nil + } + fakeEC2Client.MockDescribeLaunchTemplates = func(ctx context.Context, params *awsec2.DescribeLaunchTemplatesInput, optFns []func(*awsec2.Options)) (*awsec2.DescribeLaunchTemplatesOutput, error) { + return &awsec2.DescribeLaunchTemplatesOutput{ + LaunchTemplates: []ec2types.LaunchTemplate{ + { + LaunchTemplateId: aws.String("lt-xxxx"), + LaunchTemplateName: aws.String("test-launch-template"), + Tags: []ec2types.Tag{ + { + Key: aws.String("tag:KubernetesCluster"), + Value: aws.String("test-cluster"), + }, + { + Key: aws.String("tag:Name"), + Value: aws.String("test-launch-template"), + }, + }, + }, + }, + }, nil + } + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + custommetrics.ReconciliationConsecutiveErrorsTotal.Reset() + + fakeClient := fake.NewClientBuilder().WithObjects(kcp, cluster, defaultSecret, karpenterKMP, sg, csg).WithScheme(scheme.Scheme).WithStatusSubresource(sg).Build() + + reconciler := &SecurityGroupReconciler{ + Client: fakeClient, + NewEC2ClientFactory: func(cfg aws.Config) ec2.EC2Client { + return fakeEC2Client + }, + NewAutoScalingClientFactory: func(cfg aws.Config) autoscaling.AutoScalingClient { + return fakeASGClient + }, + } + + for _, reconciliationStatus := range tc.reconciliationStatusResult { + reconciler.Recorder = record.NewFakeRecorder(10) + // This is just to force a reconciliation error so we can increment the metric + if reconciliationStatus == "fail" { + fakeEC2Client.MockDescribeInstances = func(ctx context.Context, input *awsec2.DescribeInstancesInput, opts []func(*awsec2.Options)) (*awsec2.DescribeInstancesOutput, error) { + return nil, errors.New("error") + } + } else { + fakeEC2Client.MockDescribeInstances = func(ctx context.Context, input *awsec2.DescribeInstancesInput, opts []func(*awsec2.Options)) (*awsec2.DescribeInstancesOutput, error) { + return &awsec2.DescribeInstancesOutput{}, nil + } + } + _, _ = reconciler.Reconcile(context.TODO(), ctrl.Request{ + NamespacedName: client.ObjectKey{ + Name: sg.Name, + }, + }) + + } + + var reconciliationConsecutiveErrorsTotal dto.Metric + g.Expect(func() error { + g.Expect(custommetrics.ReconciliationConsecutiveErrorsTotal.WithLabelValues("securitygroup", sg.Name, "testing").Write(&reconciliationConsecutiveErrorsTotal)).To(Succeed()) + metricValue := reconciliationConsecutiveErrorsTotal.GetGauge().GetValue() + if metricValue != tc.expectedResult { + return fmt.Errorf("metric value differs from expected: %f != %f", metricValue, tc.expectedResult) + } + return nil + }()).Should(Succeed()) + + }) + } +} + func assertConditions(g *WithT, from conditions.Getter, conditions ...*clusterv1beta1.Condition) { for _, condition := range conditions { assertCondition(g, from, condition) diff --git a/metrics/metrics.go b/metrics/metrics.go new file mode 100644 index 0000000..3530fae --- /dev/null +++ b/metrics/metrics.go @@ -0,0 +1,22 @@ +package metrics + +import ( + "github.com/prometheus/client_golang/prometheus" + "sigs.k8s.io/controller-runtime/pkg/metrics" +) + +var ( + + // TODO: We could add the reason of the error as a label as well + ReconciliationConsecutiveErrorsTotal = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "custom_reconciliation_consecutive_errors_total", + Help: "Total number of consecutive reconciliation errors labeled by controller, cluster and environment", + }, []string{"controller", "sg_name", "cluster_environment"}, + ) +) + +func init() { + // Register custom metrics with the global prometheus registry + metrics.Registry.MustRegister(ReconciliationConsecutiveErrorsTotal) +} diff --git a/pkg/kops/kops.go b/pkg/kops/kops.go index a0bbac2..75624a0 100644 --- a/pkg/kops/kops.go +++ b/pkg/kops/kops.go @@ -24,12 +24,7 @@ func GetRegionFromKopsControlPlane(ctx context.Context, kcp *kcontrolplanev1alph return nil, err } - if err != nil { - return nil, err - } - return region, nil - } func awsConfigForCredential(ctx context.Context, region string, accessKey string, secretAccessKey string) (aws.Config, error) { From abbc1cd62ebcf58e25a5fab2fc80e7f8beb39086 Mon Sep 17 00:00:00 2001 From: Raphael Fonseca Date: Mon, 15 Jul 2024 12:02:18 -0300 Subject: [PATCH 2/3] chore: add metrics directory --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index 693f940..7e74d3f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,6 +16,7 @@ COPY cmd/main.go cmd/main.go COPY api/ api/ COPY internal/controller/ internal/controller/ COPY pkg/ pkg/ +COPY metrics/ metrics/ # Build # the GOARCH has not a default value to allow the binary be built according to the host where the command From 421917e22ee18b4ded6c6e288cff73ba476de7fe Mon Sep 17 00:00:00 2001 From: Raphael Fonseca Date: Thu, 18 Jul 2024 19:08:51 -0300 Subject: [PATCH 3/3] chore: improve getEnvironment add tests, fix metrics labels --- .kubernetes/manifests.yaml | 5 +- .../manager_datadog_openmetrics_patch.yaml | 3 +- config/manager/kustomization.yaml | 2 +- .../ec2.aws/securitygroup_controller.go | 61 +++++---- .../ec2.aws/securitygroup_controller_test.go | 125 ++++++++++++++++++ metrics/metrics.go | 5 +- 6 files changed, 169 insertions(+), 32 deletions(-) diff --git a/.kubernetes/manifests.yaml b/.kubernetes/manifests.yaml index 6de6a4f..534affa 100644 --- a/.kubernetes/manifests.yaml +++ b/.kubernetes/manifests.yaml @@ -295,7 +295,8 @@ spec: [{ "metrics": [ "controller*", - "workqueue*" + "workqueue*", + "custom*" ], "namespace": "kcio", "prometheus_url": "http://%%host%%:8080/metrics", @@ -324,7 +325,7 @@ spec: key: account name: spotinst-credentials optional: true - image: tfgco/kubernetes-crossplane-infrastructure-operator:v0.8.0-alpha + image: tfgco/kubernetes-crossplane-infrastructure-operator:v0.8.4-alpha livenessProbe: httpGet: path: /healthz diff --git a/config/default/manager_datadog_openmetrics_patch.yaml b/config/default/manager_datadog_openmetrics_patch.yaml index ce7e42e..5990030 100644 --- a/config/default/manager_datadog_openmetrics_patch.yaml +++ b/config/default/manager_datadog_openmetrics_patch.yaml @@ -13,7 +13,8 @@ spec: [{ "metrics": [ "controller*", - "workqueue*" + "workqueue*", + "custom*" ], "namespace": "kcio", "prometheus_url": "http://%%host%%:8080/metrics", diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index 8dcba8b..c090a7a 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -13,4 +13,4 @@ kind: Kustomization images: - name: controller newName: tfgco/kubernetes-crossplane-infrastructure-operator - newTag: v0.8.3-alpha + newTag: v0.8.4-alpha diff --git a/internal/controller/ec2.aws/securitygroup_controller.go b/internal/controller/ec2.aws/securitygroup_controller.go index cbc7ea6..6ff6c3d 100644 --- a/internal/controller/ec2.aws/securitygroup_controller.go +++ b/internal/controller/ec2.aws/securitygroup_controller.go @@ -976,34 +976,43 @@ func (r *SecurityGroupReconciliation) addKCPFinalizer(ctx context.Context, kcp k } func (r *SecurityGroupReconciliation) getEnvironment(ctx context.Context) string { - ref := r.sg.Spec.InfrastructureRef[0] var clusterObject clusterv1beta1.Cluster - switch ref.Kind { - case "KopsMachinePool": - kmp := kinfrastructurev1alpha1.KopsMachinePool{} - key := client.ObjectKey{ - Name: ref.Name, - Namespace: ref.Namespace, - } - if err := r.Client.Get(ctx, key, &kmp); err != nil { - return "" - } - clusterName := &kmp.Spec.ClusterName - key = client.ObjectKey{ - Name: *clusterName, - Namespace: kmp.ObjectMeta.Namespace, - } - if err := r.Client.Get(ctx, key, &clusterObject); err != nil { - return "" - } - case "KopsControlPlane": - key := client.ObjectKey{ - Name: ref.Name, - Namespace: ref.Namespace, + var err error + + for _, ref := range r.sg.Spec.InfrastructureRef { + err = nil + switch ref.Kind { + case "KopsMachinePool": + kmp := kinfrastructurev1alpha1.KopsMachinePool{} + key := client.ObjectKey{ + Name: ref.Name, + Namespace: ref.Namespace, + } + if err = r.Client.Get(ctx, key, &kmp); err != nil { + continue + } + clusterName := &kmp.Spec.ClusterName + key = client.ObjectKey{ + Name: *clusterName, + Namespace: kmp.ObjectMeta.Namespace, + } + err = r.Client.Get(ctx, key, &clusterObject) + case "KopsControlPlane": + key := client.ObjectKey{ + Name: ref.Name, + Namespace: ref.Namespace, + } + err = r.Client.Get(ctx, key, &clusterObject) } - if err := r.Client.Get(ctx, key, &clusterObject); err != nil { - return "" + if err != nil { + continue + } else { + break } } - return clusterObject.Labels["environment"] + if err == nil { + return clusterObject.Labels["environment"] + } else { + return "environmentNotFound" + } } diff --git a/internal/controller/ec2.aws/securitygroup_controller_test.go b/internal/controller/ec2.aws/securitygroup_controller_test.go index 3488fcb..7f0e717 100644 --- a/internal/controller/ec2.aws/securitygroup_controller_test.go +++ b/internal/controller/ec2.aws/securitygroup_controller_test.go @@ -4638,6 +4638,131 @@ func TestCustomMetrics(t *testing.T) { } } +func TestGetEnvironment(t *testing.T) { + testCases := []struct { + description string + k8sObjects []client.Object + sg *securitygroupv1alpha2.SecurityGroup + expectedEnvironment string + }{ + { + description: "should find environment by KMP", + k8sObjects: []client.Object{ + kmp, + cluster, + }, + sg: sg, + expectedEnvironment: "testing", + }, + { + description: "should find environment by KCP", + k8sObjects: []client.Object{ + kcp, + cluster, + }, + sg: sgKCP, + expectedEnvironment: "testing", + }, + { + description: "should not find environment because cluster not exists", + k8sObjects: []client.Object{ + kmp, + }, + sg: sgKCP, + expectedEnvironment: "environmentNotFound", + }, + { + description: "should not find environment because kmp not exists", + k8sObjects: []client.Object{ + cluster, + }, + sg: sg, + expectedEnvironment: "environmentNotFound", + }, + { + description: "should find environment by second KMP", + k8sObjects: []client.Object{ + kmp, + cluster, + }, + sg: &securitygroupv1alpha2.SecurityGroup{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-security-group", + }, + Spec: securitygroupv1alpha2.SecurityGroupSpec{ + IngressRules: []securitygroupv1alpha2.IngressRule{ + { + IPProtocol: "TCP", + FromPort: 40000, + ToPort: 60000, + AllowedCIDRBlocks: []string{ + "0.0.0.0/0", + }, + }, + }, + InfrastructureRef: []*corev1.ObjectReference{ + { + APIVersion: "infrastructure.cluster.x-k8s.io/v1alpha1", + Kind: "KopsMachinePool", + Name: "test-kops-machine-pool-not-exists", + Namespace: metav1.NamespaceDefault, + }, + { + APIVersion: "infrastructure.cluster.x-k8s.io/v1alpha1", + Kind: "KopsMachinePool", + Name: "test-kops-machine-pool", + Namespace: metav1.NamespaceDefault, + }, + }, + }, + }, + expectedEnvironment: "testing", + }, + } + + RegisterFailHandler(Fail) + g := NewWithT(t) + + err := clusterv1beta1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + + err = crossec2v1beta1.SchemeBuilder.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + + err = securitygroupv1alpha2.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + + err = kinfrastructurev1alpha1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + + err = kcontrolplanev1alpha1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + ctx := context.TODO() + fakeClient := fake.NewClientBuilder().WithScheme(scheme.Scheme).WithObjects(tc.k8sObjects...).Build() + + reconciler := SecurityGroupReconciler{ + Client: fakeClient, + NewEC2ClientFactory: nil, + NewAutoScalingClientFactory: nil, + } + + reconciliation := &SecurityGroupReconciliation{ + SecurityGroupReconciler: reconciler, + log: ctrl.LoggerFrom(ctx), + sg: tc.sg, + ec2Client: nil, + asgClient: nil, + } + + environment := reconciliation.getEnvironment(ctx) + g.Expect(environment).To(Equal(tc.expectedEnvironment)) + }) + } +} + func assertConditions(g *WithT, from conditions.Getter, conditions ...*clusterv1beta1.Condition) { for _, condition := range conditions { assertCondition(g, from, condition) diff --git a/metrics/metrics.go b/metrics/metrics.go index 3530fae..063c6f5 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -8,11 +8,12 @@ import ( var ( // TODO: We could add the reason of the error as a label as well + // For while we only have support for this metric in the securityGroup controller ReconciliationConsecutiveErrorsTotal = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Name: "custom_reconciliation_consecutive_errors_total", - Help: "Total number of consecutive reconciliation errors labeled by controller, cluster and environment", - }, []string{"controller", "sg_name", "cluster_environment"}, + Help: "Total number of consecutive reconciliation errors labeled by controller, name of securityGroup/clustermesh and cluster environment", + }, []string{"controller", "object_name", "cluster_environment"}, ) )