Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed Otel Controller - Auxiliares resources deletion #2575

Merged
merged 31 commits into from
Feb 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
5a31481
Fixed HPA deletion
yuriolisa Jan 29, 2024
bd9bd93
Fix e2e tests
yuriolisa Jan 29, 2024
10f82b0
Added reconciliation test
yuriolisa Feb 12, 2024
515a034
Changed from client.Object to HPA Obj
yuriolisa Feb 12, 2024
2ac2af7
Added owned objects function
yuriolisa Feb 12, 2024
a61de02
Fixed controller test
yuriolisa Feb 13, 2024
20c7bf6
Fixed controller test
yuriolisa Feb 13, 2024
29135cd
Fixed controller test
yuriolisa Feb 13, 2024
8f6159c
Add PodDisruptionBudget
yuriolisa Feb 14, 2024
dacc99a
Add PodDisruptionBudget
yuriolisa Feb 15, 2024
a0a30da
Change vars setting
yuriolisa Feb 16, 2024
1edd721
Change vars setting
yuriolisa Feb 16, 2024
29ad837
Change vars setting
yuriolisa Feb 19, 2024
2f1b576
Update e2e tests to chainsaw
yuriolisa Feb 20, 2024
9a669d0
Added ingress e2e tests
yuriolisa Feb 21, 2024
ebbb447
Added Volumes, changed function's place
yuriolisa Feb 26, 2024
55240ba
Added Volumes, changed function's place
yuriolisa Feb 26, 2024
4350a56
Added Volumes, changed function's place
yuriolisa Feb 27, 2024
1839504
Fixed Suite Test
yuriolisa Feb 27, 2024
88b30ef
Added RBAC for Volumes
yuriolisa Feb 27, 2024
7e1b010
Added RBAC for Volumes
yuriolisa Feb 27, 2024
18c8385
Fixed Suite Test
yuriolisa Feb 27, 2024
3e77e72
Fixed Suite Test
yuriolisa Feb 27, 2024
8bd21d5
Fixed Suite Test
yuriolisa Feb 27, 2024
2087366
Add a sleep
jaronoff97 Feb 27, 2024
d2a0269
Merge pull request #1 from jaronoff97/fix-hpa-delete
yuriolisa Feb 28, 2024
7d09dd0
Fixed Chainsaw test
yuriolisa Feb 28, 2024
42469b4
Fixed Chainsaw test
yuriolisa Feb 28, 2024
8af8038
Fixed Chainsaw test
yuriolisa Feb 28, 2024
1ffc7eb
Fixed Chainsaw test
yuriolisa Feb 28, 2024
ba7c3b2
Fixed Chainsaw test
yuriolisa Feb 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .chloggen/fix-hpa-delete.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: 'bug_fix'

# The name of the component, or a single word describing the area of concern, (e.g. operator, target allocator, github action)
component: operator

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Fixed HPA deletion

# One or more tracking issues related to the change
issues: [2568, 2587, 2651]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ endif

START_KIND_CLUSTER ?= true

KUBE_VERSION ?= 1.24
KUBE_VERSION ?= 1.29
KIND_CONFIG ?= kind-$(KUBE_VERSION).yaml
KIND_CLUSTER_NAME ?= "otel-operator"

Expand Down Expand Up @@ -367,7 +367,7 @@ KUSTOMIZE_VERSION ?= v5.0.3
CONTROLLER_TOOLS_VERSION ?= v0.12.0
GOLANGCI_LINT_VERSION ?= v1.54.0
KIND_VERSION ?= v0.20.0
CHAINSAW_VERSION ?= v0.1.6
CHAINSAW_VERSION ?= v0.1.7

.PHONY: install-tools
install-tools: kustomize golangci-lint kind controller-gen envtest crdoc kind operator-sdk chainsaw
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ metadata:
categories: Logging & Tracing,Monitoring
certified: "false"
containerImage: ghcr.io/open-telemetry/opentelemetry-operator/opentelemetry-operator
createdAt: "2024-02-15T18:18:46Z"
createdAt: "2024-02-27T11:16:56Z"
description: Provides the OpenTelemetry components, including the Collector
operators.operatorframework.io/builder: operator-sdk-v1.29.0
operators.operatorframework.io/project_layout: go.kubebuilder.io/v3
Expand Down Expand Up @@ -175,6 +175,8 @@ spec:
- ""
resources:
- configmaps
- persistentvolumeclaims
- persistentvolumes
- pods
- serviceaccounts
- services
Expand Down
2 changes: 2 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ rules:
- ""
resources:
- configmaps
- persistentvolumeclaims
- persistentvolumes
- pods
- serviceaccounts
- services
Expand Down
3 changes: 3 additions & 0 deletions controllers/builder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,9 @@ service:
"app.kubernetes.io/instance": "test.test",
"app.kubernetes.io/managed-by": "opentelemetry-operator",
"app.kubernetes.io/name": "test-ingress",
"app.kubernetes.io/component": "opentelemetry-collector",
"app.kubernetes.io/part-of": "opentelemetry",
"app.kubernetes.io/version": "latest",
},
Annotations: map[string]string{
"something": "true",
Expand Down
24 changes: 22 additions & 2 deletions controllers/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
rbacv1 "k8s.io/api/rbac/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/util/retry"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -77,7 +78,7 @@ func BuildOpAMPBridge(params manifests.Params) ([]client.Object, error) {
}

// reconcileDesiredObjects runs the reconcile process using the mutateFn over the given list of objects.
func reconcileDesiredObjects(ctx context.Context, kubeClient client.Client, logger logr.Logger, owner metav1.Object, scheme *runtime.Scheme, desiredObjects ...client.Object) error {
func reconcileDesiredObjects(ctx context.Context, kubeClient client.Client, logger logr.Logger, owner metav1.Object, scheme *runtime.Scheme, desiredObjects []client.Object, ownedObjects map[types.UID]client.Object) error {
var errs []error
for _, desired := range desiredObjects {
l := logger.WithValues(
Expand All @@ -91,7 +92,6 @@ func reconcileDesiredObjects(ctx context.Context, kubeClient client.Client, logg
continue
}
}

// existing is an object the controller runtime will hydrate for us
// we obtain the existing object by deep copying the desired object because it's the most convenient way
existing := desired.DeepCopyObject().(client.Object)
Expand All @@ -116,9 +116,29 @@ func reconcileDesiredObjects(ctx context.Context, kubeClient client.Client, logg
}

l.V(1).Info(fmt.Sprintf("desired has been %s", op))
// This object is still managed by the operator, remove it from the list of objects to prune
delete(ownedObjects, existing.GetUID())
}
if len(errs) > 0 {
return fmt.Errorf("failed to create objects for %s: %w", owner.GetName(), errors.Join(errs...))
}
// Pruning owned objects in the cluster which are not should not be present after the reconciliation.
pruneErrs := []error{}
for _, obj := range ownedObjects {
l := logger.WithValues(
"object_name", obj.GetName(),
"object_kind", obj.GetObjectKind().GroupVersionKind(),
)

l.Info("pruning unmanaged resource")
err := kubeClient.Delete(ctx, obj)
if err != nil {
l.Error(err, "failed to delete resource")
pruneErrs = append(pruneErrs, err)
}
}
if len(pruneErrs) > 0 {
return fmt.Errorf("failed to prune objects for %s: %w", owner.GetName(), errors.Join(pruneErrs...))
}
return nil
}
2 changes: 1 addition & 1 deletion controllers/opampbridge_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ func (r *OpAMPBridgeReconciler) Reconcile(ctx context.Context, req ctrl.Request)
if buildErr != nil {
return ctrl.Result{}, buildErr
}
err := reconcileDesiredObjects(ctx, r.Client, log, &params.OpAMPBridge, params.Scheme, desiredObjects...)
err := reconcileDesiredObjects(ctx, r.Client, log, &params.OpAMPBridge, params.Scheme, desiredObjects, nil)
return opampbridgeStatus.HandleReconcileStatus(ctx, log, params, err)
}

Expand Down
91 changes: 87 additions & 4 deletions controllers/opentelemetrycollector_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,24 +17,32 @@ package controllers

import (
"context"
"fmt"

"github.com/go-logr/logr"
routev1 "github.com/openshift/api/route/v1"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
appsv1 "k8s.io/api/apps/v1"
autoscalingv2 "k8s.io/api/autoscaling/v2"
corev1 "k8s.io/api/core/v1"
networkingv1 "k8s.io/api/networking/v1"
policyV1 "k8s.io/api/policy/v1"
rbacv1 "k8s.io/api/rbac/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/record"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/open-telemetry/opentelemetry-operator/apis/v1alpha1"
"github.com/open-telemetry/opentelemetry-operator/internal/api/convert"
"github.com/open-telemetry/opentelemetry-operator/internal/autodetect/openshift"
"github.com/open-telemetry/opentelemetry-operator/internal/config"
"github.com/open-telemetry/opentelemetry-operator/internal/manifests"
"github.com/open-telemetry/opentelemetry-operator/internal/manifests/collector"
"github.com/open-telemetry/opentelemetry-operator/internal/manifests/manifestutils"
collectorStatus "github.com/open-telemetry/opentelemetry-operator/internal/status/collector"
"github.com/open-telemetry/opentelemetry-operator/pkg/featuregate"
)
Expand All @@ -57,6 +65,71 @@ type Params struct {
Config config.Config
}

func (r *OpenTelemetryCollectorReconciler) findOtelOwnedObjects(ctx context.Context, params manifests.Params) (map[types.UID]client.Object, error) {
ownedObjects := map[types.UID]client.Object{}

listOps := &client.ListOptions{
Namespace: params.OtelCol.Namespace,
LabelSelector: labels.SelectorFromSet(manifestutils.SelectorLabels(params.OtelCol.ObjectMeta, collector.ComponentOpenTelemetryCollector)),
}
hpaList := &autoscalingv2.HorizontalPodAutoscalerList{}
err := r.List(ctx, hpaList, listOps)
if err != nil {
return nil, fmt.Errorf("error listing HorizontalPodAutoscalers: %w", err)
}
for i := range hpaList.Items {
ownedObjects[hpaList.Items[i].GetUID()] = &hpaList.Items[i]
}
if featuregate.PrometheusOperatorIsAvailable.IsEnabled() {
servicemonitorList := &monitoringv1.ServiceMonitorList{}
err = r.List(ctx, servicemonitorList, listOps)
if err != nil {
return nil, fmt.Errorf("error listing ServiceMonitors: %w", err)
}
for i := range servicemonitorList.Items {
ownedObjects[servicemonitorList.Items[i].GetUID()] = servicemonitorList.Items[i]
}

podMonitorList := &monitoringv1.PodMonitorList{}
err = r.List(ctx, podMonitorList, listOps)
if err != nil {
return nil, fmt.Errorf("error listing PodMonitors: %w", err)
}
for i := range podMonitorList.Items {
ownedObjects[podMonitorList.Items[i].GetUID()] = podMonitorList.Items[i]
}
}
ingressList := &networkingv1.IngressList{}
err = r.List(ctx, ingressList, listOps)
if err != nil {
return nil, fmt.Errorf("error listing Ingresses: %w", err)
}
for i := range ingressList.Items {
ownedObjects[ingressList.Items[i].GetUID()] = &ingressList.Items[i]
}

if params.Config.OpenShiftRoutesAvailability() == openshift.RoutesAvailable {
routesList := &routev1.RouteList{}
err = r.List(ctx, routesList, listOps)
if err != nil {
return nil, fmt.Errorf("error listing Routes: %w", err)
}
for i := range routesList.Items {
ownedObjects[routesList.Items[i].GetUID()] = &routesList.Items[i]
}
}
pdbList := &policyV1.PodDisruptionBudgetList{}
err = r.List(ctx, pdbList, listOps)
if err != nil {
return nil, fmt.Errorf("error listing PodDisruptionBudgets: %w", err)
}
for i := range pdbList.Items {
ownedObjects[pdbList.Items[i].GetUID()] = &pdbList.Items[i]
}

return ownedObjects, nil
}

func (r *OpenTelemetryCollectorReconciler) getParams(instance v1alpha1.OpenTelemetryCollector) (manifests.Params, error) {
otelCol, err := convert.V1Alpha1to2(instance)
if err != nil {
Expand Down Expand Up @@ -84,7 +157,7 @@ func NewReconciler(p Params) *OpenTelemetryCollectorReconciler {
return r
}

// +kubebuilder:rbac:groups="",resources=pods;configmaps;services;serviceaccounts,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups="",resources=pods;configmaps;services;serviceaccounts;persistentvolumeclaims;persistentvolumes,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups="",resources=events,verbs=create;patch
// +kubebuilder:rbac:groups=apps,resources=daemonsets;deployments;statefulsets,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=autoscaling,resources=horizontalpodautoscalers,verbs=get;list;watch;create;update;patch;delete
Expand Down Expand Up @@ -134,9 +207,13 @@ func (r *OpenTelemetryCollectorReconciler) Reconcile(ctx context.Context, req ct
if buildErr != nil {
return ctrl.Result{}, buildErr
}
// TODO: https://github.com/open-telemetry/opentelemetry-operator/issues/2620
// TODO: Change &instance to use params.OtelCol
err = reconcileDesiredObjects(ctx, r.Client, log, &instance, params.Scheme, desiredObjects...)

ownedObjects, err := r.findOtelOwnedObjects(ctx, params)
if err != nil {
return ctrl.Result{}, err
}

err = reconcileDesiredObjects(ctx, r.Client, log, &instance, params.Scheme, desiredObjects, ownedObjects)
return collectorStatus.HandleReconcileStatus(ctx, log, params, instance, err)
}

Expand All @@ -150,6 +227,9 @@ func (r *OpenTelemetryCollectorReconciler) SetupWithManager(mgr ctrl.Manager) er
Owns(&appsv1.Deployment{}).
Owns(&appsv1.DaemonSet{}).
Owns(&appsv1.StatefulSet{}).
Owns(&corev1.PersistentVolume{}).
Owns(&corev1.PersistentVolumeClaim{}).
Owns(&networkingv1.Ingress{}).
jaronoff97 marked this conversation as resolved.
Show resolved Hide resolved
Owns(&autoscalingv2.HorizontalPodAutoscaler{}).
Owns(&policyV1.PodDisruptionBudget{})

Expand All @@ -162,6 +242,9 @@ func (r *OpenTelemetryCollectorReconciler) SetupWithManager(mgr ctrl.Manager) er
builder.Owns(&monitoringv1.ServiceMonitor{})
builder.Owns(&monitoringv1.PodMonitor{})
}
if r.config.OpenShiftRoutesAvailability() == openshift.RoutesAvailable {
builder.Owns(&routev1.Route{})
}

return builder.Complete(r)
}
14 changes: 11 additions & 3 deletions controllers/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@ import (
"time"

routev1 "github.com/openshift/api/route/v1"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
"github.com/stretchr/testify/assert"
v1 "k8s.io/api/core/v1"
networkingv1 "k8s.io/api/networking/v1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -104,7 +106,7 @@ func TestMain(m *testing.M) {

testEnv = &envtest.Environment{
CRDDirectoryPaths: []string{filepath.Join("..", "config", "crd", "bases")},
CRDs: []*apiextensionsv1.CustomResourceDefinition{testdata.OpenShiftRouteCRD},
CRDs: []*apiextensionsv1.CustomResourceDefinition{testdata.OpenShiftRouteCRD, testdata.ServiceMonitorCRD, testdata.PodMonitorCRD},
WebhookInstallOptions: envtest.WebhookInstallOptions{
Paths: []string{filepath.Join("..", "config", "webhook")},
},
Expand All @@ -114,12 +116,18 @@ func TestMain(m *testing.M) {
fmt.Printf("failed to start testEnv: %v", err)
os.Exit(1)
}

if err = monitoringv1.AddToScheme(testScheme); err != nil {
fmt.Printf("failed to register scheme: %v", err)
os.Exit(1)
}
if err = networkingv1.AddToScheme(testScheme); err != nil {
fmt.Printf("failed to register scheme: %v", err)
os.Exit(1)
}
if err = routev1.AddToScheme(testScheme); err != nil {
fmt.Printf("failed to register scheme: %v", err)
os.Exit(1)
}

if err = v1alpha1.AddToScheme(testScheme); err != nil {
fmt.Printf("failed to register scheme: %v", err)
os.Exit(1)
Expand Down
6 changes: 6 additions & 0 deletions internal/api/convert/v1alpha.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@ func V1Alpha1to2(in v1alpha1.OpenTelemetryCollector) (v1beta1.OpenTelemetryColle
Pods: m.Pods,
}
}
if copy.Spec.MaxReplicas != nil && copy.Spec.Autoscaler.MaxReplicas == nil {
copy.Spec.Autoscaler.MaxReplicas = copy.Spec.MaxReplicas
}
if copy.Spec.MinReplicas != nil && copy.Spec.Autoscaler.MinReplicas == nil {
copy.Spec.Autoscaler.MinReplicas = copy.Spec.MinReplicas
}
out.Spec.OpenTelemetryCommonFields.Autoscaler = &v1beta1.AutoscalerSpec{
MinReplicas: copy.Spec.Autoscaler.MinReplicas,
MaxReplicas: copy.Spec.Autoscaler.MaxReplicas,
Expand Down
5 changes: 2 additions & 3 deletions internal/manifests/collector/horizontalpodautoscaler.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,22 @@ import (
autoscalingv2 "k8s.io/api/autoscaling/v2"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/open-telemetry/opentelemetry-operator/apis/v1alpha1"
"github.com/open-telemetry/opentelemetry-operator/internal/manifests"
"github.com/open-telemetry/opentelemetry-operator/internal/manifests/manifestutils"
"github.com/open-telemetry/opentelemetry-operator/internal/naming"
)

func HorizontalPodAutoscaler(params manifests.Params) (client.Object, error) {
func HorizontalPodAutoscaler(params manifests.Params) (*autoscalingv2.HorizontalPodAutoscaler, error) {
name := naming.Collector(params.OtelCol.Name)
labels := manifestutils.Labels(params.OtelCol.ObjectMeta, name, params.OtelCol.Spec.Image, ComponentOpenTelemetryCollector, params.Config.LabelsFilter())
annotations, err := Annotations(params.OtelCol)
if err != nil {
return nil, err
}

var result client.Object
var result *autoscalingv2.HorizontalPodAutoscaler

objectMeta := metav1.ObjectMeta{
Name: naming.HorizontalPodAutoscaler(params.OtelCol.Name),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
autoscalingv2 "k8s.io/api/autoscaling/v2"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

Expand Down Expand Up @@ -95,11 +94,9 @@ func TestHPA(t *testing.T) {
},
Log: logger,
}
raw, err := HorizontalPodAutoscaler(params)
hpa, err := HorizontalPodAutoscaler(params)
require.NoError(t, err)

hpa := raw.(*autoscalingv2.HorizontalPodAutoscaler)

// verify
assert.Equal(t, "my-instance-collector", hpa.Name)
assert.Equal(t, "my-instance-collector", hpa.Labels["app.kubernetes.io/name"])
Expand Down
Loading
Loading