addon: Add support for logging agent health checking #42

Merged · 19 commits · Jun 6, 2024
99 changes: 99 additions & 0 deletions internal/addon/addon.go
@@ -1,12 +1,24 @@
package addon

import (
	"errors"
	"fmt"

	otelv1alpha1 "github.com/open-telemetry/opentelemetry-operator/apis/v1alpha1"
	loggingv1 "github.com/openshift/cluster-logging-operator/apis/logging/v1"
	"open-cluster-management.io/addon-framework/pkg/agent"
	"open-cluster-management.io/addon-framework/pkg/utils"
	addonapiv1alpha1 "open-cluster-management.io/api/addon/v1alpha1"
	workv1 "open-cluster-management.io/api/work/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

var (
	errProbeConditionNotSatisfied = errors.New("probe condition is not satisfied")
	errProbeValueIsNil            = errors.New("probe value is nil")
	errValueNotProbed             = errors.New("value not probed")
)

func NewRegistrationOption(agentName string) *agent.RegistrationOption {
	return &agent.RegistrationOption{
		CSRConfigurations: agent.KubeClientSignerConfigurations(Name, agentName),
@@ -30,3 +42,90 @@ func GetObjectKey(configRef []addonapiv1alpha1.ConfigReference, group, resource
	}
	return key
}

// AgentHealthProber returns a HealthProber struct that contains the necessary
// information to assert if an addon deployment is ready or not.
func AgentHealthProber() *agent.HealthProber {
	return &agent.HealthProber{
		Type: agent.HealthProberTypeWork,
		WorkProber: &agent.WorkHealthProber{
			ProbeFields: []agent.ProbeField{
				{
					ResourceIdentifier: workv1.ResourceIdentifier{
						Group:     loggingv1.GroupVersion.Group,
						Resource:  ClusterLogForwardersResource,
						Name:      SpokeCLFName,
						Namespace: SpokeCLFNamespace,
					},
					ProbeRules: []workv1.FeedbackRule{
						{
							Type: workv1.JSONPathsType,
							JsonPaths: []workv1.JsonPath{
								{
									Name: clfProbeKey,
									Path: clfProbePath,
								},
							},
						},
					},
				},
				{
					ResourceIdentifier: workv1.ResourceIdentifier{
						Group:     otelv1alpha1.GroupVersion.Group,
						Resource:  OpenTelemetryCollectorsResource,
						Name:      SpokeOTELColName,
						Namespace: SpokeOTELColNamespace,
					},
					ProbeRules: []workv1.FeedbackRule{
						{
							Type: workv1.JSONPathsType,
							JsonPaths: []workv1.JsonPath{
								{
									Name: otelColProbeKey,
									Path: otelColProbePath,
								},
							},
						},
					},
				},
			},
			HealthCheck: func(identifier workv1.ResourceIdentifier, result workv1.StatusFeedbackResult) error {
				for _, value := range result.Values {
					switch {
					case identifier.Resource == ClusterLogForwardersResource:
						if value.Name != clfProbeKey {
							continue
						}

						if value.Value.String == nil {
							return fmt.Errorf("%w: clusterlogforwarder with key %s/%s", errProbeValueIsNil, identifier.Namespace, identifier.Name)
						}

						if *value.Value.String != "True" {
							return fmt.Errorf("%w: clusterlogforwarder status condition type is %s for %s/%s", errProbeConditionNotSatisfied, *value.Value.String, identifier.Namespace, identifier.Name)
						}

						return nil
					case identifier.Resource == OpenTelemetryCollectorsResource:
						if value.Name != otelColProbeKey {
							continue
						}

						if value.Value.Integer == nil {
							return fmt.Errorf("%w: opentelemetrycollector with key %s/%s", errProbeValueIsNil, identifier.Namespace, identifier.Name)
						}

						if *value.Value.Integer < 1 {
							return fmt.Errorf("%w: opentelemetrycollector replicas is %d for %s/%s", errProbeConditionNotSatisfied, *value.Value.Integer, identifier.Namespace, identifier.Name)
						}

						return nil
					default:
						continue
					}
				}
				return fmt.Errorf("%w: for resource %s with key %s/%s", errValueNotProbed, identifier.Resource, identifier.Namespace, identifier.Name)
			},
		},
	}
}
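Not part of this diff, but useful context: a prober of type `HealthProberTypeWork` only takes effect once it is handed to the addon-framework when the agent addon is built. Below is a minimal sketch of that wiring, assuming the `addonfactory` API from open-cluster-management.io/addon-framework and assuming the embedded manifests filesystem in this package is named `FS`; the chart directory name is a placeholder:

```go
package addon

import (
	"open-cluster-management.io/addon-framework/pkg/addonfactory"
	"open-cluster-management.io/addon-framework/pkg/agent"
)

// buildAgentAddon is a hypothetical helper, not part of this PR: it shows how
// AgentHealthProber() could be attached so the hub evaluates the probe rules
// against ManifestWork status feedback reported from the spoke.
func buildAgentAddon() (agent.AgentAddon, error) {
	return addonfactory.NewAgentAddonFactory(Name, FS, "manifests").
		WithAgentHealthProber(AgentHealthProber()).
		BuildHelmAgentAddon()
}
```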
100 changes: 100 additions & 0 deletions internal/addon/addon_test.go
@@ -0,0 +1,100 @@
package addon

import (
	"fmt"
	"testing"

	"github.com/stretchr/testify/require"
	workv1 "open-cluster-management.io/api/work/v1"

	otelv1alpha1 "github.com/open-telemetry/opentelemetry-operator/apis/v1alpha1"
	loggingv1 "github.com/openshift/cluster-logging-operator/apis/logging/v1"
)

func Test_AgentHealthProber_CLF(t *testing.T) {
	unhealthyError := fmt.Errorf("%w: clusterlogforwarder status condition type is %s for %s/%s", errProbeConditionNotSatisfied, "False", SpokeCLFNamespace, SpokeCLFName)
	for _, tc := range []struct {
		name        string
		status      string
		expectedErr string
	}{
		{
			name:   "healthy",
			status: "True",
		},
		{
			name:        "unhealthy",
			status:      "False",
			expectedErr: unhealthyError.Error(),
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			healthProber := AgentHealthProber()
			err := healthProber.WorkProber.HealthCheck(workv1.ResourceIdentifier{
				Group:     loggingv1.GroupVersion.Group,
				Resource:  ClusterLogForwardersResource,
				Name:      SpokeCLFName,
				Namespace: SpokeCLFNamespace,
			}, workv1.StatusFeedbackResult{
				Values: []workv1.FeedbackValue{
					{
						Name: "isReady",
						Value: workv1.FieldValue{
							Type:   workv1.String,
							String: &tc.status,
						},
					},
				},
			})
			if tc.expectedErr != "" {
				require.EqualError(t, err, tc.expectedErr)
				return
			}
			require.NoError(t, err)
		})
	}
}

func Test_AgentHealthProber_OTELCol(t *testing.T) {
	unhealthyError := fmt.Errorf("%w: opentelemetrycollector replicas is %d for %s/%s", errProbeConditionNotSatisfied, 0, SpokeOTELColNamespace, SpokeOTELColName)
	for _, tc := range []struct {
		name        string
		replicas    int64
		expectedErr string
	}{
		{
			name:     "healthy",
			replicas: 1,
		},
		{
			name:        "unhealthy",
			replicas:    0,
			expectedErr: unhealthyError.Error(),
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			healthProber := AgentHealthProber()
			err := healthProber.WorkProber.HealthCheck(workv1.ResourceIdentifier{
				Group:     otelv1alpha1.GroupVersion.Group,
				Resource:  OpenTelemetryCollectorsResource,
				Name:      SpokeOTELColName,
				Namespace: SpokeOTELColNamespace,
			}, workv1.StatusFeedbackResult{
				Values: []workv1.FeedbackValue{
					{
						Name: "replicas",
						Value: workv1.FieldValue{
							Type:    workv1.Integer,
							Integer: &tc.replicas,
						},
					},
				},
			})
			if tc.expectedErr != "" {
				require.EqualError(t, err, tc.expectedErr)
				return
			}
			require.NoError(t, err)
		})
	}
}
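The two tests above cover the healthy and unhealthy branches; the remaining branch in `HealthCheck` is the fallthrough that fires when the feedback result contains no matching value. A sketch of a test for that path (not in this PR, using the same package and imports as the tests above):

```go
func Test_AgentHealthProber_ValueNotProbed(t *testing.T) {
	healthProber := AgentHealthProber()
	err := healthProber.WorkProber.HealthCheck(workv1.ResourceIdentifier{
		Group:     loggingv1.GroupVersion.Group,
		Resource:  ClusterLogForwardersResource,
		Name:      SpokeCLFName,
		Namespace: SpokeCLFNamespace,
	}, workv1.StatusFeedbackResult{}) // empty feedback: nothing was probed
	require.ErrorIs(t, err, errValueNotProbed)
}
```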
@@ -2,7 +2,7 @@
apiVersion: logging.openshift.io/v1
kind: ClusterLogForwarder
metadata:
-  name: instance
+  name: mcoa-instance
  namespace: openshift-logging
  labels:
    app: {{ template "logginghelm.name" . }}
@@ -0,0 +1,52 @@
{{- if .Values.enabled }}
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: openshift-logging:mcoa-logcollector:application-logs
  labels:
    app: {{ template "logginghelm.name" . }}
    chart: {{ template "logginghelm.chart" . }}
    release: {{ .Release.Name }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: collect-application-logs
subjects:
- kind: ServiceAccount
  name: mcoa-logcollector
  namespace: openshift-logging
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: openshift-logging:mcoa-logcollector:audit-logs
  labels:
    app: {{ template "logginghelm.name" . }}
    chart: {{ template "logginghelm.chart" . }}
    release: {{ .Release.Name }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: collect-audit-logs
subjects:
- kind: ServiceAccount
  name: mcoa-logcollector
  namespace: openshift-logging
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: openshift-logging:mcoa-logcollector:infrastructure-logs
  labels:
    app: {{ template "logginghelm.name" . }}
    chart: {{ template "logginghelm.chart" . }}
    release: {{ .Release.Name }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: collect-infrastructure-logs
subjects:
- kind: ServiceAccount
  name: mcoa-logcollector
  namespace: openshift-logging
{{- end }}
@@ -0,0 +1,11 @@
{{- if .Values.enabled }}
apiVersion: v1
kind: ServiceAccount
metadata:
  name: mcoa-logcollector
  namespace: openshift-logging
  labels:
    app: {{ template "logginghelm.name" . }}
    chart: {{ template "logginghelm.chart" . }}
    release: {{ .Release.Name }}
{{- end }}
@@ -11,7 +11,7 @@ metadata:
apiVersion: v1
kind: Namespace
metadata:
-  name: spoke-otelcol
+  name: mcoa-opentelemetry
  labels:
    app: {{ template "tracinghelm.name" . }}
    chart: {{ template "tracinghelm.chart" . }}
@@ -2,8 +2,8 @@
apiVersion: opentelemetry.io/v1beta1
kind: OpenTelemetryCollector
metadata:
-  name: spoke-otelcol
-  namespace: spoke-otelcol
+  name: mcoa-instance
+  namespace: mcoa-opentelemetry
  labels:
    app: {{ template "tracinghelm.name" . }}
    chart: {{ template "tracinghelm.chart" . }}
@@ -4,7 +4,7 @@ apiVersion: v1
kind: Secret
metadata:
  name: {{ $secret_config.name }}
-  namespace: spoke-otelcol
+  namespace: mcoa-opentelemetry
  labels:
    app: {{ template "tracinghelm.name" $ }}
    chart: {{ template "tracinghelm.chart" $ }}
12 changes: 12 additions & 0 deletions internal/addon/var.go
@@ -16,6 +16,18 @@ const (

	AdcLoggingDisabledKey = "loggingDisabled"
	AdcTracingisabledKey  = "tracingDisabled"

	ClusterLogForwardersResource = "clusterlogforwarders"
	SpokeCLFName                 = "mcoa-instance"
	SpokeCLFNamespace            = "openshift-logging"
	clfProbeKey                  = "isReady"
	clfProbePath                 = ".status.conditions[?(@.type==\"Ready\")].status"

	OpenTelemetryCollectorsResource = "opentelemetrycollectors"
	SpokeOTELColName                = "mcoa-instance"
	SpokeOTELColNamespace           = "mcoa-opentelemetry"
	otelColProbeKey                 = "replicas"
	otelColProbePath                = ".spec.replicas"
)

//go:embed manifests
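The two probe paths are standard Kubernetes JSONPath expressions that the work agent evaluates against the live spoke resources. As a self-contained illustration (not part of this PR), `clfProbePath` can be evaluated with client-go's jsonpath package against a trimmed-down object; note that client-go's parser expects the expression wrapped in braces, while the work API's feedback rules omit them:

```go
package main

import (
	"fmt"

	"k8s.io/client-go/util/jsonpath"
)

func main() {
	// A minimal stand-in for a ClusterLogForwarder with the field the probe reads.
	clf := map[string]interface{}{
		"status": map[string]interface{}{
			"conditions": []interface{}{
				map[string]interface{}{"type": "Ready", "status": "True"},
			},
		},
	}

	jp := jsonpath.New("clf-ready")
	// Same expression as clfProbePath, wrapped in braces for client-go.
	if err := jp.Parse(`{.status.conditions[?(@.type=="Ready")].status}`); err != nil {
		panic(err)
	}
	results, err := jp.FindResults(clf)
	if err != nil {
		panic(err)
	}
	fmt.Println(results[0][0].Interface()) // prints: True
}
```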
6 changes: 1 addition & 5 deletions internal/logging/handlers/handler.go
@@ -11,16 +11,12 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
)

const (
clusterLogForwarderResource = "clusterlogforwarders"
)

func BuildOptions(ctx context.Context, k8s client.Client, mcAddon *addonapiv1alpha1.ManagedClusterAddOn, adoc *addonapiv1alpha1.AddOnDeploymentConfig) (manifests.Options, error) {
resources := manifests.Options{
AddOnDeploymentConfig: adoc,
}

key := addon.GetObjectKey(mcAddon.Status.ConfigReferences, loggingv1.GroupVersion.Group, clusterLogForwarderResource)
key := addon.GetObjectKey(mcAddon.Status.ConfigReferences, loggingv1.GroupVersion.Group, addon.ClusterLogForwardersResource)
clf := &loggingv1.ClusterLogForwarder{}
if err := k8s.Get(ctx, key, clf, &client.GetOptions{}); err != nil {
return resources, err
10 changes: 7 additions & 3 deletions internal/logging/helm_test.go
@@ -92,15 +92,15 @@ func Test_Logging_AllConfigsTogether_AllResources(t *testing.T) {
			},
			ConfigReferent: addonapiv1alpha1.ConfigReferent{
				Namespace: "open-cluster-management",
-				Name:      "instance",
+				Name:      "mcoa-instance",
			},
		},
	}

	// Setup configuration resources: ClusterLogForwarder, AddOnDeploymentConfig, Secrets, ConfigMaps
	clf = &loggingv1.ClusterLogForwarder{
		ObjectMeta: metav1.ObjectMeta{
-			Name:      "instance",
+			Name:      "mcoa-instance",
			Namespace: "open-cluster-management",
			Annotations: map[string]string{
				"authentication.mcoa.openshift.io/app-logs": "SecretReference",
@@ -215,7 +215,7 @@ func Test_Logging_AllConfigsTogether_AllResources(t *testing.T) {
	// Render manifests and return them as k8s runtime objects
	objects, err := loggingAgentAddon.Manifests(managedCluster, managedClusterAddOn)
	require.NoError(t, err)
-	require.Equal(t, 7, len(objects))
+	require.Equal(t, 11, len(objects))

	for _, obj := range objects {
		switch obj := obj.(type) {
@@ -226,6 +226,10 @@
			require.NotNil(t, obj.Spec.Outputs[1].Secret)
			require.Equal(t, "static-authentication", obj.Spec.Outputs[0].Secret.Name)
			require.Equal(t, "static-authentication", obj.Spec.Outputs[1].Secret.Name)
+			// Check the name and namespace to make sure that changes to the
+			// helm manifests don't break the addon probes.
+			require.Equal(t, addon.SpokeCLFName, obj.Name)
+			require.Equal(t, addon.SpokeCLFNamespace, obj.Namespace)
		case *corev1.Secret:
			if obj.Name == "static-authentication" {
				require.Equal(t, staticCred.Data, obj.Data)