diff --git a/operator/api/v1alpha1/common/common.go b/operator/api/v1alpha1/common/common.go index 4a51eea71b..ef066b3933 100644 --- a/operator/api/v1alpha1/common/common.go +++ b/operator/api/v1alpha1/common/common.go @@ -51,6 +51,10 @@ func (k KeptnState) IsFailed() bool { return k == StateFailed } +func (k KeptnState) IsPending() bool { + return k == StatePending +} + type StatusSummary struct { Total int progressing int diff --git a/operator/api/v1alpha1/keptnappversion_types.go b/operator/api/v1alpha1/keptnappversion_types.go index fd31b35f8e..f00176cbfc 100644 --- a/operator/api/v1alpha1/keptnappversion_types.go +++ b/operator/api/v1alpha1/keptnappversion_types.go @@ -17,6 +17,7 @@ limitations under the License. package v1alpha1 import ( + "fmt" "time" "github.com/keptn/lifecycle-controller/operator/api/v1alpha1/common" @@ -205,3 +206,31 @@ func (v KeptnAppVersion) GetDurationMetricsAttributes() []attribute.KeyValue { common.AppPreviousVersion.String(v.Spec.PreviousVersion), } } + +func (v KeptnAppVersion) GetState() common.KeptnState { + return v.Status.Status +} + +func (v *KeptnAppVersion) SetState(state common.KeptnState) { + v.Status.Status = state +} + +func (v KeptnAppVersion) GetCurrentPhase() string { + return v.Status.CurrentPhase +} + +func (v *KeptnAppVersion) SetCurrentPhase(phase string) { + v.Status.CurrentPhase = phase +} + +func (v *KeptnAppVersion) Complete() { + v.SetEndTime() +} + +func (v KeptnAppVersion) GetVersion() string { + return v.Spec.Version +} + +func (v KeptnAppVersion) GetSpanName(phase string) string { + return fmt.Sprintf("%s.%s.%s.%s", v.Spec.TraceId, v.Spec.AppName, v.Spec.Version, phase) +} diff --git a/operator/api/v1alpha1/keptnworkloadinstance_types.go b/operator/api/v1alpha1/keptnworkloadinstance_types.go index 83e390a13e..2e33a3ef08 100644 --- a/operator/api/v1alpha1/keptnworkloadinstance_types.go +++ b/operator/api/v1alpha1/keptnworkloadinstance_types.go @@ -17,6 +17,7 @@ limitations under the License. package v1alpha1 import ( + "fmt" "time" "github.com/keptn/lifecycle-controller/operator/api/v1alpha1/common" @@ -242,3 +243,31 @@ func (i KeptnWorkloadInstance) GetIntervalMetricsAttributes() []attribute.KeyVal common.WorkloadPreviousVersion.String(i.Spec.PreviousVersion), } } + +func (i KeptnWorkloadInstance) GetState() common.KeptnState { + return i.Status.Status +} + +func (i *KeptnWorkloadInstance) SetState(state common.KeptnState) { + i.Status.Status = state +} + +func (i KeptnWorkloadInstance) GetCurrentPhase() string { + return i.Status.CurrentPhase +} + +func (i *KeptnWorkloadInstance) SetCurrentPhase(phase string) { + i.Status.CurrentPhase = phase +} + +func (i *KeptnWorkloadInstance) Complete() { + i.SetEndTime() +} + +func (i KeptnWorkloadInstance) GetVersion() string { + return i.Spec.Version +} + +func (v KeptnWorkloadInstance) GetSpanName(phase string) string { + return fmt.Sprintf("%s.%s.%s.%s", v.Spec.TraceId, v.Spec.AppName, v.Spec.Version, phase) +} diff --git a/operator/controllers/common/helperfunctions.go b/operator/controllers/common/helperfunctions.go new file mode 100644 index 0000000000..72ff276e4a --- /dev/null +++ b/operator/controllers/common/helperfunctions.go @@ -0,0 +1,37 @@ +package common + +import ( + klcv1alpha1 "github.com/keptn/lifecycle-controller/operator/api/v1alpha1" + apicommon "github.com/keptn/lifecycle-controller/operator/api/v1alpha1/common" + "k8s.io/apimachinery/pkg/types" +) + +func GetTaskStatus(taskName string, instanceStatus []klcv1alpha1.TaskStatus) klcv1alpha1.TaskStatus { + for _, status := range instanceStatus { + if status.TaskDefinitionName == taskName { + return status + } + } + return klcv1alpha1.TaskStatus{ + TaskDefinitionName: taskName, + Status: apicommon.StatePending, + TaskName: "", + } +} + +func GetEvaluationStatus(evaluationName string, instanceStatus []klcv1alpha1.EvaluationStatus) klcv1alpha1.EvaluationStatus { + for _, status := range instanceStatus { + if status.EvaluationDefinitionName == evaluationName { + return status + } + } + return klcv1alpha1.EvaluationStatus{ + EvaluationDefinitionName: evaluationName, + Status: apicommon.StatePending, + EvaluationName: "", + } +} + +func GetAppVersionName(namespace string, appName string, version string) types.NamespacedName { + return types.NamespacedName{Namespace: namespace, Name: appName + "-" + version} +} diff --git a/operator/controllers/common/interfaces.go b/operator/controllers/common/interfaces.go new file mode 100644 index 0000000000..66e5303d09 --- /dev/null +++ b/operator/controllers/common/interfaces.go @@ -0,0 +1,65 @@ +package common + +import ( + "errors" + + "github.com/keptn/lifecycle-controller/operator/api/v1alpha1/common" + "go.opentelemetry.io/otel/attribute" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +//go:generate moq -pkg common_mock --skip-ensure -out ./fake/phaseitem_mock.go . PhaseItem +type PhaseItem interface { + GetState() common.KeptnState + SetState(common.KeptnState) + GetCurrentPhase() string + SetCurrentPhase(string) + GetVersion() string + GetMetricsAttributes() []attribute.KeyValue + GetSpanName(phase string) string + Complete() +} + +type PhaseItemWrapper struct { + Obj PhaseItem +} + +func NewPhaseItemWrapperFromClientObject(object client.Object) (*PhaseItemWrapper, error) { + pi, ok := object.(PhaseItem) + if !ok { + return nil, errors.New("provided object does not implement PhaseItem interface") + } + return &PhaseItemWrapper{Obj: pi}, nil +} + +func (pw PhaseItemWrapper) GetState() common.KeptnState { + return pw.Obj.GetState() +} + +func (pw *PhaseItemWrapper) SetState(state common.KeptnState) { + pw.Obj.SetState(state) +} + +func (pw PhaseItemWrapper) GetCurrentPhase() string { + return pw.Obj.GetCurrentPhase() +} + +func (pw *PhaseItemWrapper) SetCurrentPhase(phase string) { + pw.Obj.SetCurrentPhase(phase) +} + +func (pw PhaseItemWrapper) GetMetricsAttributes() []attribute.KeyValue { + return pw.Obj.GetMetricsAttributes() +} + +func (pw *PhaseItemWrapper) Complete() { + pw.Obj.Complete() +} + +func (pw PhaseItemWrapper) GetVersion() string { + return pw.Obj.GetVersion() +} + +func (pw PhaseItemWrapper) GetSpanName(phase string) string { + return pw.Obj.GetSpanName(phase) +} diff --git a/operator/controllers/common/interfaces_test.go b/operator/controllers/common/interfaces_test.go new file mode 100644 index 0000000000..135f98125c --- /dev/null +++ b/operator/controllers/common/interfaces_test.go @@ -0,0 +1,26 @@ +package common + +import ( + "github.com/keptn/lifecycle-controller/operator/api/v1alpha1" + "github.com/keptn/lifecycle-controller/operator/api/v1alpha1/common" + "github.com/stretchr/testify/require" + "testing" +) + +func TestPhaseItemWrapper_GetState(t *testing.T) { + appVersion := &v1alpha1.KeptnAppVersion{ + Status: v1alpha1.KeptnAppVersionStatus{ + Status: common.StateFailed, + CurrentPhase: "test", + }, + } + + object, err := NewPhaseItemWrapperFromClientObject(appVersion) + require.Nil(t, err) + + require.Equal(t, "test", object.GetCurrentPhase()) + + object.Complete() + + require.NotZero(t, appVersion.Status.EndTime) +} diff --git a/operator/controllers/common/phasehandler.go b/operator/controllers/common/phasehandler.go new file mode 100644 index 0000000000..39c0b317bc --- /dev/null +++ b/operator/controllers/common/phasehandler.go @@ -0,0 +1,104 @@ +package common + +import ( + "context" + "fmt" + "time" + + "github.com/go-logr/logr" + "github.com/keptn/lifecycle-controller/operator/api/v1alpha1/common" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" + "k8s.io/client-go/tools/record" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type PhaseHandler struct { + client.Client + Recorder record.EventRecorder + Log logr.Logger + SpanHandler SpanHandler +} + +type PhaseResult struct { + Continue bool + ctrl.Result +} + +func RecordEvent(recorder record.EventRecorder, phase common.KeptnPhaseType, eventType string, reconcileObject client.Object, shortReason string, longReason string, version string) { + recorder.Event(reconcileObject, eventType, fmt.Sprintf("%s%s", phase.ShortName, shortReason), fmt.Sprintf("%s %s / Namespace: %s, Name: %s, Version: %s ", phase.LongName, longReason, reconcileObject.GetNamespace(), reconcileObject.GetName(), version)) +} + +func (r PhaseHandler) HandlePhase(ctx context.Context, ctxAppTrace context.Context, tracer trace.Tracer, reconcileObject client.Object, phase common.KeptnPhaseType, span trace.Span, reconcilePhase func() (common.KeptnState, error)) (*PhaseResult, error) { + requeueResult := ctrl.Result{Requeue: true, RequeueAfter: 5 * time.Second} + piWrapper, err := NewPhaseItemWrapperFromClientObject(reconcileObject) + if err != nil { + return &PhaseResult{Continue: false, Result: ctrl.Result{Requeue: true}}, err + } + oldStatus := piWrapper.GetState() + oldPhase := piWrapper.GetCurrentPhase() + piWrapper.SetCurrentPhase(phase.ShortName) + + r.Log.Info(phase.LongName + " not finished") + ctxAppTrace, spanAppTrace, err := r.SpanHandler.GetSpan(ctxAppTrace, tracer, reconcileObject, phase.ShortName) + if err != nil { + r.Log.Error(err, "could not get span") + } + + state, err := reconcilePhase() + if err != nil { + spanAppTrace.AddEvent(phase.LongName + " could not get reconciled") + RecordEvent(r.Recorder, phase, "Warning", reconcileObject, "ReconcileErrored", "could not get reconciled", piWrapper.GetVersion()) + span.SetStatus(codes.Error, err.Error()) + return &PhaseResult{Continue: false, Result: requeueResult}, err + } + + if state.IsPending() { + state = common.StateProgressing + } + + defer func(oldStatus common.KeptnState, oldPhase string, reconcileObject client.Object) { + piWrapper, _ := NewPhaseItemWrapperFromClientObject(reconcileObject) + if oldStatus != piWrapper.GetState() || oldPhase != piWrapper.GetCurrentPhase() { + ctx, spanAppTrace, err = r.SpanHandler.GetSpan(ctxAppTrace, tracer, reconcileObject, piWrapper.GetCurrentPhase()) + if err != nil { + r.Log.Error(err, "could not get span") + } + if err := r.Status().Update(ctx, reconcileObject); err != nil { + r.Log.Error(err, "could not update status") + } + } + }(oldStatus, oldPhase, reconcileObject) + + if state.IsCompleted() { + if state.IsFailed() { + piWrapper.Complete() + piWrapper.SetState(common.StateFailed) + spanAppTrace.AddEvent(phase.LongName + " has failed") + spanAppTrace.SetStatus(codes.Error, "Failed") + spanAppTrace.End() + if err := r.SpanHandler.UnbindSpan(reconcileObject, phase.ShortName); err != nil { + r.Log.Error(err, "cannot unbind span") + } + RecordEvent(r.Recorder, phase, "Warning", reconcileObject, "Failed", "has failed", piWrapper.GetVersion()) + return &PhaseResult{Continue: false, Result: ctrl.Result{}}, nil + } + + piWrapper.SetState(common.StateSucceeded) + spanAppTrace.AddEvent(phase.LongName + " has succeeded") + spanAppTrace.SetStatus(codes.Ok, "Succeeded") + spanAppTrace.End() + if err := r.SpanHandler.UnbindSpan(reconcileObject, phase.ShortName); err != nil { + r.Log.Error(err, "cannot unbind span") + } + RecordEvent(r.Recorder, phase, "Normal", reconcileObject, "Succeeded", "has succeeded", piWrapper.GetVersion()) + + return &PhaseResult{Continue: true, Result: requeueResult}, nil + } + + piWrapper.SetState(common.StateProgressing) + RecordEvent(r.Recorder, phase, "Warning", reconcileObject, "NotFinished", "has not finished", piWrapper.GetVersion()) + + return &PhaseResult{Continue: false, Result: requeueResult}, nil +} diff --git a/operator/controllers/common/spanhandler.go b/operator/controllers/common/spanhandler.go new file mode 100644 index 0000000000..a117ee744f --- /dev/null +++ b/operator/controllers/common/spanhandler.go @@ -0,0 +1,38 @@ +package common + +import ( + "context" + + "go.opentelemetry.io/otel/trace" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type SpanHandler struct { + bindCRDSpan map[string]trace.Span +} + +func (r SpanHandler) GetSpan(ctx context.Context, tracer trace.Tracer, reconcileObject client.Object, phase string) (context.Context, trace.Span, error) { + piWrapper, err := NewPhaseItemWrapperFromClientObject(reconcileObject) + if err != nil { + return nil, nil, err + } + appvName := piWrapper.GetSpanName(phase) + if r.bindCRDSpan == nil { + r.bindCRDSpan = make(map[string]trace.Span) + } + if span, ok := r.bindCRDSpan[appvName]; ok { + return ctx, span, nil + } + ctx, span := tracer.Start(ctx, phase, trace.WithSpanKind(trace.SpanKindConsumer)) + r.bindCRDSpan[appvName] = span + return ctx, span, nil +} + +func (r SpanHandler) UnbindSpan(reconcileObject client.Object, phase string) error { + piWrapper, err := NewPhaseItemWrapperFromClientObject(reconcileObject) + if err != nil { + return err + } + delete(r.bindCRDSpan, piWrapper.GetSpanName(phase)) + return nil +} diff --git a/operator/controllers/keptnapp/controller.go b/operator/controllers/keptnapp/controller.go index 3a8c3027f6..3d6f14e986 100644 --- a/operator/controllers/keptnapp/controller.go +++ b/operator/controllers/keptnapp/controller.go @@ -19,6 +19,7 @@ package keptnapp import ( "context" "fmt" + "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/predicate" diff --git a/operator/controllers/keptnappversion/controller.go b/operator/controllers/keptnappversion/controller.go index f394b7c499..3d4e08e523 100644 --- a/operator/controllers/keptnappversion/controller.go +++ b/operator/controllers/keptnappversion/controller.go @@ -19,9 +19,12 @@ package keptnappversion import ( "context" "fmt" - "k8s.io/apimachinery/pkg/types" "time" + "k8s.io/apimachinery/pkg/types" + + controllercommon "github.com/keptn/lifecycle-controller/operator/controllers/common" + "github.com/keptn/lifecycle-controller/operator/api/v1alpha1/semconv" "go.opentelemetry.io/otel/codes" "go.opentelemetry.io/otel/trace" @@ -44,13 +47,13 @@ import ( // KeptnAppVersionReconciler reconciles a KeptnAppVersion object type KeptnAppVersionReconciler struct { + Scheme *runtime.Scheme client.Client - Scheme *runtime.Scheme Log logr.Logger Recorder record.EventRecorder Tracer trace.Tracer Meters common.KeptnMeters - bindCRDSpan map[string]trace.Span + SpanHandler controllercommon.SpanHandler } //+kubebuilder:rbac:groups=lifecycle.keptn.sh,resources=keptnappversions,verbs=get;list;watch;create;update;patch;delete @@ -91,27 +94,50 @@ func (r *KeptnAppVersionReconciler) Reconcile(ctx context.Context, req ctrl.Requ ctxAppTrace := otel.GetTextMapPropagator().Extract(context.TODO(), appTraceContextCarrier) ctx, span := r.Tracer.Start(ctx, "reconcile_app_version", trace.WithSpanKind(trace.SpanKindConsumer)) - defer span.End() + + defer func(span trace.Span, appVersion *klcv1alpha1.KeptnAppVersion) { + if appVersion.IsEndTimeSet() { + r.Log.Info("Increasing app count") + attrs := appVersion.GetMetricsAttributes() + r.Meters.AppCount.Add(ctx, 1, attrs...) + } + span.End() + }(span, appVersion) semconv.AddAttributeFromAppVersion(span, *appVersion) phase := common.PhaseAppPreDeployment + phaseHandler := controllercommon.PhaseHandler{ + Client: r.Client, + Recorder: r.Recorder, + Log: r.Log, + SpanHandler: r.SpanHandler, + } + if appVersion.Status.CurrentPhase == "" { - r.unbindSpan(appVersion, phase.ShortName) + if err := r.SpanHandler.UnbindSpan(appVersion, phase.ShortName); err != nil { + r.Log.Error(err, "cannot unbind span") + } var spanAppTrace trace.Span - ctxAppTrace, spanAppTrace = r.getSpan(ctxAppTrace, appVersion, phase.ShortName) + ctxAppTrace, spanAppTrace, err = r.SpanHandler.GetSpan(ctxAppTrace, r.Tracer, appVersion, phase.ShortName) + if err != nil { + r.Log.Error(err, "could not get span") + } semconv.AddAttributeFromAppVersion(spanAppTrace, *appVersion) spanAppTrace.AddEvent("App Version Pre-Deployment Tasks started", trace.WithTimestamp(time.Now())) - r.recordEvent(phase, "Normal", appVersion, "Started", "have started") + controllercommon.RecordEvent(r.Recorder, phase, "Normal", appVersion, "Started", "have started", appVersion.GetVersion()) } if !appVersion.IsPreDeploymentSucceeded() { reconcilePreDep := func() (common.KeptnState, error) { return r.reconcilePrePostDeployment(ctx, appVersion, common.PreDeploymentCheckType) } - return r.handlePhase(ctx, ctxAppTrace, appVersion, phase, span, appVersion.IsPreDeploymentFailed, reconcilePreDep) + result, err := phaseHandler.HandlePhase(ctx, ctxAppTrace, r.Tracer, appVersion, phase, span, reconcilePreDep) + if !result.Continue { + return result.Result, err + } } phase = common.PhaseAppPreEvaluation @@ -119,7 +145,10 @@ func (r *KeptnAppVersionReconciler) Reconcile(ctx context.Context, req ctrl.Requ reconcilePreEval := func() (common.KeptnState, error) { return r.reconcilePrePostEvaluation(ctx, appVersion, common.PreDeploymentEvaluationCheckType) } - return r.handlePhase(ctx, ctxAppTrace, appVersion, phase, span, appVersion.IsPreDeploymentEvaluationFailed, reconcilePreEval) + result, err := phaseHandler.HandlePhase(ctx, ctxAppTrace, r.Tracer, appVersion, phase, span, reconcilePreEval) + if !result.Continue { + return result.Result, err + } } phase = common.PhaseAppDeployment @@ -127,8 +156,10 @@ func (r *KeptnAppVersionReconciler) Reconcile(ctx context.Context, req ctrl.Requ reconcileAppDep := func() (common.KeptnState, error) { return r.reconcileWorkloads(ctx, appVersion) } - return r.handlePhase(ctx, ctxAppTrace, appVersion, phase, span, appVersion.AreWorkloadsFailed, reconcileAppDep) - + result, err := phaseHandler.HandlePhase(ctx, ctxAppTrace, r.Tracer, appVersion, phase, span, reconcileAppDep) + if !result.Continue { + return result.Result, err + } } phase = common.PhaseAppPostDeployment @@ -136,7 +167,10 @@ func (r *KeptnAppVersionReconciler) Reconcile(ctx context.Context, req ctrl.Requ reconcilePostDep := func() (common.KeptnState, error) { return r.reconcilePrePostDeployment(ctx, appVersion, common.PostDeploymentCheckType) } - return r.handlePhase(ctx, ctxAppTrace, appVersion, phase, span, appVersion.IsPostDeploymentFailed, reconcilePostDep) + result, err := phaseHandler.HandlePhase(ctx, ctxAppTrace, r.Tracer, appVersion, phase, span, reconcilePostDep) + if !result.Continue { + return result.Result, err + } } phase = common.PhaseAppPostEvaluation @@ -144,10 +178,13 @@ func (r *KeptnAppVersionReconciler) Reconcile(ctx context.Context, req ctrl.Requ reconcilePostEval := func() (common.KeptnState, error) { return r.reconcilePrePostEvaluation(ctx, appVersion, common.PostDeploymentEvaluationCheckType) } - return r.handlePhase(ctx, ctxAppTrace, appVersion, phase, span, appVersion.IsPostDeploymentEvaluationFailed, reconcilePostEval) + result, err := phaseHandler.HandlePhase(ctx, ctxAppTrace, r.Tracer, appVersion, phase, span, reconcilePostEval) + if !result.Continue { + return result.Result, err + } } - r.recordEvent(phase, "Normal", appVersion, "Finished", "is finished") + controllercommon.RecordEvent(r.Recorder, phase, "Normal", appVersion, "Finished", "is finished", appVersion.GetVersion()) err = r.Client.Status().Update(ctx, appVersion) if err != nil { span.SetStatus(codes.Error, err.Error()) @@ -168,11 +205,6 @@ func (r *KeptnAppVersionReconciler) Reconcile(ctx context.Context, req ctrl.Requ attrs := appVersion.GetMetricsAttributes() - r.Log.Info("Increasing app count") - - // metrics: increment app counter - r.Meters.AppCount.Add(ctx, 1, attrs...) - // metrics: add app duration duration := appVersion.Status.EndTime.Time.Sub(appVersion.Status.StartTime.Time) r.Meters.AppDuration.Record(ctx, duration.Seconds(), attrs...) @@ -187,99 +219,6 @@ func (r *KeptnAppVersionReconciler) SetupWithManager(mgr ctrl.Manager) error { Complete(r) } -func (r *KeptnAppVersionReconciler) recordEvent(phase common.KeptnPhaseType, eventType string, appVersion *klcv1alpha1.KeptnAppVersion, shortReason string, longReason string) { - r.Recorder.Event(appVersion, eventType, fmt.Sprintf("%s%s", phase.ShortName, shortReason), fmt.Sprintf("%s %s / Namespace: %s, Name: %s, Version: %s ", phase.LongName, longReason, appVersion.Namespace, appVersion.Name, appVersion.Spec.Version)) -} - -func (r *KeptnAppVersionReconciler) handlePhase(ctx context.Context, ctxAppTrace context.Context, appVersion *klcv1alpha1.KeptnAppVersion, phase common.KeptnPhaseType, span trace.Span, phaseFailed func() bool, reconcilePhase func() (common.KeptnState, error)) (ctrl.Result, error) { - - oldStatus := appVersion.Status.Status - newStatus := oldStatus - statusUpdated := false - - r.Log.Info(phase.LongName + " not finished") - ctxAppTrace, spanAppTrace := r.getSpan(ctxAppTrace, appVersion, phase.ShortName) - - oldPhase := appVersion.Status.CurrentPhase - appVersion.Status.CurrentPhase = phase.ShortName - if phaseFailed() { //TODO eventually we should decide whether a task returns FAILED, currently we never have this status set - r.recordEvent(phase, "Warning", appVersion, "Failed", "has failed") - return ctrl.Result{Requeue: true, RequeueAfter: 60 * time.Second}, nil - } - state, err := reconcilePhase() - if err != nil { - spanAppTrace.AddEvent(phase.LongName + " could not get reconciled") - r.recordEvent(phase, "Warning", appVersion, "ReconcileErrored", "could not get reconciled") - span.SetStatus(codes.Error, err.Error()) - return ctrl.Result{Requeue: true}, err - } - if state.IsSucceeded() { - newStatus = common.StateSucceeded - spanAppTrace.AddEvent(phase.LongName + " has succeeded") - spanAppTrace.SetStatus(codes.Ok, "Succeeded") - spanAppTrace.End() - r.unbindSpan(appVersion, phase.ShortName) - r.recordEvent(phase, "Normal", appVersion, "Succeeded", "has succeeded") - } else if state.IsFailed() { - - appVersion.SetEndTime() - attrs := appVersion.GetMetricsAttributes() - r.Meters.AppCount.Add(ctx, 1, attrs...) - - newStatus = common.StateFailed - - spanAppTrace.AddEvent(phase.LongName + " has failed") - spanAppTrace.SetStatus(codes.Error, "Failed") - spanAppTrace.End() - r.unbindSpan(appVersion, phase.ShortName) - - r.recordEvent(phase, "Warning", appVersion, "Failed", "has failed") - } else { - newStatus = common.StateProgressing - r.recordEvent(phase, "Warning", appVersion, "NotFinished", "has not finished") - } - - // check if status changed - if oldPhase != appVersion.Status.CurrentPhase { - ctx, spanAppTrace = r.getSpan(ctxAppTrace, appVersion, appVersion.Status.CurrentPhase) - semconv.AddAttributeFromAppVersion(spanAppTrace, *appVersion) - statusUpdated = true - } - if oldStatus != newStatus { - appVersion.Status.Status = newStatus - statusUpdated = true - } - - if statusUpdated { - if err := r.Status().Update(ctx, appVersion); err != nil { - r.Log.Error(err, "could not update status") - } - } - return ctrl.Result{Requeue: true, RequeueAfter: 5 * time.Second}, nil -} - -func (r *KeptnAppVersionReconciler) getSpanName(appv *klcv1alpha1.KeptnAppVersion, phase string) string { - return fmt.Sprintf("%s.%s.%s.%s", appv.Spec.TraceId, appv.Spec.AppName, appv.Spec.Version, phase) -} - -func (r *KeptnAppVersionReconciler) getSpan(ctx context.Context, appv *klcv1alpha1.KeptnAppVersion, phase string) (context.Context, trace.Span) { - appvName := r.getSpanName(appv, phase) - if r.bindCRDSpan == nil { - r.bindCRDSpan = make(map[string]trace.Span) - } - if span, ok := r.bindCRDSpan[appvName]; ok { - return ctx, span - } - ctx, span := r.Tracer.Start(ctx, phase, trace.WithSpanKind(trace.SpanKindConsumer)) - r.Log.Info("DEBUG: Created span " + appvName) - r.bindCRDSpan[appvName] = span - return ctx, span -} - -func (r *KeptnAppVersionReconciler) unbindSpan(appv *klcv1alpha1.KeptnAppVersion, phase string) { - delete(r.bindCRDSpan, r.getSpanName(appv, phase)) -} - func (r *KeptnAppVersionReconciler) GetActiveApps(ctx context.Context) ([]common.GaugeValue, error) { appInstances := &klcv1alpha1.KeptnAppVersionList{} err := r.List(ctx, appInstances) diff --git a/operator/controllers/keptnappversion/reconcile_prepostdeployment.go b/operator/controllers/keptnappversion/reconcile_prepostdeployment.go index cee11b3b8b..bf303e03fb 100644 --- a/operator/controllers/keptnappversion/reconcile_prepostdeployment.go +++ b/operator/controllers/keptnappversion/reconcile_prepostdeployment.go @@ -5,6 +5,7 @@ import ( "fmt" "github.com/keptn/lifecycle-controller/operator/api/v1alpha1/semconv" + controllercommon "github.com/keptn/lifecycle-controller/operator/controllers/common" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/propagation" "go.opentelemetry.io/otel/trace" @@ -71,12 +72,12 @@ func (r *KeptnAppVersionReconciler) reconcileTasks(ctx context.Context, checkTyp } } - taskStatus := GetTaskStatus(taskDefinitionName, statuses) + taskStatus := controllercommon.GetTaskStatus(taskDefinitionName, statuses) task := &klcv1alpha1.KeptnTask{} taskExists := false if oldstatus != taskStatus.Status { - r.recordEvent(phase, "Normal", appVersion, "TaskStatusChanged", fmt.Sprintf("task status changed from %s to %s", oldstatus, taskStatus.Status)) + controllercommon.RecordEvent(r.Recorder, phase, "Normal", appVersion, "TaskStatusChanged", fmt.Sprintf("task status changed from %s to %s", oldstatus, taskStatus.Status), appVersion.GetVersion()) } // Check if task has already succeeded or failed @@ -119,7 +120,7 @@ func (r *KeptnAppVersionReconciler) reconcileTasks(ctx context.Context, checkTyp summary = common.UpdateStatusSummary(ns.Status, summary) } if common.GetOverallState(summary) != common.StateSucceeded { - r.recordEvent(phase, "Warning", appVersion, "NotFinished", "has not finished") + controllercommon.RecordEvent(r.Recorder, phase, "Warning", appVersion, "NotFinished", "has not finished", appVersion.GetVersion()) } return newStatus, summary, nil } @@ -163,23 +164,10 @@ func (r *KeptnAppVersionReconciler) createKeptnTask(ctx context.Context, namespa err = r.Client.Create(ctx, newTask) if err != nil { r.Log.Error(err, "could not create KeptnTask") - r.recordEvent(phase, "Warning", appVersion, "CreateFailed", "could not create KeptnTask") + controllercommon.RecordEvent(r.Recorder, phase, "Warning", appVersion, "CreateFailed", "could not create KeptnTask", appVersion.GetVersion()) return "", err } - r.recordEvent(phase, "Normal", appVersion, "Created", "created") + controllercommon.RecordEvent(r.Recorder, phase, "Normal", appVersion, "Created", "created", appVersion.GetVersion()) return newTask.Name, nil } - -func GetTaskStatus(taskName string, instanceStatus []klcv1alpha1.TaskStatus) klcv1alpha1.TaskStatus { - for _, status := range instanceStatus { - if status.TaskDefinitionName == taskName { - return status - } - } - return klcv1alpha1.TaskStatus{ - TaskDefinitionName: taskName, - Status: common.StatePending, - TaskName: "", - } -} diff --git a/operator/controllers/keptnappversion/reconcile_prepostevaluation.go b/operator/controllers/keptnappversion/reconcile_prepostevaluation.go index b20c850e71..55d2f80955 100644 --- a/operator/controllers/keptnappversion/reconcile_prepostevaluation.go +++ b/operator/controllers/keptnappversion/reconcile_prepostevaluation.go @@ -8,6 +8,7 @@ import ( klcv1alpha1 "github.com/keptn/lifecycle-controller/operator/api/v1alpha1" "github.com/keptn/lifecycle-controller/operator/api/v1alpha1/common" "github.com/keptn/lifecycle-controller/operator/api/v1alpha1/semconv" + controllercommon "github.com/keptn/lifecycle-controller/operator/controllers/common" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/propagation" "go.opentelemetry.io/otel/trace" @@ -71,12 +72,12 @@ func (r *KeptnAppVersionReconciler) reconcileEvaluations(ctx context.Context, ch } } - evaluationStatus := GetEvaluationStatus(evaluationName, statuses) + evaluationStatus := controllercommon.GetEvaluationStatus(evaluationName, statuses) evaluation := &klcv1alpha1.KeptnEvaluation{} evaluationExists := false if oldstatus != evaluationStatus.Status { - r.recordEvent(phase, "Normal", appVersion, "EvaluationStatusChanged", fmt.Sprintf("evaluation status changed from %s to %s", oldstatus, evaluationStatus.Status)) + controllercommon.RecordEvent(r.Recorder, phase, "Normal", appVersion, "EvaluationStatusChanged", fmt.Sprintf("evaluation status changed from %s to %s", oldstatus, evaluationStatus.Status), appVersion.GetVersion()) } // Check if evaluation has already succeeded or failed @@ -119,7 +120,7 @@ func (r *KeptnAppVersionReconciler) reconcileEvaluations(ctx context.Context, ch summary = common.UpdateStatusSummary(ns.Status, summary) } if common.GetOverallState(summary) != common.StateSucceeded { - r.recordEvent(phase, "Warning", appVersion, "NotFinished", "has not finished") + controllercommon.RecordEvent(r.Recorder, phase, "Warning", appVersion, "NotFinished", "has not finished", appVersion.GetVersion()) } return newStatus, summary, nil } @@ -164,23 +165,10 @@ func (r *KeptnAppVersionReconciler) createKeptnEvaluation(ctx context.Context, n err = r.Client.Create(ctx, newEvaluation) if err != nil { r.Log.Error(err, "could not create KeptnEvaluation") - r.recordEvent(phase, "Warning", appVersion, "CreateFailed", "could not create KeptnEvaluation") + controllercommon.RecordEvent(r.Recorder, phase, "Warning", appVersion, "CreateFailed", "could not create KeptnEvaluation", appVersion.GetVersion()) return "", err } - r.recordEvent(phase, "Normal", appVersion, "Created", "created") + controllercommon.RecordEvent(r.Recorder, phase, "Normal", appVersion, "Created", "created", appVersion.GetVersion()) return newEvaluation.Name, nil } - -func GetEvaluationStatus(evaluationName string, instanceStatus []klcv1alpha1.EvaluationStatus) klcv1alpha1.EvaluationStatus { - for _, status := range instanceStatus { - if status.EvaluationDefinitionName == evaluationName { - return status - } - } - return klcv1alpha1.EvaluationStatus{ - EvaluationDefinitionName: evaluationName, - Status: common.StatePending, - EvaluationName: "", - } -} diff --git a/operator/controllers/keptnappversion/reconcile_workloadsstate.go b/operator/controllers/keptnappversion/reconcile_workloadsstate.go index c41122b95a..9c254b9345 100644 --- a/operator/controllers/keptnappversion/reconcile_workloadsstate.go +++ b/operator/controllers/keptnappversion/reconcile_workloadsstate.go @@ -2,10 +2,10 @@ package keptnappversion import ( "context" - "fmt" klcv1alpha1 "github.com/keptn/lifecycle-controller/operator/api/v1alpha1" "github.com/keptn/lifecycle-controller/operator/api/v1alpha1/common" + controllercommon "github.com/keptn/lifecycle-controller/operator/controllers/common" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/types" ) @@ -15,12 +15,17 @@ func (r *KeptnAppVersionReconciler) reconcileWorkloads(ctx context.Context, appV var summary common.StatusSummary summary.Total = len(appVersion.Spec.Workloads) + phase := common.KeptnPhaseType{ + ShortName: "ReconcileWorkload", + LongName: "Reconcile Workloads", + } + var newStatus []klcv1alpha1.WorkloadStatus for _, w := range appVersion.Spec.Workloads { r.Log.Info("Reconciling workload " + w.Name) workload, err := r.getWorkloadInstance(ctx, getWorkloadInstanceName(appVersion.Namespace, appVersion.Spec.AppName, w.Name, w.Version)) if err != nil && errors.IsNotFound(err) { - r.Recorder.Event(appVersion, "Warning", "WorkloadNotFound", fmt.Sprintf("Could not find KeptnWorkloadInstance / Namespace: %s, Name: %s ", appVersion.Namespace, w.Name)) + controllercommon.RecordEvent(r.Recorder, phase, "Warning", appVersion, "NotFound", "workloadInstance not found", appVersion.GetVersion()) workload.Status.Status = common.StatePending } else if err != nil { r.Log.Error(err, "Could not get workload") diff --git a/operator/controllers/keptnevaluation/controller.go b/operator/controllers/keptnevaluation/controller.go index 997606a200..bf9499ef9b 100644 --- a/operator/controllers/keptnevaluation/controller.go +++ b/operator/controllers/keptnevaluation/controller.go @@ -83,7 +83,7 @@ func (r *KeptnEvaluationReconciler) Reconcile(ctx context.Context, req ctrl.Requ return ctrl.Result{}, nil } r.Log.Error(err, "Failed to get the KeptnEvaluation") - return ctrl.Result{Requeue: true, RequeueAfter: 30 * time.Second}, nil + return ctrl.Result{}, nil } traceContextCarrier := propagation.MapCarrier(evaluation.Annotations) @@ -114,7 +114,7 @@ func (r *KeptnEvaluationReconciler) Reconcile(ctx context.Context, req ctrl.Requ if err != nil { if errors.IsNotFound(err) { r.Log.Info(err.Error() + ", ignoring error since object must be deleted") - return ctrl.Result{Requeue: true, RequeueAfter: 30 * time.Second}, nil + return ctrl.Result{Requeue: true, RequeueAfter: 10 * time.Second}, nil } r.Log.Error(err, "Failed to retrieve a resource") return ctrl.Result{}, nil @@ -147,7 +147,7 @@ func (r *KeptnEvaluationReconciler) Reconcile(ctx context.Context, req ctrl.Requ if common.GetOverallState(statusSummary) == common.StateSucceeded { evaluation.Status.OverallStatus = common.StateSucceeded } else { - evaluation.Status.OverallStatus = common.StatePending + evaluation.Status.OverallStatus = common.StateProgressing } } diff --git a/operator/controllers/keptntask/controller.go b/operator/controllers/keptntask/controller.go index 4bd006d535..e399f991f5 100644 --- a/operator/controllers/keptntask/controller.go +++ b/operator/controllers/keptntask/controller.go @@ -79,12 +79,18 @@ func (r *KeptnTaskReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( task.SetStartTime() - err := r.Client.Status().Update(ctx, task) - if err != nil { - span.SetStatus(codes.Error, err.Error()) - return ctrl.Result{Requeue: true}, err + if task.Status.Status.IsPending() { + task.Status.Status = common.StateProgressing } + defer func(task *klcv1alpha1.KeptnTask) { + err := r.Client.Status().Update(ctx, task) + if err != nil { + r.Log.Error(err, "could not update status") + } + + }(task) + jobExists, err := r.JobExists(ctx, *task, req.Namespace) if err != nil { r.Log.Error(err, "Could not check if job is running") @@ -115,12 +121,6 @@ func (r *KeptnTaskReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( // Task is completed at this place task.SetEndTime() - err = r.Client.Status().Update(ctx, task) - if err != nil { - span.SetStatus(codes.Error, err.Error()) - return ctrl.Result{Requeue: true}, err - } - attrs := task.GetMetricsAttributes() r.Log.Info("Increasing task count") diff --git a/operator/controllers/keptnworkloadinstance/controller.go b/operator/controllers/keptnworkloadinstance/controller.go index d407513b64..dbebe91066 100644 --- a/operator/controllers/keptnworkloadinstance/controller.go +++ b/operator/controllers/keptnworkloadinstance/controller.go @@ -30,7 +30,6 @@ import ( "go.opentelemetry.io/otel/trace" "github.com/go-logr/logr" - "github.com/google/uuid" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" @@ -40,6 +39,7 @@ import ( klcv1alpha1 "github.com/keptn/lifecycle-controller/operator/api/v1alpha1" "github.com/keptn/lifecycle-controller/operator/api/v1alpha1/common" + controllercommon "github.com/keptn/lifecycle-controller/operator/controllers/common" "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -53,7 +53,7 @@ type KeptnWorkloadInstanceReconciler struct { Log logr.Logger Meters common.KeptnMeters Tracer trace.Tracer - bindCRDSpan map[string]trace.Span + SpanHandler controllercommon.SpanHandler } //+kubebuilder:rbac:groups=lifecycle.keptn.sh,resources=keptnworkloadinstances,verbs=get;list;watch;create;update;patch;delete @@ -101,17 +101,26 @@ func (r *KeptnWorkloadInstanceReconciler) Reconcile(ctx context.Context, req ctr workloadInstance.SetStartTime() + defer func(span trace.Span, workloadInstance *klcv1alpha1.KeptnWorkloadInstance) { + if workloadInstance.IsEndTimeSet() { + r.Log.Info("Increasing deployment count") + attrs := workloadInstance.GetMetricsAttributes() + r.Meters.AppCount.Add(ctx, 1, attrs...) + } + span.End() + }(span, workloadInstance) + //Wait for pre-evaluation checks of App phase := common.PhaseAppPreEvaluation found, appVersion, err := r.getAppVersionForWorkloadInstance(ctx, workloadInstance) if err != nil { span.SetStatus(codes.Error, err.Error()) - r.recordEvent(phase, "Warning", workloadInstance, "GetAppVersionFailed", "has failed since app could not be retrieved") + controllercommon.RecordEvent(r.Recorder, phase, "Warning", workloadInstance, "GetAppVersionFailed", "has failed since app could not be retrieved", workloadInstance.GetVersion()) return reconcile.Result{Requeue: true, RequeueAfter: 10 * time.Second}, fmt.Errorf("could not fetch AppVersion for KeptnWorkloadInstance: %+v", err) } else if !found { span.SetStatus(codes.Error, "app could not be found") - r.recordEvent(phase, "Warning", workloadInstance, "AppVersionNotFound", "has failed since app could not be found") + controllercommon.RecordEvent(r.Recorder, phase, "Warning", workloadInstance, "AppVersionNotFound", "has failed since app could not be found", workloadInstance.GetVersion()) return reconcile.Result{Requeue: true, RequeueAfter: 10 * time.Second}, fmt.Errorf("could not find AppVersion for KeptnWorkloadInstance") } @@ -121,110 +130,102 @@ func (r *KeptnWorkloadInstanceReconciler) Reconcile(ctx context.Context, req ctr appPreEvalStatus := appVersion.Status.PreDeploymentEvaluationStatus if !appPreEvalStatus.IsSucceeded() { if appPreEvalStatus.IsFailed() { - r.recordEvent(phase, "Warning", workloadInstance, "Failed", "has failed since app has failed") + controllercommon.RecordEvent(r.Recorder, phase, "Warning", workloadInstance, "Failed", "has failed since app has failed", workloadInstance.GetVersion()) return ctrl.Result{Requeue: true, RequeueAfter: 60 * time.Second}, nil } - r.recordEvent(phase, "Normal", workloadInstance, "NotFinished", "Pre evaluations tasks for app not finished") + controllercommon.RecordEvent(r.Recorder, phase, "Normal", workloadInstance, "NotFinished", "Pre evaluations tasks for app not finished", workloadInstance.GetVersion()) return ctrl.Result{Requeue: true, RequeueAfter: 20 * time.Second}, nil } + controllercommon.RecordEvent(r.Recorder, phase, "Normal", workloadInstance, "FinishedSuccess", "Pre evaluations tasks for app have finished successfully", workloadInstance.GetVersion()) + //Wait for pre-deployment checks of Workload phase = common.PhaseWorkloadPreDeployment - saveState := false - //Set state to progressing if not already set - if workloadInstance.Status.PreDeploymentStatus == common.StatePending { - workloadInstance.Status.PreDeploymentStatus = common.StateProgressing - saveState = true + phaseHandler := controllercommon.PhaseHandler{ + Client: r.Client, + Recorder: r.Recorder, + Log: r.Log, + SpanHandler: r.SpanHandler, } + // set the App trace id if not already set if len(workloadInstance.Spec.TraceId) < 1 { workloadInstance.Spec.TraceId = appVersion.Spec.TraceId - saveState = true - } - if saveState { - if err := r.Status().Update(ctx, workloadInstance); err != nil { + if err := r.Update(ctx, workloadInstance); err != nil { return ctrl.Result{}, err } } - if appVersion.Status.CurrentPhase == "" { - r.unbindSpan(workloadInstance, phase.ShortName) + + if workloadInstance.Status.CurrentPhase == "" { + if err := r.SpanHandler.UnbindSpan(workloadInstance, phase.ShortName); err != nil { + r.Log.Error(err, "cannot unbind span") + } var spanAppTrace trace.Span - ctxAppTrace, spanAppTrace = r.getSpan(ctxAppTrace, workloadInstance, phase.ShortName) - semconv.AddAttributeFromAppVersion(spanAppTrace, appVersion) + ctxAppTrace, spanAppTrace, err = r.SpanHandler.GetSpan(ctxAppTrace, r.Tracer, workloadInstance, phase.ShortName) + if err != nil { + r.Log.Error(err, "could not get span") + } + semconv.AddAttributeFromWorkloadInstance(spanAppTrace, *workloadInstance) spanAppTrace.AddEvent("WorkloadInstance Pre-Deployment Tasks started", trace.WithTimestamp(time.Now())) - r.recordEvent(phase, "Normal", workloadInstance, "Started", "have started") + controllercommon.RecordEvent(r.Recorder, phase, "Normal", workloadInstance, "Started", "have started", workloadInstance.GetVersion()) } if !workloadInstance.IsPreDeploymentSucceeded() { reconcilePre := func() (common.KeptnState, error) { return r.reconcilePrePostDeployment(ctx, workloadInstance, common.PreDeploymentCheckType) } - return r.handlePhase(ctx, ctxAppTrace, workloadInstance, phase, span, workloadInstance.IsPreDeploymentFailed, reconcilePre) + result, err := phaseHandler.HandlePhase(ctx, ctxAppTrace, r.Tracer, workloadInstance, phase, span, reconcilePre) + if !result.Continue { + return result.Result, err + } } //Wait for pre-evaluation checks of Workload phase = common.PhaseAppPreEvaluation - - //Set state to progressing if not already set - if workloadInstance.Status.PreDeploymentEvaluationStatus == common.StatePending { - workloadInstance.Status.PreDeploymentEvaluationStatus = common.StateProgressing - if err := r.Status().Update(ctx, workloadInstance); err != nil { - return ctrl.Result{}, err - } - } if !workloadInstance.IsPreDeploymentEvaluationSucceeded() { reconcilePreEval := func() (common.KeptnState, error) { return r.reconcilePrePostEvaluation(ctx, workloadInstance, common.PreDeploymentEvaluationCheckType) } - return r.handlePhase(ctx, ctxAppTrace, workloadInstance, phase, span, workloadInstance.IsPreDeploymentEvaluationFailed, reconcilePreEval) + result, err := phaseHandler.HandlePhase(ctx, ctxAppTrace, r.Tracer, workloadInstance, phase, span, reconcilePreEval) + if !result.Continue { + return result.Result, err + } } //Wait for deployment of Workload phase = common.PhaseWorkloadDeployment - //Set state to progressing if not already set - if workloadInstance.Status.DeploymentStatus == common.StatePending { - workloadInstance.Status.DeploymentStatus = common.StateProgressing - if err := r.Status().Update(ctx, workloadInstance); err != nil { - return ctrl.Result{}, err - } - } if !workloadInstance.IsDeploymentSucceeded() { reconcileWorkloadInstance := func() (common.KeptnState, error) { return r.reconcileDeployment(ctx, workloadInstance) } - return r.handlePhase(ctx, ctxAppTrace, workloadInstance, phase, span, workloadInstance.IsDeploymentFailed, reconcileWorkloadInstance) + result, err := phaseHandler.HandlePhase(ctx, ctxAppTrace, r.Tracer, workloadInstance, phase, span, reconcileWorkloadInstance) + if !result.Continue { + return result.Result, err + } } //Wait for post-deployment checks of Workload phase = common.PhaseWorkloadPostDeployment - //Set state to progressing if not already set - if workloadInstance.Status.PostDeploymentStatus == common.StatePending { - workloadInstance.Status.PostDeploymentStatus = common.StateProgressing - if err := r.Status().Update(ctx, workloadInstance); err != nil { - return ctrl.Result{}, err - } - } if !workloadInstance.IsPostDeploymentSucceeded() { reconcilePostDeployment := func() (common.KeptnState, error) { return r.reconcilePrePostDeployment(ctx, workloadInstance, common.PostDeploymentCheckType) } - return r.handlePhase(ctx, ctxAppTrace, workloadInstance, phase, span, workloadInstance.IsPostDeploymentFailed, reconcilePostDeployment) + result, err := phaseHandler.HandlePhase(ctx, ctxAppTrace, r.Tracer, workloadInstance, phase, span, reconcilePostDeployment) + if !result.Continue { + return result.Result, err + } } //Wait for post-evaluation checks of Workload phase = common.PhaseAppPostEvaluation - //Set state to progressing if not already set - if workloadInstance.Status.PostDeploymentStatus == common.StatePending { - workloadInstance.Status.PostDeploymentStatus = common.StateProgressing - if err := r.Status().Update(ctx, workloadInstance); err != nil { - return ctrl.Result{}, err - } - } if !workloadInstance.IsPostDeploymentEvaluationSucceeded() { reconcilePostEval := func() (common.KeptnState, error) { return r.reconcilePrePostEvaluation(ctx, workloadInstance, common.PostDeploymentEvaluationCheckType) } - return r.handlePhase(ctx, ctxAppTrace, workloadInstance, phase, span, workloadInstance.IsPostDeploymentEvaluationFailed, reconcilePostEval) + result, err := phaseHandler.HandlePhase(ctx, ctxAppTrace, r.Tracer, workloadInstance, phase, span, reconcilePostEval) + if !result.Continue { + return result.Result, err + } } // WorkloadInstance is completed at this place @@ -242,15 +243,11 @@ func (r *KeptnWorkloadInstanceReconciler) Reconcile(ctx context.Context, req ctr attrs := workloadInstance.GetMetricsAttributes() - r.Log.Info("Increasing deployment count") - // metrics: increment deployment counter - r.Meters.DeploymentCount.Add(ctx, 1, attrs...) - // metrics: add deployment duration duration := workloadInstance.Status.EndTime.Time.Sub(workloadInstance.Status.StartTime.Time) r.Meters.DeploymentDuration.Record(ctx, duration.Seconds(), attrs...) - r.recordEvent(phase, "Normal", workloadInstance, "Finished", "is finished") + controllercommon.RecordEvent(r.Recorder, phase, "Normal", workloadInstance, "Finished", "is finished", workloadInstance.GetVersion()) return ctrl.Result{}, nil } @@ -278,68 +275,6 @@ func (r *KeptnWorkloadInstanceReconciler) GetActiveDeployments(ctx context.Conte return res, nil } -func (r *KeptnWorkloadInstanceReconciler) handlePhase(ctx context.Context, ctxAppTrace context.Context, workloadInstance *klcv1alpha1.KeptnWorkloadInstance, phase common.KeptnPhaseType, span trace.Span, phaseFailed func() bool, reconcilePhase func() (common.KeptnState, error)) (ctrl.Result, error) { - r.Log.Info(phase.LongName + " not finished") - overallStateUpdated := false - oldstate := workloadInstance.Status.Status - oldPhase := workloadInstance.Status.CurrentPhase - workloadInstance.Status.CurrentPhase = phase.ShortName - - ctxAppTrace, spanAppTrace := r.getSpan(ctxAppTrace, workloadInstance, phase.ShortName) - - if phaseFailed() { //TODO eventually we should decide whether a task returns FAILED, currently we never have this status set - r.recordEvent(phase, "Warning", workloadInstance, "Failed", "has failed") - return ctrl.Result{Requeue: true, RequeueAfter: 60 * time.Second}, nil - } - state, err := reconcilePhase() - if err != nil { - spanAppTrace.AddEvent(phase.LongName + " could not get reconciled") - r.recordEvent(phase, "Warning", workloadInstance, "ReconcileErrored", "could not get reconciled") - span.SetStatus(codes.Error, err.Error()) - return ctrl.Result{Requeue: true}, err - } - if state.IsSucceeded() { - spanAppTrace.AddEvent(phase.LongName + " has succeeded") - spanAppTrace.SetStatus(codes.Ok, "Succeeded") - spanAppTrace.End() - r.unbindSpan(workloadInstance, phase.ShortName) - r.recordEvent(phase, "Normal", workloadInstance, "Succeeded", "has succeeded") - } else if state.IsFailed() { - r.recordEvent(phase, "Warning", workloadInstance, "Failed", "has failed") - workloadInstance.Status.Status = common.StateFailed - workloadInstance.SetEndTime() - - attrs := workloadInstance.GetMetricsAttributes() - r.Meters.DeploymentCount.Add(ctx, 1, attrs...) - - spanAppTrace.AddEvent(phase.LongName + " has failed") - spanAppTrace.SetStatus(codes.Error, "Failed") - spanAppTrace.End() - r.unbindSpan(workloadInstance, phase.ShortName) - - overallStateUpdated = true - } else { - if oldstate != common.StateProgressing { - workloadInstance.Status.Status = common.StateProgressing - overallStateUpdated = true - } - spanAppTrace.AddEvent(phase.LongName + " not finished") - r.recordEvent(phase, "Warning", workloadInstance, "NotFinished", "has not finished") - } - if oldPhase != workloadInstance.Status.CurrentPhase { - ctxAppTrace, spanAppTrace = r.getSpan(ctxAppTrace, workloadInstance, workloadInstance.Status.CurrentPhase) - semconv.AddAttributeFromWorkloadInstance(spanAppTrace, *workloadInstance) - overallStateUpdated = true - } - - if overallStateUpdated { - if err := r.Status().Update(ctx, workloadInstance); err != nil { - r.Log.Error(err, "could not update status") - } - } - return ctrl.Result{Requeue: true, RequeueAfter: 5 * time.Second}, nil -} - // SetupWithManager sets up the controller with the Manager. func (r *KeptnWorkloadInstanceReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). @@ -348,19 +283,6 @@ func (r *KeptnWorkloadInstanceReconciler) SetupWithManager(mgr ctrl.Manager) err Complete(r) } -func (r *KeptnWorkloadInstanceReconciler) generateSuffix() string { - uid := uuid.New().String() - return uid[:10] -} - -func (r *KeptnWorkloadInstanceReconciler) recordEvent(phase common.KeptnPhaseType, eventType string, workloadInstance *klcv1alpha1.KeptnWorkloadInstance, shortReason string, longReason string) { - r.Recorder.Event(workloadInstance, eventType, fmt.Sprintf("%s%s", phase.ShortName, shortReason), fmt.Sprintf("%s %s / Namespace: %s, Name: %s, Version: %s ", phase.LongName, longReason, workloadInstance.Namespace, workloadInstance.Name, workloadInstance.Spec.Version)) -} - -func GetAppVersionName(namespace string, appName string, version string) types.NamespacedName { - return types.NamespacedName{Namespace: namespace, Name: appName + "-" + version} -} - func (r *KeptnWorkloadInstanceReconciler) getAppVersion(ctx context.Context, appName types.NamespacedName) (*klcv1alpha1.KeptnAppVersion, error) { app := &klcv1alpha1.KeptnApp{} err := r.Get(ctx, appName, app) @@ -369,7 +291,7 @@ func (r *KeptnWorkloadInstanceReconciler) getAppVersion(ctx context.Context, app } appVersion := &klcv1alpha1.KeptnAppVersion{} - err = r.Get(ctx, GetAppVersionName(appName.Namespace, appName.Name, app.Spec.Version), appVersion) + err = r.Get(ctx, controllercommon.GetAppVersionName(appName.Namespace, appName.Name, app.Spec.Version), appVersion) return appVersion, err } @@ -403,30 +325,6 @@ func (r *KeptnWorkloadInstanceReconciler) getAppVersionForWorkloadInstance(ctx c return true, latestVersion, nil } -func (r *KeptnWorkloadInstanceReconciler) getSpan(ctx context.Context, wli *klcv1alpha1.KeptnWorkloadInstance, phase string) (context.Context, trace.Span) { - wliName := r.getSpanName(wli, phase) - spanName := fmt.Sprintf("%s/%s", wli.Spec.WorkloadName, phase) - - if r.bindCRDSpan == nil { - r.bindCRDSpan = make(map[string]trace.Span) - } - if span, ok := r.bindCRDSpan[wliName]; ok { - return ctx, span - } - r.Log.Info("DEBUG: Start Span: " + wliName) - ctx, span := r.Tracer.Start(ctx, spanName, trace.WithSpanKind(trace.SpanKindConsumer)) - r.bindCRDSpan[wliName] = span - return ctx, span -} - -func (r *KeptnWorkloadInstanceReconciler) unbindSpan(wli *klcv1alpha1.KeptnWorkloadInstance, phase string) { - delete(r.bindCRDSpan, r.getSpanName(wli, phase)) -} - -func (r *KeptnWorkloadInstanceReconciler) getSpanName(wli *klcv1alpha1.KeptnWorkloadInstance, phase string) string { - return fmt.Sprintf("%s.%s.%s.%s.%s", wli.Spec.TraceId, wli.Spec.AppName, wli.Spec.WorkloadName, wli.Spec.Version, phase) -} - func (r *KeptnWorkloadInstanceReconciler) GetDeploymentInterval(ctx context.Context) ([]common.GaugeFloatValue, error) { workloadInstances := &klcv1alpha1.KeptnWorkloadInstanceList{} err := r.List(ctx, workloadInstances) diff --git a/operator/controllers/keptnworkloadinstance/reconcile_deploymentstate.go b/operator/controllers/keptnworkloadinstance/reconcile_deploymentstate.go index 6fab63340b..d29bebc5b0 100644 --- a/operator/controllers/keptnworkloadinstance/reconcile_deploymentstate.go +++ b/operator/controllers/keptnworkloadinstance/reconcile_deploymentstate.go @@ -2,6 +2,7 @@ package keptnworkloadinstance import ( "context" + klcv1alpha1 "github.com/keptn/lifecycle-controller/operator/api/v1alpha1" "github.com/keptn/lifecycle-controller/operator/api/v1alpha1/common" appsv1 "k8s.io/api/apps/v1" @@ -13,51 +14,52 @@ import ( func (r *KeptnWorkloadInstanceReconciler) reconcileDeployment(ctx context.Context, workloadInstance *klcv1alpha1.KeptnWorkloadInstance) (common.KeptnState, error) { if workloadInstance.Spec.ResourceReference.Kind == "Pod" { - isPodRunning, err := r.isPodRunning(ctx, workloadInstance.Spec.ResourceReference, workloadInstance.Namespace) if err != nil { return common.StateUnknown, err } if isPodRunning { workloadInstance.Status.DeploymentStatus = common.StateSucceeded + } else { + workloadInstance.Status.DeploymentStatus = common.StateProgressing + } + } else { + isReplicaRunning, err := r.isReplicaSetRunning(ctx, workloadInstance.Spec.ResourceReference, workloadInstance.Namespace) + if err != nil { + return common.StateUnknown, err + } + if isReplicaRunning { + workloadInstance.Status.DeploymentStatus = common.StateSucceeded + } else { + workloadInstance.Status.DeploymentStatus = common.StateProgressing } } - isReplicaRunning, count, err := r.isReplicaSetRunning(ctx, workloadInstance.Spec.ResourceReference, workloadInstance.Namespace) - if err != nil { - return common.StateUnknown, err - } - if isReplicaRunning { - workloadInstance.Status.DeploymentStatus = common.StateSucceeded - } else if count > 0 { - workloadInstance.Status.DeploymentStatus = common.StateProgressing - } - - err = r.Client.Status().Update(ctx, workloadInstance) + err := r.Client.Status().Update(ctx, workloadInstance) if err != nil { return common.StateUnknown, err } return workloadInstance.Status.DeploymentStatus, nil } -func (r *KeptnWorkloadInstanceReconciler) isReplicaSetRunning(ctx context.Context, resource klcv1alpha1.ResourceReference, namespace string) (bool, int32, error) { +func (r *KeptnWorkloadInstanceReconciler) isReplicaSetRunning(ctx context.Context, resource klcv1alpha1.ResourceReference, namespace string) (bool, error) { replica := &appsv1.ReplicaSetList{} if err := r.Client.List(ctx, replica, client.InNamespace(namespace)); err != nil { - return false, 0, err + return false, err } for _, re := range replica.Items { if re.UID == resource.UID { replicas, err := r.getDesiredReplicas(ctx, re.OwnerReferences[0], namespace) if err != nil { - return false, re.Status.ReadyReplicas, err + return false, err } if re.Status.ReadyReplicas == replicas { - return true, re.Status.ReadyReplicas, nil + return true, nil } - return false, re.Status.ReadyReplicas, nil + return false, nil } } - return false, 0, nil + return false, nil } diff --git a/operator/controllers/keptnworkloadinstance/reconcile_prepostdeployment.go b/operator/controllers/keptnworkloadinstance/reconcile_prepostdeployment.go index 53598f694e..40cacba73c 100644 --- a/operator/controllers/keptnworkloadinstance/reconcile_prepostdeployment.go +++ b/operator/controllers/keptnworkloadinstance/reconcile_prepostdeployment.go @@ -7,6 +7,7 @@ import ( klcv1alpha1 "github.com/keptn/lifecycle-controller/operator/api/v1alpha1" "github.com/keptn/lifecycle-controller/operator/api/v1alpha1/common" "github.com/keptn/lifecycle-controller/operator/api/v1alpha1/semconv" + controllercommon "github.com/keptn/lifecycle-controller/operator/controllers/common" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/propagation" "go.opentelemetry.io/otel/trace" @@ -40,15 +41,6 @@ func (r *KeptnWorkloadInstanceReconciler) reconcilePrePostDeployment(ctx context return overallState, nil } -func (r *KeptnWorkloadInstanceReconciler) getKeptnTask(ctx context.Context, taskName string, namespace string) (*klcv1alpha1.KeptnTask, error) { - task := &klcv1alpha1.KeptnTask{} - err := r.Client.Get(ctx, types.NamespacedName{Name: taskName, Namespace: namespace}, task) - if err != nil { - return task, err - } - return task, nil -} - func (r *KeptnWorkloadInstanceReconciler) createKeptnTask(ctx context.Context, namespace string, workloadInstance *klcv1alpha1.KeptnWorkloadInstance, taskDefinition string, checkType common.CheckType) (string, error) { ctx, span := r.Tracer.Start(ctx, fmt.Sprintf("create_%s_deployment_task", checkType), trace.WithSpanKind(trace.SpanKindProducer)) defer span.End() @@ -59,6 +51,12 @@ func (r *KeptnWorkloadInstanceReconciler) createKeptnTask(ctx context.Context, n // follow up with a Keptn propagator that JSON-encoded the OTel map into our own key traceContextCarrier := propagation.MapCarrier{} otel.GetTextMapPropagator().Inject(ctx, traceContextCarrier) + + phase := common.KeptnPhaseType{ + ShortName: "KeptnTaskCreate", + LongName: "Keptn Task Create", + } + newTask := &klcv1alpha1.KeptnTask{ ObjectMeta: metav1.ObjectMeta{ Name: common.GenerateTaskName(checkType, taskDefinition), @@ -82,10 +80,10 @@ func (r *KeptnWorkloadInstanceReconciler) createKeptnTask(ctx context.Context, n err = r.Client.Create(ctx, newTask) if err != nil { r.Log.Error(err, "could not create KeptnTask") - r.Recorder.Event(workloadInstance, "Warning", "KeptnTaskNotCreated", fmt.Sprintf("Could not create KeptnTask / Namespace: %s, Name: %s ", newTask.Namespace, newTask.Name)) + controllercommon.RecordEvent(r.Recorder, phase, "Warning", workloadInstance, "CreateFailed", "could not create KeptnTask", workloadInstance.GetVersion()) return "", err } - r.Recorder.Event(workloadInstance, "Normal", "KeptnTaskCreated", fmt.Sprintf("Created KeptnTask / Namespace: %s, Name: %s ", newTask.Namespace, newTask.Name)) + controllercommon.RecordEvent(r.Recorder, phase, "Normal", workloadInstance, "Created", "created", workloadInstance.GetVersion()) return newTask.Name, nil } @@ -121,12 +119,12 @@ func (r *KeptnWorkloadInstanceReconciler) reconcileTasks(ctx context.Context, ch } } - taskStatus := GetTaskStatus(taskDefinitionName, statuses) + taskStatus := controllercommon.GetTaskStatus(taskDefinitionName, statuses) task := &klcv1alpha1.KeptnTask{} taskExists := false if oldstatus != taskStatus.Status { - r.recordEvent(phase, "Normal", workloadInstance, "TaskStatusChanged", fmt.Sprintf("task status changed from %s to %s", oldstatus, taskStatus.Status)) + controllercommon.RecordEvent(r.Recorder, phase, "Normal", workloadInstance, "TaskStatusChanged", fmt.Sprintf("task status changed from %s to %s", oldstatus, taskStatus.Status), workloadInstance.GetVersion()) } // Check if task has already succeeded or failed @@ -169,20 +167,7 @@ func (r *KeptnWorkloadInstanceReconciler) reconcileTasks(ctx context.Context, ch summary = common.UpdateStatusSummary(ns.Status, summary) } if common.GetOverallState(summary) != common.StateSucceeded { - r.Recorder.Event(workloadInstance, "Warning", "TasksNotFinished", fmt.Sprintf("Tasks have not finished / Namespace: %s, Name: %s, Summary: %v ", workloadInstance.Namespace, workloadInstance.Name, summary)) + controllercommon.RecordEvent(r.Recorder, phase, "Warning", workloadInstance, "NotFinished", "tasks have not finished", workloadInstance.GetVersion()) } return newStatus, summary, nil } - -func GetTaskStatus(taskName string, instanceStatus []klcv1alpha1.TaskStatus) klcv1alpha1.TaskStatus { - for _, status := range instanceStatus { - if status.TaskDefinitionName == taskName { - return status - } - } - return klcv1alpha1.TaskStatus{ - TaskDefinitionName: taskName, - Status: common.StatePending, - TaskName: "", - } -} diff --git a/operator/controllers/keptnworkloadinstance/reconcile_prepostevaluation.go b/operator/controllers/keptnworkloadinstance/reconcile_prepostevaluation.go index 412c06319f..376959b47b 100644 --- a/operator/controllers/keptnworkloadinstance/reconcile_prepostevaluation.go +++ b/operator/controllers/keptnworkloadinstance/reconcile_prepostevaluation.go @@ -8,6 +8,7 @@ import ( klcv1alpha1 "github.com/keptn/lifecycle-controller/operator/api/v1alpha1" "github.com/keptn/lifecycle-controller/operator/api/v1alpha1/common" "github.com/keptn/lifecycle-controller/operator/api/v1alpha1/semconv" + controllercommon "github.com/keptn/lifecycle-controller/operator/controllers/common" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/propagation" "go.opentelemetry.io/otel/trace" @@ -71,12 +72,12 @@ func (r *KeptnWorkloadInstanceReconciler) reconcileEvaluations(ctx context.Conte } } - evaluationStatus := GetEvaluationStatus(evaluationName, statuses) + evaluationStatus := controllercommon.GetEvaluationStatus(evaluationName, statuses) evaluation := &klcv1alpha1.KeptnEvaluation{} evaluationExists := false if oldstatus != evaluationStatus.Status { - r.recordEvent(phase, "Normal", workloadInstance, "EvaluationStatusChanged", fmt.Sprintf("evaluation status changed from %s to %s", oldstatus, evaluationStatus.Status)) + controllercommon.RecordEvent(r.Recorder, phase, "Normal", workloadInstance, "EvaluationStatusChanged", fmt.Sprintf("evaluation status changed from %s to %s", oldstatus, evaluationStatus.Status), workloadInstance.GetVersion()) } // Check if evaluation has already succeeded or failed @@ -119,7 +120,7 @@ func (r *KeptnWorkloadInstanceReconciler) reconcileEvaluations(ctx context.Conte summary = common.UpdateStatusSummary(ns.Status, summary) } if common.GetOverallState(summary) != common.StateSucceeded { - r.recordEvent(phase, "Warning", workloadInstance, "NotFinished", "has not finished") + controllercommon.RecordEvent(r.Recorder, phase, "Warning", workloadInstance, "NotFinished", "has not finished", workloadInstance.GetVersion()) } return newStatus, summary, nil } @@ -164,23 +165,10 @@ func (r *KeptnWorkloadInstanceReconciler) createKeptnEvaluation(ctx context.Cont err = r.Client.Create(ctx, newEvaluation) if err != nil { r.Log.Error(err, "could not create KeptnEvaluation") - r.recordEvent(phase, "Warning", workloadInstance, "CreateFailed", "could not create KeptnEvaluation") + controllercommon.RecordEvent(r.Recorder, phase, "Warning", workloadInstance, "CreateFailed", "could not create KeptnEvaluation", workloadInstance.GetVersion()) return "", err } - r.recordEvent(phase, "Normal", workloadInstance, "Created", "created") + controllercommon.RecordEvent(r.Recorder, phase, "Normal", workloadInstance, "Created", "created", workloadInstance.GetVersion()) return newEvaluation.Name, nil } - -func GetEvaluationStatus(evaluationName string, instanceStatus []klcv1alpha1.EvaluationStatus) klcv1alpha1.EvaluationStatus { - for _, status := range instanceStatus { - if status.EvaluationDefinitionName == evaluationName { - return status - } - } - return klcv1alpha1.EvaluationStatus{ - EvaluationDefinitionName: evaluationName, - Status: common.StatePending, - EvaluationName: "", - } -} diff --git a/operator/main.go b/operator/main.go index e06caf671b..46b7ff330e 100644 --- a/operator/main.go +++ b/operator/main.go @@ -50,6 +50,8 @@ import ( "go.opentelemetry.io/otel/sdk/trace" semconv "go.opentelemetry.io/otel/semconv/v1.4.0" + controllercommon "github.com/keptn/lifecycle-controller/operator/controllers/common" + "os" "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" @@ -251,6 +253,8 @@ func main() { os.Exit(1) } + spanHandler := controllercommon.SpanHandler{} + if !disableWebhook { mgr.GetWebhookServer().Register("/mutate-v1-pod", &webhook.Admission{ Handler: &webhooks.PodMutatingWebhook{ @@ -309,12 +313,13 @@ func main() { } workloadInstanceReconciler := &keptnworkloadinstance.KeptnWorkloadInstanceReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - Log: ctrl.Log.WithName("KeptnWorkloadInstance Controller"), - Recorder: mgr.GetEventRecorderFor("keptnworkloadinstance-controller"), - Meters: meters, - Tracer: otel.Tracer("keptn/operator/workloadinstance"), + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Log: ctrl.Log.WithName("KeptnWorkloadInstance Controller"), + Recorder: mgr.GetEventRecorderFor("keptnworkloadinstance-controller"), + Meters: meters, + Tracer: otel.Tracer("keptn/operator/workloadinstance"), + SpanHandler: spanHandler, } if err = (workloadInstanceReconciler).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "KeptnWorkloadInstance") @@ -322,12 +327,13 @@ func main() { } appVersionReconciler := &keptnappversion.KeptnAppVersionReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - Log: ctrl.Log.WithName("KeptnAppVersion Controller"), - Recorder: mgr.GetEventRecorderFor("keptnappversion-controller"), - Tracer: otel.Tracer("keptn/operator/appversion"), - Meters: meters, + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Log: ctrl.Log.WithName("KeptnAppVersion Controller"), + Recorder: mgr.GetEventRecorderFor("keptnappversion-controller"), + Tracer: otel.Tracer("keptn/operator/appversion"), + Meters: meters, + SpanHandler: spanHandler, } if err = (appVersionReconciler).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "KeptnAppVersion")