Skip to content

Commit

Permalink
Add WLI tracing
Browse files Browse the repository at this point in the history
Signed-off-by: Giovanni Liva <[email protected]>
  • Loading branch information
thisthat committed Oct 24, 2022
1 parent c23ffbf commit 145d6bb
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 14 deletions.
5 changes: 3 additions & 2 deletions operator/api/v1alpha1/keptnworkloadinstance_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ import (
// KeptnWorkloadInstanceSpec defines the desired state of KeptnWorkloadInstance
type KeptnWorkloadInstanceSpec struct {
KeptnWorkloadSpec `json:",inline"`
WorkloadName string `json:"workloadName"`
PreviousVersion string `json:"previousVersion,omitempty"`
WorkloadName string `json:"workloadName"`
PreviousVersion string `json:"previousVersion,omitempty"`
TraceId map[string]string `json:"traceId,omitempty"`
}

// KeptnWorkloadInstanceStatus defines the observed state of KeptnWorkloadInstance
Expand Down
81 changes: 69 additions & 12 deletions operator/controllers/keptnworkloadinstance/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,12 @@ import (
// KeptnWorkloadInstanceReconciler reconciles a KeptnWorkloadInstance object
type KeptnWorkloadInstanceReconciler struct {
client.Client
Scheme *runtime.Scheme
Recorder record.EventRecorder
Log logr.Logger
Meters common.KeptnMeters
Tracer trace.Tracer
Scheme *runtime.Scheme
Recorder record.EventRecorder
Log logr.Logger
Meters common.KeptnMeters
Tracer trace.Tracer
bindCRDSpan map[string]trace.Span
}

//+kubebuilder:rbac:groups=lifecycle.keptn.sh,resources=keptnworkloadinstances,verbs=get;list;watch;create;update;patch;delete
Expand Down Expand Up @@ -113,6 +114,9 @@ func (r *KeptnWorkloadInstanceReconciler) Reconcile(ctx context.Context, req ctr
return reconcile.Result{Requeue: true, RequeueAfter: 10 * time.Second}, fmt.Errorf("could not find AppVersion for KeptnWorkloadInstance")
}

appTraceContextCarrier := propagation.MapCarrier(appVersion.Spec.TraceId)
ctxAppTrace := otel.GetTextMapPropagator().Extract(context.TODO(), appTraceContextCarrier)

appPreEvalStatus := appVersion.Status.PreDeploymentEvaluationStatus
if !appPreEvalStatus.IsSucceeded() {
if appPreEvalStatus.IsFailed() {
Expand All @@ -125,23 +129,36 @@ func (r *KeptnWorkloadInstanceReconciler) Reconcile(ctx context.Context, req ctr

//Wait for pre-deployment checks of Workload
phase = common.PhaseWorkloadPreDeployment

saveState := false
//Set state to progressing if not already set
if workloadInstance.Status.PreDeploymentStatus == common.StatePending {
workloadInstance.Status.PreDeploymentStatus = common.StateProgressing
saveState = true
}
// set the App trace id if not already set
if len(workloadInstance.Spec.TraceId) < 1 {
workloadInstance.Spec.TraceId = appVersion.Spec.TraceId
saveState = true
}
if saveState {
if err := r.Status().Update(ctx, workloadInstance); err != nil {
return ctrl.Result{}, err
}
}
if appVersion.Status.CurrentPhase == "" {
r.unbindSpan(workloadInstance, phase.ShortName)
var spanAppTrace trace.Span
ctxAppTrace, spanAppTrace = r.getSpan(ctxAppTrace, workloadInstance, phase.ShortName)
semconv.AddAttributeFromAppVersion(spanAppTrace, appVersion)
spanAppTrace.AddEvent("WorkloadInstance Pre-Deployment Tasks started", trace.WithTimestamp(time.Now()))
r.recordEvent(phase, "Normal", workloadInstance, "Started", "have started")
}

if !workloadInstance.IsPreDeploymentSucceeded() {
reconcilePre := func() (common.KeptnState, error) {
return r.reconcilePrePostDeployment(ctx, workloadInstance, common.PreDeploymentCheckType)
}
return r.handlePhase(ctx, workloadInstance, phase, span, workloadInstance.IsPreDeploymentFailed, reconcilePre)
return r.handlePhase(ctx, ctxAppTrace, workloadInstance, phase, span, workloadInstance.IsPreDeploymentFailed, reconcilePre)
}

//Wait for pre-evaluation checks of Workload
Expand All @@ -158,7 +175,7 @@ func (r *KeptnWorkloadInstanceReconciler) Reconcile(ctx context.Context, req ctr
reconcilePreEval := func() (common.KeptnState, error) {
return r.reconcilePrePostEvaluation(ctx, workloadInstance, common.PreDeploymentEvaluationCheckType)
}
return r.handlePhase(ctx, workloadInstance, phase, span, workloadInstance.IsPreDeploymentEvaluationFailed, reconcilePreEval)
return r.handlePhase(ctx, ctxAppTrace, workloadInstance, phase, span, workloadInstance.IsPreDeploymentEvaluationFailed, reconcilePreEval)
}

//Wait for deployment of Workload
Expand All @@ -174,7 +191,7 @@ func (r *KeptnWorkloadInstanceReconciler) Reconcile(ctx context.Context, req ctr
reconcileWorkloadInstance := func() (common.KeptnState, error) {
return r.reconcileDeployment(ctx, workloadInstance)
}
return r.handlePhase(ctx, workloadInstance, phase, span, workloadInstance.IsDeploymentFailed, reconcileWorkloadInstance)
return r.handlePhase(ctx, ctxAppTrace, workloadInstance, phase, span, workloadInstance.IsDeploymentFailed, reconcileWorkloadInstance)
}

//Wait for post-deployment checks of Workload
Expand All @@ -190,7 +207,7 @@ func (r *KeptnWorkloadInstanceReconciler) Reconcile(ctx context.Context, req ctr
reconcilePostDeployment := func() (common.KeptnState, error) {
return r.reconcilePrePostDeployment(ctx, workloadInstance, common.PostDeploymentCheckType)
}
return r.handlePhase(ctx, workloadInstance, phase, span, workloadInstance.IsPostDeploymentFailed, reconcilePostDeployment)
return r.handlePhase(ctx, ctxAppTrace, workloadInstance, phase, span, workloadInstance.IsPostDeploymentFailed, reconcilePostDeployment)
}

//Wait for post-evaluation checks of Workload
Expand All @@ -206,7 +223,7 @@ func (r *KeptnWorkloadInstanceReconciler) Reconcile(ctx context.Context, req ctr
reconcilePostEval := func() (common.KeptnState, error) {
return r.reconcilePrePostEvaluation(ctx, workloadInstance, common.PostDeploymentEvaluationCheckType)
}
return r.handlePhase(ctx, workloadInstance, phase, span, workloadInstance.IsPostDeploymentEvaluationFailed, reconcilePostEval)
return r.handlePhase(ctx, ctxAppTrace, workloadInstance, phase, span, workloadInstance.IsPostDeploymentEvaluationFailed, reconcilePostEval)
}

// WorkloadInstance is completed at this place
Expand Down Expand Up @@ -260,23 +277,31 @@ func (r *KeptnWorkloadInstanceReconciler) GetActiveDeployments(ctx context.Conte
return res, nil
}

func (r *KeptnWorkloadInstanceReconciler) handlePhase(ctx context.Context, workloadInstance *klcv1alpha1.KeptnWorkloadInstance, phase common.KeptnPhaseType, span trace.Span, phaseFailed func() bool, reconcilePhase func() (common.KeptnState, error)) (ctrl.Result, error) {
func (r *KeptnWorkloadInstanceReconciler) handlePhase(ctx context.Context, ctxAppTrace context.Context, workloadInstance *klcv1alpha1.KeptnWorkloadInstance, phase common.KeptnPhaseType, span trace.Span, phaseFailed func() bool, reconcilePhase func() (common.KeptnState, error)) (ctrl.Result, error) {
r.Log.Info(phase.LongName + " not finished")
overallStateUpdated := false
oldstate := workloadInstance.Status.Status
oldPhase := workloadInstance.Status.CurrentPhase
workloadInstance.Status.CurrentPhase = phase.ShortName

ctxAppTrace, spanAppTrace := r.getSpan(ctxAppTrace, workloadInstance, phase.ShortName)

if phaseFailed() { //TODO eventually we should decide whether a task returns FAILED, currently we never have this status set
r.recordEvent(phase, "Warning", workloadInstance, "Failed", "has failed")
return ctrl.Result{Requeue: true, RequeueAfter: 60 * time.Second}, nil
}
state, err := reconcilePhase()
if err != nil {
spanAppTrace.AddEvent(phase.LongName + " could not get reconciled")
r.recordEvent(phase, "Warning", workloadInstance, "ReconcileErrored", "could not get reconciled")
span.SetStatus(codes.Error, err.Error())
return ctrl.Result{Requeue: true}, err
}
if state.IsSucceeded() {
spanAppTrace.AddEvent(phase.LongName + " has succeeded")
spanAppTrace.SetStatus(codes.Ok, "Succeeded")
spanAppTrace.End()
r.unbindSpan(workloadInstance, phase.ShortName)
r.recordEvent(phase, "Normal", workloadInstance, "Succeeded", "has succeeded")
} else if state.IsFailed() {
r.recordEvent(phase, "Warning", workloadInstance, "Failed", "has failed")
Expand All @@ -286,15 +311,23 @@ func (r *KeptnWorkloadInstanceReconciler) handlePhase(ctx context.Context, workl
attrs := workloadInstance.GetMetricsAttributes()
r.Meters.DeploymentCount.Add(ctx, 1, attrs...)

spanAppTrace.AddEvent(phase.LongName + " has failed")
spanAppTrace.SetStatus(codes.Error, "Failed")
spanAppTrace.End()
r.unbindSpan(workloadInstance, phase.ShortName)

overallStateUpdated = true
} else {
if oldstate != common.StateProgressing {
workloadInstance.Status.Status = common.StateProgressing
overallStateUpdated = true
}
spanAppTrace.AddEvent(phase.LongName + " not finished")
r.recordEvent(phase, "Warning", workloadInstance, "NotFinished", "has not finished")
}
if oldPhase != workloadInstance.Status.CurrentPhase {
ctxAppTrace, spanAppTrace = r.getSpan(ctxAppTrace, workloadInstance, workloadInstance.Status.CurrentPhase)
semconv.AddAttributeFromWorkloadInstance(spanAppTrace, *workloadInstance)
overallStateUpdated = true
}

Expand Down Expand Up @@ -368,3 +401,27 @@ func (r *KeptnWorkloadInstanceReconciler) getAppVersionForWorkloadInstance(ctx c
}
return true, latestVersion, nil
}

func (r *KeptnWorkloadInstanceReconciler) getSpan(ctx context.Context, wli *klcv1alpha1.KeptnWorkloadInstance, phase string) (context.Context, trace.Span) {
wliName := r.getSpanName(wli, phase)
spanName := fmt.Sprintf("%s/%s", wli.Spec.WorkloadName, phase)

if r.bindCRDSpan == nil {
r.bindCRDSpan = make(map[string]trace.Span)
}
if span, ok := r.bindCRDSpan[wliName]; ok {
return ctx, span
}
r.Log.Info("DEBUG: Start Span: " + wliName)
ctx, span := r.Tracer.Start(ctx, spanName, trace.WithSpanKind(trace.SpanKindConsumer))
r.bindCRDSpan[wliName] = span
return ctx, span
}

func (r *KeptnWorkloadInstanceReconciler) unbindSpan(wli *klcv1alpha1.KeptnWorkloadInstance, phase string) {
delete(r.bindCRDSpan, r.getSpanName(wli, phase))
}

func (r *KeptnWorkloadInstanceReconciler) getSpanName(wli *klcv1alpha1.KeptnWorkloadInstance, phase string) string {
return fmt.Sprintf("%s.%s.%s.%s.%s", wli.Spec.TraceId, wli.Spec.AppName, wli.Spec.WorkloadName, wli.Spec.Version, phase)
}

0 comments on commit 145d6bb

Please sign in to comment.