Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[profiles] DAP slow rollout on DS creation #1365

Merged
merged 6 commits into from
Sep 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions apis/datadoghq/common/envvar.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,3 +180,8 @@ const (
// version value in the Agent's install info
InstallInfoToolVersion = "DD_TOOL_VERSION"
)

// DatadogAgentProfile env var names
const (
// SlowStartEnabled is the name of the environment variable associated with the
// DatadogAgentProfile slow start feature.
// NOTE(review): presumably this is the variable consulted by
// agentprofile.SlowStartEnabled() to turn the feature on — confirm.
SlowStartEnabled = "DD_DAP_SLOW_START_ENABLED"
)
34 changes: 34 additions & 0 deletions apis/datadoghq/v1alpha1/datadogagentprofile_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,15 @@ import (
)

// ComponentName is the string type used for Datadog component names such as
// NodeAgentComponentName.
type ComponentName string
// SlowStartStatus is the string type of the value stored in SlowStart.Status.
type SlowStartStatus string

const (
// NodeAgentComponentName is the name of the Datadog Node Agent
NodeAgentComponentName ComponentName = "nodeAgent"

// CompletedStatus is the slow start status value "Completed".
// NOTE(review): presumably the terminal state of a rollout — confirm against
// the code that sets it.
CompletedStatus SlowStartStatus = "Completed"
// WaitingStatus is the slow start status value "Waiting".
// NOTE(review): presumably means the rollout is paused until more pods become
// ready — confirm against the code that sets it.
WaitingStatus SlowStartStatus = "Waiting"
// InProgressStatus is the slow start status value "In Progress". Note that the
// serialized value contains a space.
InProgressStatus SlowStartStatus = "In Progress"
)

// DatadogAgentProfileSpec defines the desired state of DatadogAgentProfile
Expand Down Expand Up @@ -97,6 +102,35 @@ type DatadogAgentProfileStatus struct {
// Applied shows whether the DatadogAgentProfile conflicts with an existing DatadogAgentProfile.
// +optional
Applied metav1.ConditionStatus `json:"applied,omitempty"`

// SlowStart is the state of the slow start feature.
// +optional
SlowStart *SlowStart `json:"slowStart,omitempty"`
}

// SlowStart defines the observed state of the slow start feature based on the agent deployment.
// +k8s:openapi-gen=true
// +kubebuilder:object:generate=true
type SlowStart struct {
// NOTE(review): the field doc comments below are repeated verbatim as the
// descriptions in the CRD schema, so they are presumably the input of the CRD
// generator; keep review notes like this one separated from them by a blank
// line so they are not part of any field's doc comment.

// Status shows the current state of the feature.
// +optional
Status SlowStartStatus `json:"status,omitempty"`

// NOTE(review): the three counters below are marked +optional but their JSON
// tags have no `omitempty`, so encoding/json still emits them when they are
// zero — confirm this is intended.

// NodesLabeled shows the number of nodes currently labeled.
// +optional
NodesLabeled int32 `json:"nodesLabeled"`

// PodsReady shows the number of pods in the ready state.
// +optional
PodsReady int32 `json:"podsReady"`

// MaxUnavailable shows the number of pods that can be in an unready state.
// +optional
MaxUnavailable int32 `json:"maxUnavailable"`

// NOTE(review): "status was updated" below may be broader than the actual
// behavior; presumably LastTransition only moves when Status changes value —
// confirm against the code that sets it.

// LastTransition is the last time the status was updated.
// +optional
LastTransition *metav1.Time `json:"lastTransition,omitempty"`
}

// DatadogAgentProfile is the Schema for the datadogagentprofiles API
Expand Down
24 changes: 24 additions & 0 deletions apis/datadoghq/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

61 changes: 60 additions & 1 deletion apis/datadoghq/v1alpha1/zz_generated.openapi.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 23 additions & 0 deletions config/crd/bases/v1/datadoghq.com_datadogagentprofiles.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,29 @@ spec:
description: LastUpdate is the last time the status was updated.
format: date-time
type: string
slowStart:
description: SlowStart is the state of the slow start feature.
properties:
lastTransition:
description: LastTransition is the last time the status was updated.
format: date-time
type: string
maxUnavailable:
description: MaxUnavailable shows the number of pods that can be in an unready state.
format: int32
type: integer
nodesLabeled:
description: NodesLabeled shows the number of nodes currently labeled.
format: int32
type: integer
podsReady:
description: PodsReady shows the number of pods in the ready state.
format: int32
type: integer
status:
description: Status shows the current state of the feature.
type: string
type: object
valid:
description: Valid shows if the DatadogAgentProfile has a valid config spec.
type: string
Expand Down
2 changes: 1 addition & 1 deletion controllers/datadogagent/controller_reconcile_agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ func (r *Reconciler) reconcileV2Agent(logger logr.Logger, requiredComponents fea
return reconcile.Result{}, nil
}

return r.createOrUpdateDaemonset(daemonsetLogger, dda, daemonset, newStatus, updateDSStatusV2WithAgent)
return r.createOrUpdateDaemonset(daemonsetLogger, dda, daemonset, newStatus, updateDSStatusV2WithAgent, profile)
}

func updateDSStatusV2WithAgent(ds *appsv1.DaemonSet, newStatus *datadoghqv2alpha1.DatadogAgentStatus, updateTime metav1.Time, status metav1.ConditionStatus, reason, message string) {
Expand Down
8 changes: 4 additions & 4 deletions controllers/datadogagent/controller_reconcile_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ func (r *Reconciler) reconcileInstanceV2(ctx context.Context, logger logr.Logger
if r.options.DatadogAgentProfileEnabled {
metrics.DAPEnabled.Set(metrics.TrueValue)
var profilesByNode map[string]types.NamespacedName
profiles, profilesByNode, e = r.profilesToApply(ctx, logger, nodeList, now)
profiles, profilesByNode, e = r.profilesToApply(ctx, logger, nodeList, now, instance)
if err != nil {
return r.updateStatusIfNeededV2(logger, instance, newStatus, result, e, now)
}
Expand Down Expand Up @@ -337,7 +337,7 @@ func (r *Reconciler) updateMetricsForwardersFeatures(dda *datadoghqv2alpha1.Data
// is considered to have priority.
// This function also returns a map that maps each node name to the profile that
// should be applied to it.
func (r *Reconciler) profilesToApply(ctx context.Context, logger logr.Logger, nodeList []corev1.Node, now metav1.Time) ([]datadoghqv1alpha1.DatadogAgentProfile, map[string]types.NamespacedName, error) {
func (r *Reconciler) profilesToApply(ctx context.Context, logger logr.Logger, nodeList []corev1.Node, now metav1.Time, dda *datadoghqv2alpha1.DatadogAgent) ([]datadoghqv1alpha1.DatadogAgentProfile, map[string]types.NamespacedName, error) {
profilesList := datadoghqv1alpha1.DatadogAgentProfileList{}
err := r.client.List(ctx, &profilesList)
if err != nil {
Expand All @@ -349,8 +349,8 @@ func (r *Reconciler) profilesToApply(ctx context.Context, logger logr.Logger, no

sortedProfiles := agentprofile.SortProfiles(profilesList.Items)
for _, profile := range sortedProfiles {

profileAppliedByNode, err = agentprofile.ProfileToApply(logger, &profile, nodeList, profileAppliedByNode, now)
maxUnavailable := agentprofile.GetMaxUnavailable(logger, dda, &profile, len(nodeList))

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟠 Code Vulnerability

Potential memory range aliasing. Avoid using the memory reference. (...read more)

Implicit memory aliasing in for loops refers to a scenario in Go programming where two or more pointers reference the same location in memory, creating unexpected side effects. It is a common mistake among Go programmers and usually comes from taking the address of a variable declared by the 'range' clause.

Consider this example, where a slice of pointers is created in a loop:

data := []int{1, 2, 3}
pointers := make([]*int, 3)
for i, v := range data {
    pointers[i] = &v
}

You might expect the 'pointers' slice to hold the addresses of the elements in the 'data' slice, but that is not the case: it holds addresses of 'v', which is a copy of each element and never the element itself. In Go versions before 1.22, 'v' is additionally a single variable reused across all iterations, so its value changes on each iteration but its address does not. As a result, every element of the 'pointers' slice points to the same memory location: the address of the loop variable 'v'. The final value of 'v' is '3', so dereferencing any of the pointers yields '3' regardless of its index in the slice. (Since Go 1.22 each iteration gets its own 'v', so the pointers are distinct, but they still refer to copies rather than to the elements of 'data'.)

To avoid implicit memory aliasing in for loops in Go, you should address the actual elements in the original data structure, like so:

data := []int{1, 2, 3}
pointers := make([]*int, 3)
for i := range data {
    pointers[i] = &data[i]
}

In this example, each pointer in the 'pointers' slice correctly points to the respective element in the 'data' slice.

Learn More

View in Datadog  Leave us feedback  Documentation

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to the linked Stack Overflow post, this should be fine with Go 1.22.

profileAppliedByNode, err = agentprofile.ApplyProfile(logger, &profile, nodeList, profileAppliedByNode, now, maxUnavailable)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟠 Code Vulnerability

Potential memory range aliasing. Avoid using the memory reference. (...read more)

Implicit memory aliasing in for loops refers to a scenario in Go programming where two or more pointers reference the same location in memory, creating unexpected side effects. It is a common mistake among Go programmers and usually comes from taking the address of a variable declared by the 'range' clause.

Consider this example, where a slice of pointers is created in a loop:

data := []int{1, 2, 3}
pointers := make([]*int, 3)
for i, v := range data {
    pointers[i] = &v
}

You might expect the 'pointers' slice to hold the addresses of the elements in the 'data' slice, but that is not the case: it holds addresses of 'v', which is a copy of each element and never the element itself. In Go versions before 1.22, 'v' is additionally a single variable reused across all iterations, so its value changes on each iteration but its address does not. As a result, every element of the 'pointers' slice points to the same memory location: the address of the loop variable 'v'. The final value of 'v' is '3', so dereferencing any of the pointers yields '3' regardless of its index in the slice. (Since Go 1.22 each iteration gets its own 'v', so the pointers are distinct, but they still refer to copies rather than to the elements of 'data'.)

To avoid implicit memory aliasing in for loops in Go, you should address the actual elements in the original data structure, like so:

data := []int{1, 2, 3}
pointers := make([]*int, 3)
for i := range data {
    pointers[i] = &data[i]
}

In this example, each pointer in the 'pointers' slice correctly points to the respective element in the 'data' slice.

Learn More

View in Datadog  Leave us feedback  Documentation

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to the linked Stack Overflow post, this should be fine with Go 1.22.

r.updateDAPStatus(logger, &profile)
if err != nil {
// profile is invalid or conflicts
Expand Down
41 changes: 38 additions & 3 deletions controllers/datadogagent/controller_reconcile_v2_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"fmt"
"time"

"github.com/DataDog/datadog-operator/apis/datadoghq/v1alpha1"
datadoghqv2alpha1 "github.com/DataDog/datadog-operator/apis/datadoghq/v2alpha1"
"github.com/DataDog/datadog-operator/pkg/agentprofile"
"github.com/DataDog/datadog-operator/pkg/controller/utils/comparison"
Expand Down Expand Up @@ -123,7 +124,7 @@ func (r *Reconciler) createOrUpdateDeployment(parentLogger logr.Logger, dda *dat
return result, err
}

func (r *Reconciler) createOrUpdateDaemonset(parentLogger logr.Logger, dda *datadoghqv2alpha1.DatadogAgent, daemonset *appsv1.DaemonSet, newStatus *datadoghqv2alpha1.DatadogAgentStatus, updateStatusFunc updateDSStatusComponentFunc) (reconcile.Result, error) {
func (r *Reconciler) createOrUpdateDaemonset(parentLogger logr.Logger, dda *datadoghqv2alpha1.DatadogAgent, daemonset *appsv1.DaemonSet, newStatus *datadoghqv2alpha1.DatadogAgentStatus, updateStatusFunc updateDSStatusComponentFunc, profile *v1alpha1.DatadogAgentProfile) (reconcile.Result, error) {
logger := parentLogger.WithValues("daemonset.Namespace", daemonset.Namespace, "daemonset.Name", daemonset.Name)

var result reconcile.Result
Expand Down Expand Up @@ -154,6 +155,26 @@ func (r *Reconciler) createOrUpdateDaemonset(parentLogger logr.Logger, dda *data
}

if alreadyExists {
now := metav1.NewTime(time.Now())
if agentprofile.SlowStartEnabled() {
if profile.Status.SlowStart != nil {
profile.Status.SlowStart.PodsReady = currentDaemonset.Status.NumberReady
}
if shouldCheckSlowStartStatus(profile) {
newStatus := v1alpha1.WaitingStatus

if int(profile.Status.SlowStart.NodesLabeled-currentDaemonset.Status.NumberReady) < int(profile.Status.SlowStart.MaxUnavailable) {
newStatus = v1alpha1.InProgressStatus
}

if profile.Status.SlowStart.Status != newStatus {
profile.Status.SlowStart.LastTransition = &now
}
profile.Status.SlowStart.Status = newStatus
}
r.updateDAPStatus(logger, profile)
}

// When overriding node labels in <1.7.0, the hash could be updated
// without updating the pod template spec in <1.7.0 since pod template
// labels were copied over directly from the existing daemonset.
Expand Down Expand Up @@ -197,7 +218,6 @@ func (r *Reconciler) createOrUpdateDaemonset(parentLogger logr.Logger, dda *data
// Even if the DaemonSet is still the same, its status might have
// changed (for example, the number of pods ready). This call is
// needed to keep the agent status updated.
now := metav1.NewTime(time.Now())
newStatus.AgentList = datadoghqv2alpha1.UpdateDaemonSetStatus(currentDaemonset, newStatus.AgentList, &now)
newStatus.Agent = datadoghqv2alpha1.UpdateCombinedDaemonSetStatus(newStatus.AgentList)

Expand All @@ -220,7 +240,6 @@ func (r *Reconciler) createOrUpdateDaemonset(parentLogger logr.Logger, dda *data
// won't filter labels with "datadoghq.com" in the key
delete(updateDaemonset.Labels, agentprofile.OldProfileLabelKey)

now := metav1.NewTime(time.Now())
err = kubernetes.UpdateFromObject(context.TODO(), r.client, updateDaemonset, currentDaemonset.ObjectMeta)
if err != nil {
updateStatusFunc(updateDaemonset, newStatus, now, metav1.ConditionFalse, updateSucceeded, "Unable to update Daemonset")
Expand Down Expand Up @@ -371,3 +390,19 @@ func ensureSelectorInPodTemplateLabels(logger logr.Logger, selector *metav1.Labe

return labels
}

// shouldCheckSlowStartStatus reports whether the slow start status of the given
// profile still needs to be evaluated. It returns false when the profile is nil,
// when its name is empty or "default", when it carries no SlowStart status, or
// when that status is already CompletedStatus; otherwise it returns true.
func shouldCheckSlowStartStatus(profile *v1alpha1.DatadogAgentProfile) bool {
	switch {
	case profile == nil:
		// Nothing to inspect.
		return false
	case profile.Name == "", profile.Name == "default":
		// Unnamed and "default" profiles are never checked.
		return false
	case profile.Status.SlowStart == nil:
		// No slow start state has been recorded for this profile.
		return false
	default:
		// Any status other than Completed still needs checking.
		return profile.Status.SlowStart.Status != v1alpha1.CompletedStatus
	}
}
Loading
Loading