Skip to content

Commit

Permalink
Merge branch 'main' into bertinmeshita/sc-18608/increase-cli-low-resources
Browse files Browse the repository at this point in the history
  • Loading branch information
bertinm-gc authored Oct 22, 2024
2 parents b842397 + 9db07f3 commit 9dc5e32
Show file tree
Hide file tree
Showing 12 changed files with 167 additions and 53 deletions.
1 change: 1 addition & 0 deletions .goreleaser.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ builds:
goos:
- linux
- darwin
- windows
goarch:
- amd64
- arm64
Expand Down
64 changes: 64 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,70 @@ We use the following categories for changes:

### Security

## [0.22.6] 2024-10-21

### Added

### Changed

### Fixed

- Low resource preset should take precedence over kernel 5.11 overrides [#sc-20056]

### Removed

### Deprecated

### Security

## [0.22.5] 2024-10-13

### Added

### Changed

- Update low resources presets to include new components [#sc-20036]

### Fixed

### Removed

### Deprecated

### Security

## [0.22.4] 2024-10-07

### Added

### Changed

### Fixed

- Waiting for sensor pods instead of alligator pods [#sc-19833]

### Removed

### Deprecated

### Security

## [0.22.3] 2024-09-17

### Added

- Support groundcover CLI in windows [#sc-19269]

### Changed

### Fixed

### Removed

### Deprecated

### Security

## [0.22.2] 2024-07-04

### Added
Expand Down
10 changes: 2 additions & 8 deletions cmd/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"strconv"
"time"

"github.com/blang/semver/v4"
"github.com/getsentry/sentry-go"
"github.com/imdario/mergo"
"github.com/pkg/errors"
Expand Down Expand Up @@ -46,7 +45,6 @@ const (
HELM_REPO_URL = "https://helm.groundcover.com"
CLUSTER_URL_FORMAT = "%s/?clusterId=%s&viewType=Overview"
QUAY_REGISTRY_PRESET_PATH = "presets/quay.yaml"
AGENT_KERNEL_5_11_PRESET_PATH = "presets/agent/kernel-5-11.yaml"
CUSTOM_METRICS_PRESET_PATH = "presets/backend/custom-metrics.yaml"
KUBE_STATE_METRICS_PRESET_PATH = "presets/backend/kube-state-metrics.yaml"
STORAGE_CLASS_TEMPLATE_PATH = "templates/backend/storage-class.yaml"
Expand Down Expand Up @@ -471,7 +469,7 @@ func validateInstall(ctx context.Context, kubeClient *k8s.Client, releaseName, n
}

if agentEnabled {
if err = waitForAlligators(ctx, kubeClient, namespace, appVersion, deployableNodesCount, sentryHelmContext); err != nil {
if err = waitForSensors(ctx, kubeClient, namespace, appVersion, deployableNodesCount, sentryHelmContext); err != nil {
return err
}
}
Expand Down Expand Up @@ -607,7 +605,7 @@ func generateChartValues(chartValues map[string]interface{}, apiKey, installatio
helm.BACKEND_LOW_RESOURCES_PATH,
}
} else {
agentPresetPath := helm.GetAgentResourcePresetPath(allocatableResources)
agentPresetPath := helm.GetAgentResourcePresetPath(allocatableResources, nodesReport.MaximalKernelVersion())
if agentPresetPath != helm.DEFAULT_PRESET {
overridePaths = append(overridePaths, agentPresetPath)
}
Expand All @@ -632,10 +630,6 @@ func generateChartValues(chartValues map[string]interface{}, apiKey, installatio
overridePaths = append(overridePaths, KUBE_STATE_METRICS_PRESET_PATH)
}

if semver.MustParseRange(">=5.11.0")(nodesReport.MaximalKernelVersion()) {
overridePaths = append(overridePaths, AGENT_KERNEL_5_11_PRESET_PATH)
}

if len(overridePaths) > 0 {
sentryHelmContext.ResourcesPresets = overridePaths
sentryHelmContext.SetOnCurrentScope()
Expand Down
68 changes: 34 additions & 34 deletions cmd/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,18 @@ const (
PVC_POLLING_RETRIES = 40
PVC_POLLING_TIMEOUT = time.Minute * 10

ALLIGATORS_POLLING_INTERVAL = time.Second * 15
ALLIGATORS_POLLING_RETRIES = 28
ALLIGATORS_POLLING_TIMEOUT = time.Minute * 7
SENSORS_POLLING_INTERVAL = time.Second * 15
SENSORS_POLLING_RETRIES = 28
SENSORS_POLLING_TIMEOUT = time.Minute * 7

ALLIGATOR_LABEL_SELECTOR = "app=alligator"
BACKEND_LABEL_SELECTOR = "app!=alligator"
PORTAL_LABEL_SELECTOR = "app=portal"
RUNNING_FIELD_SELECTOR = "status.phase=Running"
SENSOR_LABEL_SELECTOR = "app=sensor"
BACKEND_LABEL_SELECTOR = "app!=sensor"
PORTAL_LABEL_SELECTOR = "app=portal"
RUNNING_FIELD_SELECTOR = "status.phase=Running"

WAIT_FOR_PORTAL_FORMAT = "Waiting until cluster establish connectivity"
WAIT_FOR_PVCS_FORMAT = "Waiting until all PVCs are bound (%d/%d PVCs)"
WAIT_FOR_ALLIGATORS_FORMAT = "Waiting until all nodes are monitored (%d/%d Nodes)"
WAIT_FOR_SENSORS_FORMAT = "Waiting until all nodes are monitored (%d/%d Nodes)"
TIMEOUT_INSTALLATION_FORMAT = "Installation takes longer than expected, you can check the status using \"kubectl get pods -n %s\""

PVCS_VALIDATION_EVENT_NAME = "pvcs_validation"
Expand Down Expand Up @@ -117,7 +117,7 @@ var StatusCmd = &cobra.Command{
}
nodesCount := len(nodeList.Items)

if err = waitForAlligators(ctx, kubeClient, namespace, chart.AppVersion(), nodesCount, sentryHelmContext); err != nil {
if err = waitForSensors(ctx, kubeClient, namespace, chart.AppVersion(), nodesCount, sentryHelmContext); err != nil {
return err
}

Expand Down Expand Up @@ -178,7 +178,7 @@ func waitForPortal(ctx context.Context, kubeClient *k8s.Client, namespace, appVe
return err
}

func waitForAlligators(ctx context.Context, kubeClient *k8s.Client, namespace, appVersion string, expectedAlligatorsCount int, sentryHelmContext *sentry_utils.HelmContext) error {
func waitForSensors(ctx context.Context, kubeClient *k8s.Client, namespace, appVersion string, expectedSensorsCount int, sentryHelmContext *sentry_utils.HelmContext) error {
var err error

event := segment.NewEvent(AGENTS_VALIDATION_EVENT_NAME)
Expand All @@ -187,43 +187,43 @@ func waitForAlligators(ctx context.Context, kubeClient *k8s.Client, namespace, a
event.StatusByError(err)
}()

spinner := ui.GlobalWriter.NewSpinner(fmt.Sprintf(WAIT_FOR_ALLIGATORS_FORMAT, 0, expectedAlligatorsCount))
spinner.SetStopMessage(fmt.Sprintf("All nodes are monitored (%d/%d Nodes)", expectedAlligatorsCount, expectedAlligatorsCount))
spinner := ui.GlobalWriter.NewSpinner(fmt.Sprintf(WAIT_FOR_SENSORS_FORMAT, 0, expectedSensorsCount))
spinner.SetStopMessage(fmt.Sprintf("All nodes are monitored (%d/%d Nodes)", expectedSensorsCount, expectedSensorsCount))
spinner.SetStopFailMessage(fmt.Sprintf(TIMEOUT_INSTALLATION_FORMAT, namespace))

spinner.Start()
defer spinner.WriteStop()

runningAlligators := 0
runningSensors := 0

isAlligatorRunningFunc := func() error {
isSensorRunningFunc := func() error {
var err error

if runningAlligators, err = getRunningAlligators(ctx, kubeClient, appVersion, namespace); err != nil {
if runningSensors, err = getRunningSensors(ctx, kubeClient, appVersion, namespace); err != nil {
return err
}

spinner.WriteMessage(fmt.Sprintf(WAIT_FOR_ALLIGATORS_FORMAT, runningAlligators, expectedAlligatorsCount))
spinner.WriteMessage(fmt.Sprintf(WAIT_FOR_SENSORS_FORMAT, runningSensors, expectedSensorsCount))

if runningAlligators >= expectedAlligatorsCount {
if runningSensors >= expectedSensorsCount {
return nil
}

err = errors.New("not all expected alligators are running")
err = errors.New("not all expected sensors are running")
return ui.RetryableError(err)
}

err = spinner.Poll(ctx, isAlligatorRunningFunc, ALLIGATORS_POLLING_INTERVAL, ALLIGATORS_POLLING_TIMEOUT, ALLIGATORS_POLLING_RETRIES)
err = spinner.Poll(ctx, isSensorRunningFunc, SENSORS_POLLING_INTERVAL, SENSORS_POLLING_TIMEOUT, SENSORS_POLLING_RETRIES)

runningAlligatorsStr := fmt.Sprintf("%d/%d", runningAlligators, expectedAlligatorsCount)
sentryHelmContext.RunningAlligators = runningAlligatorsStr
sentry_utils.SetTagOnCurrentScope(sentry_utils.EXPECTED_NODES_COUNT_TAG, fmt.Sprintf("%d", expectedAlligatorsCount))
sentry_utils.SetTagOnCurrentScope(sentry_utils.RUNNING_ALLIGATORS_TAG, runningAlligatorsStr)
runningSensorsStr := fmt.Sprintf("%d/%d", runningSensors, expectedSensorsCount)
sentryHelmContext.RunningSensors = runningSensorsStr
sentry_utils.SetTagOnCurrentScope(sentry_utils.EXPECTED_NODES_COUNT_TAG, fmt.Sprintf("%d", expectedSensorsCount))
sentry_utils.SetTagOnCurrentScope(sentry_utils.RUNNING_SENSORS_TAG, runningSensorsStr)

sentryHelmContext.SetOnCurrentScope()
event.
Set("alligatorsCount", expectedAlligatorsCount).
Set("runningAlligatorsCount", runningAlligators)
Set("sensorsCount", expectedSensorsCount).
Set("runningSensorsCount", runningSensors)

if err == nil {
return nil
Expand All @@ -232,9 +232,9 @@ func waitForAlligators(ctx context.Context, kubeClient *k8s.Client, namespace, a
defer spinner.WriteStopFail()

if errors.Is(err, ui.ErrSpinnerTimeout) {
if runningAlligators > 0 {
if runningSensors > 0 {
spinner.SetWarningSign()
spinner.SetStopFailMessage(fmt.Sprintf("groundcover managed to provision %d/%d nodes", runningAlligators, expectedAlligatorsCount))
spinner.SetStopFailMessage(fmt.Sprintf("groundcover managed to provision %d/%d nodes", runningSensors, expectedSensorsCount))
}

return ErrExecutionPartialSuccess
Expand All @@ -243,27 +243,27 @@ func waitForAlligators(ctx context.Context, kubeClient *k8s.Client, namespace, a
return err
}

func getRunningAlligators(ctx context.Context, kubeClient *k8s.Client, appVersion string, namespace string) (int, error) {
func getRunningSensors(ctx context.Context, kubeClient *k8s.Client, appVersion string, namespace string) (int, error) {
podClient := kubeClient.CoreV1().Pods(namespace)
listOptions := metav1.ListOptions{
LabelSelector: ALLIGATOR_LABEL_SELECTOR,
LabelSelector: SENSOR_LABEL_SELECTOR,
FieldSelector: RUNNING_FIELD_SELECTOR,
}

runningAlligators := 0
runningSensors := 0

podList, err := podClient.List(ctx, listOptions)
if err != nil {
return runningAlligators, err
return runningSensors, err
}

for _, pod := range podList.Items {
if pod.Annotations["groundcover_version"] == appVersion {
runningAlligators++
runningSensors++
}
}

return runningAlligators, nil
return runningSensors, nil
}

func reportPodsStatus(ctx context.Context, kubeClient *k8s.Client, namespace string, sentryHelmContext *sentry_utils.HelmContext) {
Expand All @@ -272,7 +272,7 @@ func reportPodsStatus(ctx context.Context, kubeClient *k8s.Client, namespace str
return
}

agentPodsStatus, err := listPodsStatuses(ctx, kubeClient, namespace, metav1.ListOptions{LabelSelector: ALLIGATOR_LABEL_SELECTOR})
agentPodsStatus, err := listPodsStatuses(ctx, kubeClient, namespace, metav1.ListOptions{LabelSelector: SENSOR_LABEL_SELECTOR})
if err != nil {
return
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/helm/presets/agent/kernel-5-11.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
agent:
alligator:
sensor:
resources:
requests:
memory: 1Gi
Expand Down
2 changes: 1 addition & 1 deletion pkg/helm/presets/agent/low-resources.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
agent:
alligator:
sensor:
resources:
requests:
cpu: 100m
Expand Down
15 changes: 14 additions & 1 deletion pkg/helm/presets/backend/low-resources.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ monitors-manager:
limits:
memory: 150Mi


backend:
postgresql:
primary:
Expand All @@ -83,4 +82,18 @@ backend:
memory: 40Mi
limits:
memory: 60Mi
keep:
backend:
resources:
requests:
cpu: 100m
memory: 128Mi

vector:
replicas: 1
resources:
requests:
cpu: 50m
memory: 256Mi
limits:
memory: 1024Mi
14 changes: 13 additions & 1 deletion pkg/helm/tune.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package helm
import (
"embed"

"github.com/blang/semver/v4"
"groundcover.com/pkg/k8s"
"k8s.io/apimachinery/pkg/api/resource"
)
Expand All @@ -17,6 +18,13 @@ const (
AGENT_DEFAULT_MEMORY_THRESHOLD = "1024Mi"
AGENT_LOW_RESOURCES_PATH = "presets/agent/low-resources.yaml"

// Starting from Linux kernel version 5.11, eBPF maps are accounted for in the memory cgroup
// of the process that created them. For this reason we need to increase the memory limit for
// the agent.
// https://github.com/cilium/ebpf/blob/v0.16.0/docs/ebpf/concepts/rlimit.md#resource-limits
AGENT_KERNEL_5_11_PRESET_PATH = "presets/agent/kernel-5-11.yaml"
KERNEL_5_11_SEMVER_EXPRESSION = ">=5.11.0"

EMPTYDIR_STORAGE_PATH = "presets/backend/emptydir-storage.yaml"

BACKEND_DEFAULT_TOTAL_CPU_THRESHOLD = "12000m"
Expand All @@ -39,7 +47,7 @@ type AllocatableResources struct {
NodeCount int
}

func GetAgentResourcePresetPath(allocatableResources *AllocatableResources) string {
func GetAgentResourcePresetPath(allocatableResources *AllocatableResources, maxKernelVersion semver.Version) string {
defaultCpuThreshold := resource.MustParse(AGENT_DEFAULT_CPU_THRESHOLD)
defaultMemoryThreshold := resource.MustParse(AGENT_DEFAULT_MEMORY_THRESHOLD)

Expand All @@ -50,6 +58,10 @@ func GetAgentResourcePresetPath(allocatableResources *AllocatableResources) stri
return AGENT_LOW_RESOURCES_PATH
}

if semver.MustParseRange(KERNEL_5_11_SEMVER_EXPRESSION)(maxKernelVersion) {
return AGENT_KERNEL_5_11_PRESET_PATH
}

return DEFAULT_PRESET
}

Expand Down
Loading

0 comments on commit 9dc5e32

Please sign in to comment.