Skip to content

Commit

Permalink
[agent-smith] Support CPU limit penalties
Browse files Browse the repository at this point in the history
  • Loading branch information
csweichel committed Aug 6, 2021
1 parent f16d4fe commit aefc69e
Show file tree
Hide file tree
Showing 10 changed files with 215 additions and 6 deletions.
3 changes: 3 additions & 0 deletions chart/templates/agent-smith-configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ data:
"baseBudget": "2Gi",
"perDtThreshold": "250Mi"
}
},
"kubernetes": {
"enabled": true
}
}
{{- end -}}
8 changes: 7 additions & 1 deletion chart/templates/agent-smith-role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,11 @@ rules:
- "use"
resourceNames:
- "{{ .Release.Namespace }}-ns-privileged-unconfined"

- apiGroups:
- ""
resources:
- pods
verbs:
- get
- update
{{- end -}}
2 changes: 1 addition & 1 deletion components/common-go/kubernetes/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ const (
TraceIDAnnotation = "gitpod/traceid"

// CPULimitAnnotation enforces a strict CPU limit on a workspace by virtue of ws-daemon
CPULimitAnnotation = "gitpod/cpuLimit"
CPULimitAnnotation = "gitpod.io/cpuLimit"

// RequiredNodeServicesAnnotation lists all Gitpod services required on the node
RequiredNodeServicesAnnotation = "gitpod.io/requiredNodeServices"
Expand Down
7 changes: 7 additions & 0 deletions components/content-service/pkg/storage/gcloud.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,13 @@ func gcpEnsureExists(ctx context.Context, client *gcpstorage.Client, bucketName
log.WithField("bucketName", bucketName).Debug("Creating bucket")
err = hdl.Create(ctx, gcpConfig.Project, &gcpstorage.BucketAttrs{
Location: gcpConfig.Region,
CORS: []gcpstorage.CORS{
{
Origins: []string{"*"},
Methods: []string{"GET"},
MaxAge: 6 * time.Hour,
},
},
})
if e, ok := err.(*googleapi.Error); ok && e.Code == http.StatusConflict && strings.Contains(strings.ToLower(e.Message), "you already own this bucket") {
// Looks like we had a bucket creation race and lost.
Expand Down
1 change: 1 addition & 0 deletions components/ee/agent-smith/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ require (
golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1
k8s.io/apimachinery v0.21.1
k8s.io/client-go v1.5.2
moul.io/http2curl v1.0.0 // indirect
)

Expand Down
127 changes: 127 additions & 0 deletions components/ee/agent-smith/go.sum

Large diffs are not rendered by default.

29 changes: 28 additions & 1 deletion components/ee/agent-smith/pkg/agent/actions.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@ import (
"context"
"fmt"

wsk8s "github.com/gitpod-io/gitpod/common-go/kubernetes"
"github.com/gitpod-io/gitpod/common-go/log"
protocol "github.com/gitpod-io/gitpod/gitpod-protocol"

"golang.org/x/sys/unix"
corev1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/util/retry"
)

// all functions in this file deal directly with Kubernetes and make several assumptions
Expand Down Expand Up @@ -45,6 +49,29 @@ func (agent *Smith) blockUser(ownerID string) error {
}

// limitCPUUse applies the configured CPU limit penalty to the pod named podname
// by setting the wsk8s.CPULimitAnnotation annotation on it.
//
// It returns an error if no Kubernetes client is available, if no CPU limit
// penalty is configured, or if the pod cannot be fetched or updated. Updates
// that fail with a conflict are retried using retry.DefaultBackoff.
func (agent *Smith) limitCPUUse(podname string) error {
	if agent.Kubernetes == nil {
		return fmt.Errorf("not connected to Kubernetes - cannot limit CPU usage")
	}
	if agent.Config.Enforcement.CPULimitPenalty == "" {
		return fmt.Errorf("no CPU limit penalty specified - cannot limit CPU usage")
	}

	ctx := context.Background()
	pods := agent.Kubernetes.CoreV1().Pods(agent.Config.KubernetesNamespace)
	err := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
		// Fetch the pod anew on every attempt so a retry after a conflict
		// operates on the most recent version of the object.
		pod, err := pods.Get(ctx, podname, corev1.GetOptions{})
		if err != nil {
			return err
		}

		// A pod without any annotations has a nil map; writing to it would panic.
		if pod.Annotations == nil {
			pod.Annotations = make(map[string]string)
		}
		pod.Annotations[wsk8s.CPULimitAnnotation] = agent.Config.Enforcement.CPULimitPenalty

		_, err = pods.Update(ctx, pod, corev1.UpdateOptions{})
		return err
	})
	if err != nil {
		// Surface the failure instead of silently reporting success.
		return fmt.Errorf("cannot limit CPU usage of pod %s: %w", podname, err)
	}

	return nil
}
33 changes: 33 additions & 0 deletions components/ee/agent-smith/pkg/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ import (
"golang.org/x/xerrors"

"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
)

const (
Expand All @@ -46,6 +49,7 @@ type Smith struct {
Config Config
GitpodAPI gitpod.APIInterface
EnforcementRules map[string]EnforcementRules
Kubernetes kubernetes.Interface
metrics *metrics

notifiedInfringements *lru.Cache
Expand Down Expand Up @@ -110,6 +114,11 @@ func NewAgentSmith(cfg Config) (*Smith, error) {
return nil, err
}

// establish default CPU limit penalty
if cfg.Enforcement.CPULimitPenalty == "" {
cfg.Enforcement.CPULimitPenalty = "500m"
}

var api gitpod.APIInterface
if cfg.GitpodAPI.HostURL != "" {
u, err := url.Parse(cfg.GitpodAPI.HostURL)
Expand All @@ -128,6 +137,29 @@ func NewAgentSmith(cfg Config) (*Smith, error) {
}
}

var clientset kubernetes.Interface
if cfg.Kubernetes.Enabled {
if cfg.Kubernetes.Kubeconfig != "" {
res, err := clientcmd.BuildConfigFromFlags("", cfg.Kubernetes.Kubeconfig)
if err != nil {
return nil, xerrors.Errorf("cannot connect to kubernetes: %w", err)
}
clientset, err = kubernetes.NewForConfig(res)
if err != nil {
return nil, xerrors.Errorf("cannot connect to kubernetes: %w", err)
}
} else {
k8s, err := rest.InClusterConfig()
if err != nil {
return nil, xerrors.Errorf("cannot connect to kubernetes: %w", err)
}
clientset, err = kubernetes.NewForConfig(k8s)
if err != nil {
return nil, xerrors.Errorf("cannot connect to kubernetes: %w", err)
}
}
}

m := newAgentMetrics()
pidsMap := syncMapCounter{}
pidsMap.WithCounter(m.currentlyMonitoredPIDS)
Expand All @@ -146,6 +178,7 @@ func NewAgentSmith(cfg Config) (*Smith, error) {
},
Config: cfg,
GitpodAPI: api,
Kubernetes: clientset,
notifiedInfringements: notificationCache,
perfHandler: make(chan perfHandlerFunc, 10),
metrics: m,
Expand Down
9 changes: 7 additions & 2 deletions components/ee/agent-smith/pkg/agent/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,19 @@ type Config struct {
Blacklists *Blacklists `json:"blacklists,omitempty"`
EgressTraffic *EgressTraffic `json:"egressTraffic,omitempty"`
Enforcement struct {
Default *EnforcementRules `json:"default,omitempty"`
PerRepo map[string]EnforcementRules `json:"perRepo,omitempty"`
Default *EnforcementRules `json:"default,omitempty"`
PerRepo map[string]EnforcementRules `json:"perRepo,omitempty"`
CPULimitPenalty string `json:"cpuLimitPenalty,omitempty"`
} `json:"enforcement,omitempty"`
ExcessiveCPUCheck *struct {
Threshold float32 `json:"threshold"`
AverageOver int `json:"averageOverMinutes"`
} `json:"excessiveCPUCheck,omitempty"`
SlackWebhooks *SlackWebhooks `json:"slackWebhooks,omitempty"`
Kubernetes struct {
Enabled bool `json:"enabled"`
Kubeconfig string `json:"kubeconfig,omitempty"`
} `json:"kubernetes"`

ProbePath string `json:"probePath,omitempty"`
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@
"annotations": {
"cluster-autoscaler.kubernetes.io/safe-to-evict": "false",
"container.apparmor.security.beta.kubernetes.io/workspace": "unconfined",
"gitpod.io/cpuLimit": "900m",
"gitpod.io/requiredNodeServices": "ws-daemon,registry-facade",
"gitpod/admission": "admit_owner_only",
"gitpod/contentInitializer": "GmcKZXdvcmtzcGFjZXMvY3J5cHRpYy1pZC1nb2VzLWhlcmcvZmQ2MjgwNGItNGNhYi0xMWU5LTg0M2EtNGU2NDUzNzMwNDhlLnRhckBnaXRwb2QtZGV2LXVzZXItY2hyaXN0ZXN0aW5n",
"gitpod/cpuLimit": "900m",
"gitpod/id": "test",
"gitpod/imageSpec": "Cm1ldS5nY3IuaW8vZ2l0cG9kLWRldi93b3Jrc3BhY2UtYmFzZS1pbWFnZXMvZ2l0aHViLmNvbS90eXBlZm94L2dpdHBvZDo4MGE3ZDQyN2ExZmNkMzQ2ZDQyMDYwM2Q4MGEzMWQ1N2NmNzVhN2FmEjRldS5nY3IuaW8vZ2l0cG9kLWNvcmUtZGV2L2J1aWQvdGhlaWEtaWRlOnNvbWV2ZXJzaW9u",
"gitpod/never-ready": "true",
Expand Down

0 comments on commit aefc69e

Please sign in to comment.