From e7aac63979ca69457f178aa33946e95254886328 Mon Sep 17 00:00:00 2001 From: Luke Kysow <1034429+lkysow@users.noreply.github.com> Date: Thu, 21 May 2020 13:31:05 -0700 Subject: [PATCH] Set default resources This enables our chart to be deployed into clusters that require resources to be set for each container. This also prevents our pods from being evicted first when Kubernetes runs out of resources because Kubernetes will prefer to evict pods that have no resource settings. To that end we have set defaults for clients and servers because even in first-run tests we observed those pods being evicted, causing confusing issues. We are setting our requests and limits to the same value so we are considered to have a 'Guaranteed' quality of service class. These default settings were chosen based on observation in a running cluster using `kubectl top` and also looking at GKE metrics. We ran a Consul connect workload at 1 req/s. For the helper components, the load we saw is representative of a production cluster because the workload of these components doesn't change, e.g. the lifecycle sidecar. For the core components like clients, servers and mesh gateways, their resource requirements will increase depending on the workload. We have set their resources to more than double what was observed in a base-case installation so that first-run doesn't require an unnecessary amount of resources but also won't run up against any limits. It's expected that for production, users will tune their settings based on their specific use-case and observed load. In our observations, most of the "helper" containers used a maximum of 10 millicores and 12 Mi of memory. We have set these smaller containers and init containers to 50 millicores and 25 Mi of memory to be safe. For the lifecycle sidecar we observed 2m/12Mi. We set this to 10m/25Mi to keep this footprint small. This setting will also be used for connect-injected apps so we can't increase it too much or it will cause apps to have large resource footprints unnecessarily. For components like sync catalog, connect inject and snapshot agent we saw max usage of 44m/14Mi. Since these are long-running components, we set their resources to 50m/50Mi. For Consul clients we saw maximum usage of 10m/15Mi, we have set these to 100m/100Mi. For Consul servers we same maximum usage of 40m/13Mi. We have set these to 100m/100Mi. For mesh gateways with multi-dc federation we observed maximum usage of 15m/13Mi. We have set these to 100m/100Mi. --- templates/_helpers.tpl | 9 ++++- templates/client-daemonset.yaml | 14 +++++++ .../client-snapshot-agent-deployment.yaml | 16 +++++++- templates/connect-inject-deployment.yaml | 14 +++++++ templates/create-federation-secret-job.yaml | 7 ++++ templates/enterprise-license-job.yaml | 14 +++++++ templates/mesh-gateway-deployment.yaml | 21 ++++++++++ templates/server-acl-init-cleanup-job.yaml | 7 ++++ templates/server-acl-init-job.yaml | 7 ++++ templates/sync-catalog-deployment.yaml | 14 +++++++ templates/tls-init-cleanup-job.yaml | 7 ++++ templates/tls-init-job.yaml | 7 ++++ test/unit/client-daemonset.bats | 10 ++--- test/unit/mesh-gateway-deployment.bats | 8 ++-- test/unit/server-statefulset.bats | 10 ++--- values.yaml | 38 ++++++++++--------- 16 files changed, 169 insertions(+), 34 deletions(-) diff --git a/templates/_helpers.tpl b/templates/_helpers.tpl index 984098f28..b42435915 100644 --- a/templates/_helpers.tpl +++ b/templates/_helpers.tpl @@ -97,4 +97,11 @@ This template is for an init container. {{- end }} - name: consul-auto-encrypt-ca-cert mountPath: /consul/tls/client/ca -{{- end -}} \ No newline at end of file + resources: + requests: + memory: "25Mi" + cpu: "50m" + limits: + memory: "25Mi" + cpu: "50m" +{{- end -}} diff --git a/templates/client-daemonset.yaml b/templates/client-daemonset.yaml index 40949ee41..39bfe7dd4 100644 --- a/templates/client-daemonset.yaml +++ b/templates/client-daemonset.yaml @@ -317,6 +317,13 @@ spec: volumeMounts: - name: aclconfig mountPath: /consul/aclconfig + resources: + requests: + memory: "25Mi" + cpu: "50m" + limits: + memory: "25Mi" + cpu: "50m" {{- end }} {{- if and .Values.global.tls.enabled (not .Values.global.tls.enableAutoEncrypt) }} - name: client-tls-init @@ -348,6 +355,13 @@ spec: - name: consul-ca-key mountPath: /consul/tls/ca/key readOnly: true + resources: + requests: + memory: "25Mi" + cpu: "50m" + limits: + memory: "25Mi" + cpu: "50m" {{- end }} {{- end }} {{- if .Values.client.nodeSelector }} diff --git a/templates/client-snapshot-agent-deployment.yaml b/templates/client-snapshot-agent-deployment.yaml index a09a3a860..291177e7f 100644 --- a/templates/client-snapshot-agent-deployment.yaml +++ b/templates/client-snapshot-agent-deployment.yaml @@ -125,8 +125,15 @@ spec: {{- end }} mountPath: /consul/tls/ca readOnly: true + {{- end }} {{- end }} - {{- end }} + resources: + requests: + memory: "50Mi" + cpu: "50m" + limits: + memory: "50Mi" + cpu: "50m" {{- if (or (or .Values.global.acls.manageSystemACLs .Values.global.bootstrapACLs) (and .Values.global.tls.enabled .Values.global.tls.enableAutoEncrypt)) }} initContainers: {{- if (or .Values.global.acls.manageSystemACLs .Values.global.bootstrapACLs) }} @@ -142,6 +149,13 @@ spec: volumeMounts: - name: aclconfig mountPath: /consul/aclconfig + resources: + requests: + memory: "25Mi" + cpu: "50m" + limits: + memory: "25Mi" + cpu: "50m" {{- end }} {{- if (and .Values.global.tls.enabled .Values.global.tls.enableAutoEncrypt) }} {{- include "consul.getAutoEncryptClientCA" . | nindent 6 }} diff --git a/templates/connect-inject-deployment.yaml b/templates/connect-inject-deployment.yaml index 0f4227232..6d616901e 100644 --- a/templates/connect-inject-deployment.yaml +++ b/templates/connect-inject-deployment.yaml @@ -161,6 +161,13 @@ spec: readOnly: true {{- end }} {{- end }} + resources: + requests: + memory: "50Mi" + cpu: "50m" + limits: + memory: "50Mi" + cpu: "50m" {{- if (or .Values.connectInject.certs.secretName .Values.global.tls.enabled) }} volumes: {{- if .Values.connectInject.certs.secretName }} @@ -200,6 +207,13 @@ spec: consul-k8s acl-init \ -secret-name="{{ template "consul.fullname" . }}-connect-inject-acl-token" \ -k8s-namespace={{ .Release.Namespace }} + resources: + requests: + memory: "25Mi" + cpu: "50m" + limits: + memory: "25Mi" + cpu: "50m" {{- end }} {{- if (and .Values.global.tls.enabled .Values.global.tls.enableAutoEncrypt) }} {{- include "consul.getAutoEncryptClientCA" . | nindent 6 }} diff --git a/templates/create-federation-secret-job.yaml b/templates/create-federation-secret-job.yaml index f513b159a..fe493edca 100644 --- a/templates/create-federation-secret-job.yaml +++ b/templates/create-federation-secret-job.yaml @@ -127,4 +127,11 @@ spec: -resource-prefix="{{ template "consul.fullname" . }}" \ -server-ca-cert-file=/consul/tls/ca/tls.crt \ -server-ca-key-file=/consul/tls/server/ca/tls.key + resources: + requests: + memory: "25Mi" + cpu: "50m" + limits: + memory: "25Mi" + cpu: "50m" {{- end }} diff --git a/templates/enterprise-license-job.yaml b/templates/enterprise-license-job.yaml index bc6b02fc6..0adf38c53 100644 --- a/templates/enterprise-license-job.yaml +++ b/templates/enterprise-license-job.yaml @@ -99,6 +99,13 @@ spec: mountPath: /consul/tls/ca readOnly: true {{- end }} + resources: + requests: + memory: "50Mi" + cpu: "50m" + limits: + memory: "50Mi" + cpu: "50m" {{- if (or .Values.global.acls.manageSystemACLs .Values.global.bootstrapACLs) }} initContainers: - name: ent-license-acl-init @@ -110,6 +117,13 @@ spec: consul-k8s acl-init \ -secret-name="{{ template "consul.fullname" . }}-enterprise-license-acl-token" \ -k8s-namespace={{ .Release.Namespace }} + resources: + requests: + memory: "25Mi" + cpu: "50m" + limits: + memory: "25Mi" + cpu: "50m" {{- end }} {{- end }} {{- end }} diff --git a/templates/mesh-gateway-deployment.yaml b/templates/mesh-gateway-deployment.yaml index f1dcaeb48..2832eea2f 100644 --- a/templates/mesh-gateway-deployment.yaml +++ b/templates/mesh-gateway-deployment.yaml @@ -90,6 +90,13 @@ spec: volumeMounts: - name: consul-bin mountPath: /consul-bin + resources: + requests: + memory: "25Mi" + cpu: "50m" + limits: + memory: "25Mi" + cpu: "50m" {{- if (and .Values.global.tls.enabled .Values.global.tls.enableAutoEncrypt) }} {{- include "consul.getAutoEncryptClientCA" . | nindent 8 }} {{- end }} @@ -207,6 +214,13 @@ spec: mountPath: /consul/tls/ca readOnly: true {{- end }} + resources: + requests: + memory: "25Mi" + cpu: "50m" + limits: + memory: "25Mi" + cpu: "50m" containers: - name: mesh-gateway image: {{ .Values.meshGateway.imageEnvoy | quote }} @@ -342,6 +356,13 @@ spec: {{- if (or .Values.global.acls.manageSystemACLs .Values.global.bootstrapACLs) }} - -token-file=/consul/service/acl-token {{- end }} + resources: + requests: + memory: "25Mi" + cpu: "10m" + limits: + memory: "25Mi" + cpu: "10m" {{- if .Values.meshGateway.priorityClassName }} priorityClassName: {{ .Values.meshGateway.priorityClassName | quote }} {{- end }} diff --git a/templates/server-acl-init-cleanup-job.yaml b/templates/server-acl-init-cleanup-job.yaml index 87f9b4531..c540fad57 100644 --- a/templates/server-acl-init-cleanup-job.yaml +++ b/templates/server-acl-init-cleanup-job.yaml @@ -52,6 +52,13 @@ spec: - delete-completed-job - -k8s-namespace={{ .Release.Namespace }} - {{ template "consul.fullname" . }}-server-acl-init + resources: + requests: + memory: "25Mi" + cpu: "50m" + limits: + memory: "25Mi" + cpu: "50m" {{- end }} {{- end }} {{- end }} diff --git a/templates/server-acl-init-job.yaml b/templates/server-acl-init-job.yaml index 689a5db22..7f0c2fe30 100644 --- a/templates/server-acl-init-job.yaml +++ b/templates/server-acl-init-job.yaml @@ -182,6 +182,13 @@ spec: {{- end }} {{- end }} {{- end }} + resources: + requests: + memory: "25Mi" + cpu: "50m" + limits: + memory: "25Mi" + cpu: "50m" {{- end }} {{- end }} {{- end }} diff --git a/templates/sync-catalog-deployment.yaml b/templates/sync-catalog-deployment.yaml index 6b35d66ed..d110402ec 100644 --- a/templates/sync-catalog-deployment.yaml +++ b/templates/sync-catalog-deployment.yaml @@ -176,6 +176,13 @@ spec: periodSeconds: 5 successThreshold: 1 timeoutSeconds: 5 + resources: + requests: + memory: "50Mi" + cpu: "50m" + limits: + memory: "50Mi" + cpu: "50m" {{- if or (or .Values.global.acls.manageSystemACLs .Values.global.bootstrapACLs) (and .Values.global.tls.enabled .Values.global.tls.enableAutoEncrypt) }} initContainers: {{- if (or .Values.global.acls.manageSystemACLs .Values.global.bootstrapACLs) }} @@ -188,6 +195,13 @@ spec: consul-k8s acl-init \ -secret-name="{{ template "consul.fullname" . }}-catalog-sync-acl-token" \ -k8s-namespace={{ .Release.Namespace }} + resources: + requests: + memory: "25Mi" + cpu: "50m" + limits: + memory: "25Mi" + cpu: "50m" {{- end }} {{- if (and .Values.global.tls.enabled .Values.global.tls.enableAutoEncrypt) }} {{- include "consul.getAutoEncryptClientCA" . | nindent 6 }} diff --git a/templates/tls-init-cleanup-job.yaml b/templates/tls-init-cleanup-job.yaml index cc125c5e6..5e50f5087 100644 --- a/templates/tls-init-cleanup-job.yaml +++ b/templates/tls-init-cleanup-job.yaml @@ -52,5 +52,12 @@ spec: curl -s -X DELETE --cacert /var/run/secrets/kubernetes.io/serviceaccount/ca.crt \ https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT}/api/v1/namespaces/${NAMESPACE}/secrets/{{ template "consul.fullname" . }}-server-cert \ -H "Authorization: Bearer $( cat /var/run/secrets/kubernetes.io/serviceaccount/token )" + resources: + requests: + memory: "25Mi" + cpu: "50m" + limits: + memory: "25Mi" + cpu: "50m" {{- end }} {{- end }} diff --git a/templates/tls-init-job.yaml b/templates/tls-init-job.yaml index 29c6d8a7c..bc00f584c 100644 --- a/templates/tls-init-job.yaml +++ b/templates/tls-init-job.yaml @@ -110,5 +110,12 @@ spec: mountPath: /consul/tls/ca/key readOnly: true {{- end }} + resources: + requests: + memory: "25Mi" + cpu: "50m" + limits: + memory: "25Mi" + cpu: "50m" {{- end }} {{- end }} diff --git a/test/unit/client-daemonset.bats b/test/unit/client-daemonset.bats index 9a1531136..5fc75b66e 100755 --- a/test/unit/client-daemonset.bats +++ b/test/unit/client-daemonset.bats @@ -150,16 +150,16 @@ load _helpers #-------------------------------------------------------------------- # resources -@test "client/DaemonSet: no resources defined by default" { +@test "client/DaemonSet: resources defined by default" { cd `chart_dir` local actual=$(helm template \ -x templates/client-daemonset.yaml \ . | tee /dev/stderr | - yq -r '.spec.template.spec.containers[0].resources' | tee /dev/stderr) - [ "${actual}" = "null" ] + yq -rc '.spec.template.spec.containers[0].resources' | tee /dev/stderr) + [ "${actual}" = '{"limits":{"cpu":"100m","memory":"100Mi"},"requests":{"cpu":"100m","memory":"100Mi"}}' ] } -@test "client/DaemonSet: resources can be set" { +@test "client/DaemonSet: resources can be overridden" { cd `chart_dir` local actual=$(helm template \ -x templates/client-daemonset.yaml \ @@ -170,7 +170,7 @@ load _helpers } # Test support for the deprecated method of setting a YAML string. -@test "client/DaemonSet: resources can be set with string" { +@test "client/DaemonSet: resources can be overridden with string" { cd `chart_dir` local actual=$(helm template \ -x templates/client-daemonset.yaml \ diff --git a/test/unit/mesh-gateway-deployment.bats b/test/unit/mesh-gateway-deployment.bats index 5eb5f8e90..f124420a3 100755 --- a/test/unit/mesh-gateway-deployment.bats +++ b/test/unit/mesh-gateway-deployment.bats @@ -284,10 +284,10 @@ key2: value2' \ . | tee /dev/stderr | yq -r '.spec.template.spec.containers[0].resources' | tee /dev/stderr) - [ $(echo "${actual}" | yq -r '.requests.memory') = "128Mi" ] - [ $(echo "${actual}" | yq -r '.requests.cpu') = "250m" ] - [ $(echo "${actual}" | yq -r '.limits.memory') = "256Mi" ] - [ $(echo "${actual}" | yq -r '.limits.cpu') = "500m" ] + [ $(echo "${actual}" | yq -r '.requests.memory') = "100Mi" ] + [ $(echo "${actual}" | yq -r '.requests.cpu') = "100m" ] + [ $(echo "${actual}" | yq -r '.limits.memory') = "100Mi" ] + [ $(echo "${actual}" | yq -r '.limits.cpu') = "100m" ] } @test "meshGateway/Deployment: resources can be overridden" { diff --git a/test/unit/server-statefulset.bats b/test/unit/server-statefulset.bats index 39a64f6a2..d3f3e428e 100755 --- a/test/unit/server-statefulset.bats +++ b/test/unit/server-statefulset.bats @@ -83,16 +83,16 @@ load _helpers #-------------------------------------------------------------------- # resources -@test "server/StatefulSet: no resources defined by default" { +@test "server/StatefulSet: resources defined by default" { cd `chart_dir` local actual=$(helm template \ -x templates/server-statefulset.yaml \ . | tee /dev/stderr | - yq -r '.spec.template.spec.containers[0].resources' | tee /dev/stderr) - [ "${actual}" = "null" ] + yq -rc '.spec.template.spec.containers[0].resources' | tee /dev/stderr) + [ "${actual}" = '{"limits":{"cpu":"100m","memory":"100Mi"},"requests":{"cpu":"100m","memory":"100Mi"}}' ] } -@test "server/StatefulSet: resources can be set" { +@test "server/StatefulSet: resources can be overridden" { cd `chart_dir` local actual=$(helm template \ -x templates/server-statefulset.yaml \ @@ -103,7 +103,7 @@ load _helpers } # Test support for the deprecated method of setting a YAML string. -@test "server/StatefulSet: resources can be set with string" { +@test "server/StatefulSet: resources can be overridden with string" { cd `chart_dir` local actual=$(helm template \ -x templates/server-statefulset.yaml \ diff --git a/values.yaml b/values.yaml index 9f3179ed6..22f5af3f6 100644 --- a/values.yaml +++ b/values.yaml @@ -251,12 +251,14 @@ server: # via the extraConfig setting. connect: true - # Resource requests, limits, etc. for the server cluster placement. This - # should map directly to the value of the resources field for a PodSpec. - # By default no direct resource request is made. - # NOTE: The use of a YAML string is deprecated. Instead, set directly as a - # YAML map. - resources: null + # Resource settings for Server agents. + resources: + requests: + memory: "100Mi" + cpu: "100m" + limits: + memory: "100Mi" + cpu: "100m" # updatePartition is used to control a careful rolling update of Consul # servers. This should be done particularly when changing the version @@ -411,12 +413,14 @@ client: # also changes the clients' advertised IP to the hostIP rather than podIP. exposeGossipPorts: false - # Resource requests, limits, etc. for the client cluster placement. This - # should map directly to the value of the resources field for a PodSpec. - # By default no direct resource request is made. - # NOTE: The use of a YAML string is deprecated. Instead, set directly as a - # YAML map. - resources: null + # Resource settings for Client agents. + resources: + requests: + memory: "100Mi" + cpu: "100m" + limits: + memory: "100Mi" + cpu: "100m" # extraConfig is a raw string of extra configuration to set with the # client. This should be JSON. @@ -966,15 +970,13 @@ meshGateway: hostPort: null # Resource settings for mesh gateway pods. - # NOTE: The use of a YAML string is deprecated. Instead, set directly as a - # YAML map. resources: requests: - memory: "128Mi" - cpu: "250m" + memory: "100Mi" + cpu: "100m" limits: - memory: "256Mi" - cpu: "500m" + memory: "100Mi" + cpu: "100m" # By default, we set an anti affinity so that two gateway pods won't be # on the same node. NOTE: Gateways require that Consul client agents are