vivekbagade committed Mar 11, 2021
2 parents 469853a + bb3b24c commit f9faf1c
Showing 1,910 changed files with 93,171 additions and 25,478 deletions.
23 changes: 14 additions & 9 deletions .github/workflows/pr.yaml
@@ -25,19 +25,24 @@ jobs:
uses: actions/checkout@v2
- name: Fetch history
run: git fetch --prune --unshallow
- id: lint
name: Run chart-testing (lint)
- name: Set up chart-testing
uses: helm/[email protected]
with:
command: lint
- name: Run chart-testing (lint)
run: ct lint
# Only build a kind cluster if there are chart changes to test.
- if: steps.lint.outputs.changed == 'true'
- name: Run chart-testing (list-changed)
id: list-changed
run: |
changed=$(ct list-changed)
if [[ -n "$changed" ]]; then
echo "::set-output name=changed::true"
fi
- if: steps.list-changed.outputs.changed == 'true'
name: Create kind cluster
uses: helm/[email protected]
- name: Run chart-testing (install)
uses: helm/[email protected]
with:
command: install
- if: steps.list-changed.outputs.changed == 'true'
name: Run chart-testing (install)
run: ct install
helm-docs-validate:
if: ${{ needs.changes.outputs.charts == 'true' }}
name: Helm Docs
2 changes: 2 additions & 0 deletions charts/README.md
@@ -17,6 +17,8 @@ The binary for `pre-commit` can be installed via Homebrew:
$ brew install pre-commit
```

For those without Homebrew, Pre-commit has [other installation methods available](https://pre-commit.com/#install).

### Install git hooks

After the `pre-commit` binary is installed, go to this repository's directory, and run the following command to install the git hook:
4 changes: 2 additions & 2 deletions charts/cluster-autoscaler/Chart.yaml
@@ -3,7 +3,7 @@ appVersion: 1.20.0
description: Scales Kubernetes worker nodes within autoscaling groups.
engine: gotpl
home: https://github.com/kubernetes/autoscaler
icon: https://github.com/kubernetes/kubernetes/blob/master/logo/logo.png
icon: https://github.com/kubernetes/kubernetes/raw/master/logo/logo.png
maintainers:
- email: [email protected]
name: yurrriq
@@ -17,4 +17,4 @@ name: cluster-autoscaler
sources:
- https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler
type: application
version: 9.6.0
version: 9.8.0
16 changes: 11 additions & 5 deletions charts/cluster-autoscaler/README.md
@@ -35,16 +35,21 @@ This chart bootstraps a cluster-autoscaler deployment on a [Kubernetes](http://k
The previous `cluster-autoscaler` Helm chart hosted at [helm/charts](https://github.com/helm/charts) has been moved to this repository in accordance with the [Deprecation timeline](https://github.com/helm/charts#deprecation-timeline). Note that a few things have changed between this version and the old version:

- This repository **only** supports Helm chart installations using Helm 3+ since the `apiVersion` on the charts has been marked as `v2`.
- Previous versions of the Helm chart have not been migrated, and the version was reset to `1.0.0` initially. If you are looking for old versions of the chart, it's best to run `helm pull stable/cluster-autoscaler --version <your-version>` until you are ready to move to this repository's version.
- As of version `9.0.0`, the chart has returned to its previous versioning scheme for ease of migration from the previous chart location.
- Previous versions of the Helm chart have not been migrated

## Migration from 1.X to 9.X+ versions of this Chart

On initial adoption, this chart was renamed from `cluster-autoscaler` to `cluster-autoscaler-chart` due to technical limitations. This affects all `1.X` releases of the chart.
**TL;DR:**
Use versions `>=9.0.0` of the `cluster-autoscaler` chart published from this repository; earlier versions, and the `cluster-autoscaler-chart` versioned `1.X.X` published from this repository, are deprecated.

Releases of the chart from `9.0.0` onwards restore the chart's name to `cluster-autoscaler` and resume the versioning established at the chart's previous location.
<details>
<summary>Previous versions of this chart - further details</summary>
On its initial migration from the `helm/charts` repository, this chart was renamed from `cluster-autoscaler` to `cluster-autoscaler-chart` due to technical limitations. This affected all `1.X` releases of the chart; version `2.0.0` exists only to mark the [`cluster-autoscaler-chart` chart](https://artifacthub.io/packages/helm/cluster-autoscaler/cluster-autoscaler-chart) as deprecated.

Releases of the chart from `9.0.0` onwards restore the chart's name to `cluster-autoscaler` and resume the versioning established at the chart's previous location.

To migrate from a `1.X` release of the chart to a `9.0.0` or later release, first uninstall your `1.X` install of the `cluster-autoscaler-chart` chart, then install the new `cluster-autoscaler` chart (a minimal command sketch follows below).
</details>
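A minimal migration sketch, assuming a release named `my-release` and the chart repository published at `https://kubernetes.github.io/autoscaler` (both names are assumptions; substitute your own release name and values file):

```
$ helm uninstall my-release
$ helm repo add autoscaler https://kubernetes.github.io/autoscaler
$ helm install my-release autoscaler/cluster-autoscaler -f my-values.yaml
```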

## Migration from 9.0 to 9.1

@@ -361,7 +366,7 @@ Though enough for the majority of installations, the default PodSecurityPolicy _
| fullnameOverride | string | `""` | String to fully override `cluster-autoscaler.fullname` template. |
| image.pullPolicy | string | `"IfNotPresent"` | Image pull policy |
| image.pullSecrets | list | `[]` | Image pull secrets |
| image.repository | string | `"us.gcr.io/k8s-artifacts-prod/autoscaling/cluster-autoscaler"` | Image repository |
| image.repository | string | `"k8s.gcr.io/autoscaling/cluster-autoscaler"` | Image repository |
| image.tag | string | `"v1.20.0"` | Image tag |
| kubeTargetVersionOverride | string | `""` | Allow overriding the `.Capabilities.KubeVersion.GitVersion` check. Useful for `helm template` commands. |
| magnumCABundlePath | string | `"/etc/kubernetes/ca-bundle.crt"` | Path to the host's CA bundle, from `ca-file` in the cloud-config file. |
@@ -380,6 +385,7 @@ Though enough for the majority of installations, the default PodSecurityPolicy _
| rbac.create | bool | `true` | If `true`, create and use RBAC resources. |
| rbac.pspEnabled | bool | `false` | If `true`, creates and uses RBAC resources required in the cluster with [Pod Security Policies](https://kubernetes.io/docs/concepts/policy/pod-security-policy/) enabled. Must be used with `rbac.create` set to `true`. |
| rbac.serviceAccount.annotations | object | `{}` | Additional Service Account annotations. |
| rbac.serviceAccount.automountServiceAccountToken | bool | `true` | Automount API credentials for a Service Account. |
| rbac.serviceAccount.create | bool | `true` | If `true` and `rbac.create` is also true, a Service Account will be created. |
| rbac.serviceAccount.name | string | `""` | The name of the ServiceAccount to use. If not set and create is `true`, a name is generated using the fullname template. |
| replicaCount | int | `1` | Desired number of pods |
13 changes: 9 additions & 4 deletions charts/cluster-autoscaler/README.md.gotmpl
@@ -35,16 +35,21 @@ This chart bootstraps a cluster-autoscaler deployment on a [Kubernetes](http://k
The previous `cluster-autoscaler` Helm chart hosted at [helm/charts](https://github.com/helm/charts) has been moved to this repository in accordance with the [Deprecation timeline](https://github.com/helm/charts#deprecation-timeline). Note that a few things have changed between this version and the old version:

- This repository **only** supports Helm chart installations using Helm 3+ since the `apiVersion` on the charts has been marked as `v2`.
- Previous versions of the Helm chart have not been migrated, and the version was reset to `1.0.0` initially. If you are looking for old versions of the chart, it's best to run `helm pull stable/cluster-autoscaler --version <your-version>` until you are ready to move to this repository's version.
- As of version `9.0.0`, the chart has returned to its previous versioning scheme for ease of migration from the previous chart location.
- Previous versions of the Helm chart have not been migrated

## Migration from 1.X to 9.X+ versions of this Chart

On initial adoption, this chart was renamed from `cluster-autoscaler` to `cluster-autoscaler-chart` due to technical limitations. This affects all `1.X` releases of the chart.
**TL;DR:**
Use versions `>=9.0.0` of the `cluster-autoscaler` chart published from this repository; earlier versions, and the `cluster-autoscaler-chart` versioned `1.X.X` published from this repository, are deprecated.

Releases of the chart from `9.0.0` onwards restore the chart's name to `cluster-autoscaler` and resume the versioning established at the chart's previous location.
<details>
<summary>Previous versions of this chart - further details</summary>
On its initial migration from the `helm/charts` repository, this chart was renamed from `cluster-autoscaler` to `cluster-autoscaler-chart` due to technical limitations. This affected all `1.X` releases of the chart; version `2.0.0` exists only to mark the [`cluster-autoscaler-chart` chart](https://artifacthub.io/packages/helm/cluster-autoscaler/cluster-autoscaler-chart) as deprecated.

Releases of the chart from `9.0.0` onwards restore the chart's name to `cluster-autoscaler` and resume the versioning established at the chart's previous location.

To migrate from a `1.X` release of the chart to a `9.0.0` or later release, first uninstall your `1.X` install of the `cluster-autoscaler-chart` chart, then install the new `cluster-autoscaler` chart.
</details>

## Migration from 9.0 to 9.1

1 change: 1 addition & 0 deletions charts/cluster-autoscaler/templates/serviceaccount.yaml
@@ -8,4 +8,5 @@ metadata:
{{- if .Values.rbac.serviceAccount.annotations }}
annotations: {{ toYaml .Values.rbac.serviceAccount.annotations | nindent 4 }}
{{- end }}
automountServiceAccountToken: {{ .Values.rbac.serviceAccount.automountServiceAccountToken }}
{{- end }}
4 changes: 3 additions & 1 deletion charts/cluster-autoscaler/values.yaml
@@ -192,7 +192,7 @@ fullnameOverride: ""

image:
# image.repository -- Image repository
repository: us.gcr.io/k8s-artifacts-prod/autoscaling/cluster-autoscaler
repository: k8s.gcr.io/autoscaling/cluster-autoscaler
# image.tag -- Image tag
tag: v1.20.0
# image.pullPolicy -- Image pull policy
@@ -244,6 +244,8 @@ rbac:
create: true
# rbac.serviceAccount.name -- The name of the ServiceAccount to use. If not set and create is `true`, a name is generated using the fullname template.
name: ""
# rbac.serviceAccount.automountServiceAccountToken -- Automount API credentials for a Service Account.
automountServiceAccountToken: true

# replicaCount -- Desired number of pods
replicaCount: 1
1 change: 1 addition & 0 deletions cluster-autoscaler/FAQ.md
@@ -664,6 +664,7 @@ The following startup parameters are supported for cluster autoscaler:
| `estimator` | Type of resource estimator to be used in scale up | binpacking
| `expander` | Type of node group expander to be used in scale up. | random
| `write-status-configmap` | Should CA write status information to a configmap | true
| `status-config-map-name` | The name of the status ConfigMap that CA writes | cluster-autoscaler-status
| `max-inactivity` | Maximum time from last recorded autoscaler activity before automatic restart | 10 minutes
| `max-failing-time` | Maximum time from last recorded successful autoscaler run before automatic restart | 15 minutes
| `balance-similar-node-groups` | Detect similar node groups and balance the number of nodes between them | false
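As a usage sketch, the new flag pairs with the existing status ConfigMap switch on the command line (the custom name below is illustrative, not a default from this commit):

```
./cluster-autoscaler \
  --write-status-configmap=true \
  --status-config-map-name=my-ca-status
```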
10 changes: 2 additions & 8 deletions cluster-autoscaler/Makefile
@@ -35,20 +35,14 @@ export DOCKER_CLI_EXPERIMENTAL := enabled
build: build-arch-$(GOARCH)

build-arch-%: clean-arch-%
$(ENVVAR) GOOS=$(GOOS) GOARCH=$* go build ${LDFLAGS_FLAG} ${TAGS_FLAG} ./...
$(ENVVAR) GOOS=$(GOOS) GOARCH=$* go build -o cluster-autoscaler-$* ${LDFLAGS_FLAG} ${TAGS_FLAG}

build-binary: build-binary-arch-$(GOARCH)

build-binary-arch-%: clean-arch-%
$(ENVVAR) GOOS=$(GOOS) GOARCH=$* go build -o cluster-autoscaler-$* ${LDFLAGS_FLAG} ${TAGS_FLAG}

test-unit: clean build
go test --test.short -race ./... ${TAGS_FLAG}

dev-release: dev-release-arch-$(GOARCH)

dev-release-arch-%: build-binary-arch-% make-image-arch-% push-image-arch-%
dev-release-arch-%: build-arch-% make-image-arch-% push-image-arch-%
@echo "Release ${TAG}${FOR_PROVIDER}-$* completed"

make-image: make-image-arch-$(GOARCH)
@@ -97,7 +91,7 @@ build-in-docker: build-in-docker-arch-$(GOARCH)

build-in-docker-arch-%: clean-arch-% docker-builder
docker run ${RM_FLAG} -v `pwd`:/gopath/src/k8s.io/autoscaler/cluster-autoscaler/:Z autoscaling-builder:latest \
bash -c 'cd /gopath/src/k8s.io/autoscaler/cluster-autoscaler && BUILD_TAGS=${BUILD_TAGS} LDFLAGS="${LDFLAGS}" make build-binary-arch-$*'
bash -c 'cd /gopath/src/k8s.io/autoscaler/cluster-autoscaler && BUILD_TAGS=${BUILD_TAGS} LDFLAGS="${LDFLAGS}" make build-arch-$*'

release: $(addprefix build-in-docker-arch-,$(ALL_ARCH)) execute-release
@echo "Full in-docker release ${TAG}${FOR_PROVIDER} completed"
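With the `build-binary*` targets folded into `build*`, per-architecture development builds go through the consolidated targets. A usage sketch (the architecture and tag values are assumptions, not defaults from this commit):

```
make build-arch-arm64            # emits the cluster-autoscaler-arm64 binary directly
TAG=dev make dev-release-arch-arm64
```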
1 change: 1 addition & 0 deletions cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go
@@ -42,6 +42,7 @@ var (
"nvidia-tesla-k80": {},
"nvidia-tesla-p100": {},
"nvidia-tesla-v100": {},
"nvidia-tesla-t4": {},
}
)

2 changes: 1 addition & 1 deletion cluster-autoscaler/cloudprovider/aws/aws_util.go
@@ -76,7 +76,7 @@ func GenerateEC2InstanceTypes(region string) (map[string]*InstanceType, error) {
klog.V(1).Infof("fetching %s\n", url)
res, err := http.Get(url)
if err != nil {
klog.Warningf("Error fetching %s skipping...\n", url)
klog.Warningf("Error fetching %s skipping...\n%s\n", url, err)
continue
}

23 changes: 22 additions & 1 deletion cluster-autoscaler/cloudprovider/gce/reserved.go
@@ -57,6 +57,15 @@ const (
// Reserved memory for software IO TLB
swiotlbReservedMemory = 64 * MiB
swiotlbThresholdMemory = 3 * GiB

// Memory Estimation Correction
// correctionConstant is a linear constant for additional reserved memory
correctionConstant = 0.00175
// maximumCorrectionValue is the max-cap for additional reserved memory
maximumCorrectionValue = 248 * MiB
// ubuntuSpecificOffset is a constant value that is additionally added to Ubuntu
// based distributions as reserved memory
ubuntuSpecificOffset = 4 * MiB
)

// EvictionHard is the struct used to keep parsed values for eviction
@@ -67,7 +76,7 @@ type EvictionHard struct {

// CalculateKernelReserved computes how much memory Linux kernel will reserve.
// TODO(jkaniuk): account for crashkernel reservation on RHEL / CentOS
func CalculateKernelReserved(physicalMemory int64, os OperatingSystem) int64 {
func CalculateKernelReserved(physicalMemory int64, os OperatingSystem, osDistribution OperatingSystemDistribution) int64 {
switch os {
case OperatingSystemLinux:
// Account for memory reserved by kernel
@@ -77,6 +86,18 @@ func CalculateKernelReserved(physicalMemory int64, os OperatingSystem) int64 {
if physicalMemory > swiotlbThresholdMemory {
reserved += swiotlbReservedMemory
}

// Additional reserved memory to correct estimation
// The reason for this value is we detected additional reservation, but we were
// unable to find the root cause. Hence, we added a best estimated formula that was
// statistically developed.
if osDistribution == OperatingSystemDistributionCOS || osDistribution == OperatingSystemDistributionCOSContainerd {
reserved += int64(math.Min(correctionConstant*float64(physicalMemory), maximumCorrectionValue))
} else if osDistribution == OperatingSystemDistributionUbuntu || osDistribution == OperatingSystemDistributionUbuntuContainerd {
reserved += int64(math.Min(correctionConstant*float64(physicalMemory), maximumCorrectionValue))
reserved += ubuntuSpecificOffset
}

return reserved
case OperatingSystemWindows:
return 0
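The correction is easiest to see with concrete numbers. Below is a standalone sketch of the added logic, with the constants copied from `reserved.go` above; the helper and `main` wrapper are illustrative only, not part of the autoscaler:

```go
package main

import (
	"fmt"
	"math"
)

const (
	MiB = 1024 * 1024
	GiB = 1024 * MiB

	// Constants as introduced in reserved.go above.
	correctionConstant     = 0.00175
	maximumCorrectionValue = 248 * MiB
	ubuntuSpecificOffset   = 4 * MiB
)

// estimationCorrection mirrors the new branch in CalculateKernelReserved:
// a linear term capped at 248 MiB, plus a flat 4 MiB offset on Ubuntu.
func estimationCorrection(physicalMemory int64, ubuntu bool) int64 {
	reserved := int64(math.Min(correctionConstant*float64(physicalMemory), maximumCorrectionValue))
	if ubuntu {
		reserved += ubuntuSpecificOffset
	}
	return reserved
}

func main() {
	// 4 GiB node: min(0.00175 * 4 GiB, 248 MiB) ≈ 7.2 MiB of extra reservation.
	fmt.Println(estimationCorrection(4*GiB, false)) // 7516192
	// The 248 MiB cap takes over above roughly 138 GiB; Ubuntu adds 4 MiB more.
	fmt.Println(estimationCorrection(256*GiB, true)) // 264241152 (248 MiB + 4 MiB)
}
```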
46 changes: 44 additions & 2 deletions cluster-autoscaler/cloudprovider/gce/reserved_test.go
@@ -18,6 +18,7 @@ package gce

import (
"fmt"
"math"
"testing"

"github.com/stretchr/testify/assert"
@@ -27,37 +28,78 @@ func TestCalculateKernelReservedLinux(t *testing.T) {
type testCase struct {
physicalMemory int64
reservedMemory int64
osDistribution OperatingSystemDistribution
}
testCases := []testCase{
{
physicalMemory: 256 * MiB,
reservedMemory: 4*MiB + kernelReservedMemory,
osDistribution: OperatingSystemDistributionCOS,
},
{
physicalMemory: 2 * GiB,
reservedMemory: 32*MiB + kernelReservedMemory,
osDistribution: OperatingSystemDistributionCOS,
},
{
physicalMemory: 3 * GiB,
reservedMemory: 48*MiB + kernelReservedMemory,
osDistribution: OperatingSystemDistributionCOS,
},
{
physicalMemory: 3.25 * GiB,
reservedMemory: 52*MiB + kernelReservedMemory + swiotlbReservedMemory,
osDistribution: OperatingSystemDistributionCOS,
},
{
physicalMemory: 4 * GiB,
reservedMemory: 64*MiB + kernelReservedMemory + swiotlbReservedMemory,
osDistribution: OperatingSystemDistributionCOS,
},
{
physicalMemory: 128 * GiB,
reservedMemory: 2*GiB + kernelReservedMemory + swiotlbReservedMemory,
osDistribution: OperatingSystemDistributionCOS,
},
{
physicalMemory: 256 * MiB,
reservedMemory: 4*MiB + kernelReservedMemory,
osDistribution: OperatingSystemDistributionUbuntu,
},
{
physicalMemory: 2 * GiB,
reservedMemory: 32*MiB + kernelReservedMemory,
osDistribution: OperatingSystemDistributionUbuntu,
},
{
physicalMemory: 3 * GiB,
reservedMemory: 48*MiB + kernelReservedMemory,
osDistribution: OperatingSystemDistributionUbuntu,
},
{
physicalMemory: 3.25 * GiB,
reservedMemory: 52*MiB + kernelReservedMemory + swiotlbReservedMemory,
osDistribution: OperatingSystemDistributionUbuntu,
},
{
physicalMemory: 4 * GiB,
reservedMemory: 64*MiB + kernelReservedMemory + swiotlbReservedMemory,
osDistribution: OperatingSystemDistributionUbuntu,
},
{
physicalMemory: 128 * GiB,
reservedMemory: 2*GiB + kernelReservedMemory + swiotlbReservedMemory,
osDistribution: OperatingSystemDistributionUbuntu,
},
}
for idx, tc := range testCases {
t.Run(fmt.Sprintf("%v", idx), func(t *testing.T) {
reserved := CalculateKernelReserved(tc.physicalMemory, OperatingSystemLinux)
assert.Equal(t, tc.reservedMemory, reserved)
reserved := CalculateKernelReserved(tc.physicalMemory, OperatingSystemLinux, tc.osDistribution)
if tc.osDistribution == OperatingSystemDistributionUbuntu {
assert.Equal(t, tc.reservedMemory+int64(math.Min(correctionConstant*float64(tc.physicalMemory), maximumCorrectionValue)+ubuntuSpecificOffset), reserved)
} else if tc.osDistribution == OperatingSystemDistributionCOS {
assert.Equal(t, tc.reservedMemory+int64(math.Min(correctionConstant*float64(tc.physicalMemory), maximumCorrectionValue)), reserved)
}
})
}
}
2 changes: 1 addition & 1 deletion cluster-autoscaler/cloudprovider/gce/templates.go
@@ -63,7 +63,7 @@ func (t *GceTemplateBuilder) BuildCapacity(cpu int64, mem int64, accelerators []
}

capacity[apiv1.ResourceCPU] = *resource.NewQuantity(cpu, resource.DecimalSI)
memTotal := mem - CalculateKernelReserved(mem, os)
memTotal := mem - CalculateKernelReserved(mem, os, osDistribution)
capacity[apiv1.ResourceMemory] = *resource.NewQuantity(memTotal, resource.DecimalSI)

if accelerators != nil && len(accelerators) > 0 {
7 changes: 4 additions & 3 deletions cluster-autoscaler/cloudprovider/gce/templates_test.go
@@ -18,6 +18,7 @@ package gce

import (
"fmt"
"math"
"strings"
"testing"

@@ -454,19 +455,19 @@ func TestBuildCapacityMemory(t *testing.T) {
physicalCpu: 1,
physicalMemory: 2 * units.GiB,
os: OperatingSystemLinux,
expectedCapacityMemory: 2*units.GiB - 32*units.MiB - kernelReservedMemory,
expectedCapacityMemory: 2*units.GiB - 32*units.MiB - kernelReservedMemory - int64(math.Min(correctionConstant*float64(2*units.GiB), maximumCorrectionValue)),
},
{
physicalCpu: 2,
physicalMemory: 4 * units.GiB,
os: OperatingSystemLinux,
expectedCapacityMemory: 4*units.GiB - 64*units.MiB - kernelReservedMemory - swiotlbReservedMemory,
expectedCapacityMemory: 4*units.GiB - 64*units.MiB - kernelReservedMemory - swiotlbReservedMemory - int64(math.Min(correctionConstant*float64(4*units.GiB), maximumCorrectionValue)),
},
{
physicalCpu: 32,
physicalMemory: 128 * units.GiB,
os: OperatingSystemLinux,
expectedCapacityMemory: 128*units.GiB - 2*units.GiB - kernelReservedMemory - swiotlbReservedMemory,
expectedCapacityMemory: 128*units.GiB - 2*units.GiB - kernelReservedMemory - swiotlbReservedMemory - int64(math.Min(correctionConstant*float64(128*units.GiB), maximumCorrectionValue)),
},
{
physicalCpu: 2,