kubernetes · k8s-ci-robot · Aug 7, 2019 · Mar 1, 2019 · Apr 29, 2019 · Apr 4, 2019
diff --git a/cluster-autoscaler/cloudprovider/aws/README.md b/cluster-autoscaler/cloudprovider/aws/README.md
@@ -138,8 +138,9 @@ If you'd like to scale node groups from 0, an `autoscaling:DescribeLaunchConfigu
 ```
 
 ## Common Notes and Gotchas:
-- The `/etc/ssl/certs/ca-certificates.crt` should exist by default on your ec2 instance. If you use Amazon Linux 2, use `/etc/ssl/certs/ca-bundle.crt` instead.
-- Cluster autoscaler is not zone aware (for now), so if you wish to span multiple availability zones in your autoscaling groups beware that cluster autoscaler will not evenly distribute them. For more information, see https://github.com/kubernetes/contrib/pull/1552#discussion_r75532949.
+- The `/etc/ssl/certs/ca-bundle.crt` file should exist by default on the EC2 instances in your EKS cluster. If you use other cluster provisioning tools like [kops](https://github.com/kubernetes/kops) with operating systems other than Amazon Linux 2, please use `/etc/ssl/certs/ca-certificates.crt` or the correct path on your host instead for the volume hostPath in your cluster autoscaler manifest.
+- Cluster autoscaler does not support Auto Scaling Groups which span multiple Availability Zones; instead you should use an Auto Scaling Group for each Availability Zone and enable the [--balance-similar-node-groups](../../FAQ.md#im-running-cluster-with-nodes-in-multiple-zones-for-ha-purposes-is-that-supported-by-cluster-autoscaler) feature. If you do use a single Auto Scaling Group that spans multiple Availability Zones you will find that AWS unexpectedly terminates nodes without them being drained because of the [rebalancing feature](https://docs.aws.amazon.com/autoscaling/ec2/userguide/auto-scaling-benefits.html#arch-AutoScalingMultiAZ).
+- EBS volumes cannot span multiple AWS Availability Zones. If you have a Pod with a Persistent Volume in an AZ, it must be running on a k8s/EKS node which is in the same Availability Zone as the Persistent Volume. If the AWS Auto Scaling Group launches a new k8s/EKS node in a different AZ and moves this Pod onto the new node, the Persistent Volume in the previous AZ will not be available from the new AZ, and the Pod will stay in Pending status. The workaround is to use a single AZ for the k8s/EKS nodes.
 - By default, cluster autoscaler will not terminate nodes running pods in the kube-system namespace. You can override this default behaviour by passing in the `--skip-nodes-with-system-pods=false` flag.
 - By default, cluster autoscaler will wait 10 minutes between scale down operations, you can adjust this using the `--scale-down-delay-after-add`, `--scale-down-delay-after-delete`, and `--scale-down-delay-after-failure` flag. E.g. `--scale-down-delay-after-add=5m` to decrease the scale down delay to 5 minutes after a node has been added.
 - If you're running multiple ASGs, the `--expander` flag supports three options: `random`, `most-pods` and `least-waste`. `random` will expand a random ASG on scale up. `most-pods` will scale up the ASG that will scheduable the most amount of pods. `least-waste` will expand the ASG that will waste the least amount of CPU/MEM resources. In the event of a tie, cluster autoscaler will fall back to `random`.
diff --git a/cluster-autoscaler/cloudprovider/aws/ec2_instance_types.go b/cluster-autoscaler/cloudprovider/aws/ec2_instance_types.go
@@ -27,6 +27,42 @@ type instanceType struct {
 
 // InstanceTypes is a map of ec2 resources
 var InstanceTypes = map[string]*instanceType{
+	"a1": {
+		InstanceType: "a1",
+		VCPU:         16,
+		MemoryMb:     0,
+		GPU:          0,
+	},
+	"a1.2xlarge": {
+		InstanceType: "a1.2xlarge",
+		VCPU:         8,
+		MemoryMb:     16384,
+		GPU:          0,
+	},
+	"a1.4xlarge": {
+		InstanceType: "a1.4xlarge",
+		VCPU:         16,
+		MemoryMb:     32768,
+		GPU:          0,
+	},
+	"a1.large": {
+		InstanceType: "a1.large",
+		VCPU:         2,
+		MemoryMb:     4096,
+		GPU:          0,
+	},
+	"a1.medium": {
+		InstanceType: "a1.medium",
+		VCPU:         1,
+		MemoryMb:     2048,
+		GPU:          0,
+	},
+	"a1.xlarge": {
+		InstanceType: "a1.xlarge",
+		VCPU:         4,
+		MemoryMb:     8192,
+		GPU:          0,
+	},
 	"c1.medium": {
 		InstanceType: "c1.medium",
 		VCPU:         2,
@@ -195,6 +231,48 @@ var InstanceTypes = map[string]*instanceType{
 		MemoryMb:     8192,
 		GPU:          0,
 	},
+	"c5n": {
+		InstanceType: "c5n",
+		VCPU:         72,
+		MemoryMb:     0,
+		GPU:          0,
+	},
+	"c5n.18xlarge": {
+		InstanceType: "c5n.18xlarge",
+		VCPU:         72,
+		MemoryMb:     196608,
+		GPU:          0,
+	},
+	"c5n.2xlarge": {
+		InstanceType: "c5n.2xlarge",
+		VCPU:         8,
+		MemoryMb:     21504,
+		GPU:          0,
+	},
+	"c5n.4xlarge": {
+		InstanceType: "c5n.4xlarge",
+		VCPU:         16,
+		MemoryMb:     43008,
+		GPU:          0,
+	},
+	"c5n.9xlarge": {
+		InstanceType: "c5n.9xlarge",
+		VCPU:         36,
+		MemoryMb:     98304,
+		GPU:          0,
+	},
+	"c5n.large": {
+		InstanceType: "c5n.large",
+		VCPU:         2,
+		MemoryMb:     5376,
+		GPU:          0,
+	},
+	"c5n.xlarge": {
+		InstanceType: "c5n.xlarge",
+		VCPU:         4,
+		MemoryMb:     10752,
+		GPU:          0,
+	},
 	"cc2.8xlarge": {
 		InstanceType: "cc2.8xlarge",
 		VCPU:         32,
@@ -307,7 +385,7 @@ var InstanceTypes = map[string]*instanceType{
 		InstanceType: "g3s.xlarge",
 		VCPU:         4,
 		MemoryMb:     31232,
-		GPU:          0,
+		GPU:          1,
 	},
 	"h1": {
 		InstanceType: "h1",
@@ -573,6 +651,12 @@ var InstanceTypes = map[string]*instanceType{
 		MemoryMb:     8192,
 		GPU:          0,
 	},
+	"m5.metal": {
+		InstanceType: "m5.metal",
+		VCPU:         96,
+		MemoryMb:     393216,
+		GPU:          0,
+	},
 	"m5.xlarge": {
 		InstanceType: "m5.xlarge",
 		VCPU:         4,
@@ -615,6 +699,42 @@ var InstanceTypes = map[string]*instanceType{
 		MemoryMb:     16384,
 		GPU:          0,
 	},
+	"m5ad.12xlarge": {
+		InstanceType: "m5ad.12xlarge",
+		VCPU:         48,
+		MemoryMb:     196608,
+		GPU:          0,
+	},
+	"m5ad.24xlarge": {
+		InstanceType: "m5ad.24xlarge",
+		VCPU:         96,
+		MemoryMb:     393216,
+		GPU:          0,
+	},
+	"m5ad.2xlarge": {
+		InstanceType: "m5ad.2xlarge",
+		VCPU:         8,
+		MemoryMb:     32768,
+		GPU:          0,
+	},
+	"m5ad.4xlarge": {
+		InstanceType: "m5ad.4xlarge",
+		VCPU:         16,
+		MemoryMb:     65536,
+		GPU:          0,
+	},
+	"m5ad.large": {
+		InstanceType: "m5ad.large",
+		VCPU:         2,
+		MemoryMb:     8192,
+		GPU:          0,
+	},
+	"m5ad.xlarge": {
+		InstanceType: "m5ad.xlarge",
+		VCPU:         4,
+		MemoryMb:     16384,
+		GPU:          0,
+	},
 	"m5d": {
 		InstanceType: "m5d",
 		VCPU:         96,
@@ -651,6 +771,12 @@ var InstanceTypes = map[string]*instanceType{
 		MemoryMb:     8192,
 		GPU:          0,
 	},
+	"m5d.metal": {
+		InstanceType: "m5d.metal",
+		VCPU:         96,
+		MemoryMb:     393216,
+		GPU:          0,
+	},
 	"m5d.xlarge": {
 		InstanceType: "m5d.xlarge",
 		VCPU:         4,
@@ -705,6 +831,18 @@ var InstanceTypes = map[string]*instanceType{
 		MemoryMb:     249856,
 		GPU:          4,
 	},
+	"p3dn": {
+		InstanceType: "p3dn",
+		VCPU:         96,
+		MemoryMb:     786432,
+		GPU:          8,
+	},
+	"p3dn.24xlarge": {
+		InstanceType: "p3dn.24xlarge",
+		VCPU:         96,
+		MemoryMb:     786432,
+		GPU:          8,
+	},
 	"r3": {
 		InstanceType: "r3",
 		VCPU:         32,
@@ -819,6 +957,12 @@ var InstanceTypes = map[string]*instanceType{
 		MemoryMb:     16384,
 		GPU:          0,
 	},
+	"r5.metal": {
+		InstanceType: "r5.metal",
+		VCPU:         96,
+		MemoryMb:     786432,
+		GPU:          0,
+	},
 	"r5.xlarge": {
 		InstanceType: "r5.xlarge",
 		VCPU:         4,
@@ -861,6 +1005,42 @@ var InstanceTypes = map[string]*instanceType{
 		MemoryMb:     32768,
 		GPU:          0,
 	},
+	"r5ad.12xlarge": {
+		InstanceType: "r5ad.12xlarge",
+		VCPU:         48,
+		MemoryMb:     393216,
+		GPU:          0,
+	},
+	"r5ad.24xlarge": {
+		InstanceType: "r5ad.24xlarge",
+		VCPU:         96,
+		MemoryMb:     786432,
+		GPU:          0,
+	},
+	"r5ad.2xlarge": {
+		InstanceType: "r5ad.2xlarge",
+		VCPU:         8,
+		MemoryMb:     65536,
+		GPU:          0,
+	},
+	"r5ad.4xlarge": {
+		InstanceType: "r5ad.4xlarge",
+		VCPU:         16,
+		MemoryMb:     131072,
+		GPU:          0,
+	},
+	"r5ad.large": {
+		InstanceType: "r5ad.large",
+		VCPU:         2,
+		MemoryMb:     16384,
+		GPU:          0,
+	},
+	"r5ad.xlarge": {
+		InstanceType: "r5ad.xlarge",
+		VCPU:         4,
+		MemoryMb:     32768,
+		GPU:          0,
+	},
 	"r5d": {
 		InstanceType: "r5d",
 		VCPU:         96,
@@ -897,6 +1077,12 @@ var InstanceTypes = map[string]*instanceType{
 		MemoryMb:     16384,
 		GPU:          0,
 	},
+	"r5d.metal": {
+		InstanceType: "r5d.metal",
+		VCPU:         96,
+		MemoryMb:     786432,
+		GPU:          0,
+	},
 	"r5d.xlarge": {
 		InstanceType: "r5d.xlarge",
 		VCPU:         4,
@@ -993,6 +1179,48 @@ var InstanceTypes = map[string]*instanceType{
 		MemoryMb:     16384,
 		GPU:          0,
 	},
+	"t3a.2xlarge": {
+		InstanceType: "t3a.2xlarge",
+		VCPU:         8,
+		MemoryMb:     32768,
+		GPU:          0,
+	},
+	"t3a.large": {
+		InstanceType: "t3a.large",
+		VCPU:         2,
+		MemoryMb:     8192,
+		GPU:          0,
+	},
+	"t3a.medium": {
+		InstanceType: "t3a.medium",
+		VCPU:         2,
+		MemoryMb:     4096,
+		GPU:          0,
+	},
+	"t3a.micro": {
+		InstanceType: "t3a.micro",
+		VCPU:         2,
+		MemoryMb:     1024,
+		GPU:          0,
+	},
+	"t3a.nano": {
+		InstanceType: "t3a.nano",
+		VCPU:         2,
+		MemoryMb:     512,
+		GPU:          0,
+	},
+	"t3a.small": {
+		InstanceType: "t3a.small",
+		VCPU:         2,
+		MemoryMb:     2048,
+		GPU:          0,
+	},
+	"t3a.xlarge": {
+		InstanceType: "t3a.xlarge",
+		VCPU:         4,
+		MemoryMb:     16384,
+		GPU:          0,
+	},
 	"u-12tb1": {
 		InstanceType: "u-12tb1",
 		VCPU:         448,
@@ -1107,6 +1335,12 @@ var InstanceTypes = map[string]*instanceType{
 		MemoryMb:     16384,
 		GPU:          0,
 	},
+	"z1d.metal": {
+		InstanceType: "z1d.metal",
+		VCPU:         48,
+		MemoryMb:     393216,
+		GPU:          0,
+	},
 	"z1d.xlarge": {
 		InstanceType: "z1d.xlarge",
 		VCPU:         4,

diff --git a/cluster-autoscaler/cloudprovider/aws/examples/cluster-autoscaler-autodiscover.yaml b/cluster-autoscaler/cloudprovider/aws/examples/cluster-autoscaler-autodiscover.yaml
@@ -42,7 +42,7 @@ rules:
   resources: ["poddisruptionbudgets"]
   verbs: ["watch","list"]
 - apiGroups: ["apps"]
-  resources: ["statefulsets"]
+  resources: ["statefulsets", "replicasets"]
   verbs: ["watch","list","get"]
 - apiGroups: ["storage.k8s.io"]
   resources: ["storageclasses"]
@@ -121,7 +121,7 @@ spec:
     spec:
       serviceAccountName: cluster-autoscaler
       containers:
-        - image: k8s.gcr.io/cluster-autoscaler:v1.2.2
+        - image: k8s.gcr.io/cluster-autoscaler:v1.12.7
           name: cluster-autoscaler
           resources:
             limits:
@@ -146,4 +146,4 @@ spec:
       volumes:
         - name: ssl-certs
           hostPath:
-            path: "/etc/ssl/certs/ca-certificates.crt"
+            path: "/etc/ssl/certs/ca-bundle.crt"
diff --git a/cluster-autoscaler/cloudprovider/aws/examples/cluster-autoscaler-multi-asg.yaml b/cluster-autoscaler/cloudprovider/aws/examples/cluster-autoscaler-multi-asg.yaml
@@ -42,7 +42,7 @@ rules:
   resources: ["poddisruptionbudgets"]
   verbs: ["watch","list"]
 - apiGroups: ["apps"]
-  resources: ["statefulsets"]
+  resources: ["statefulsets", "replicasets"]
   verbs: ["watch","list","get"]
 - apiGroups: ["storage.k8s.io"]
   resources: ["storageclasses"]
@@ -121,7 +121,7 @@ spec:
     spec:
       serviceAccountName: cluster-autoscaler
       containers:
-        - image: k8s.gcr.io/cluster-autoscaler:v1.2.2
+        - image: k8s.gcr.io/cluster-autoscaler:v1.12.7
           name: cluster-autoscaler
           resources:
             limits:
@@ -147,4 +147,4 @@ spec:
       volumes:
         - name: ssl-certs
           hostPath:
-            path: "/etc/ssl/certs/ca-certificates.crt"
+            path: "/etc/ssl/certs/ca-bundle.crt"