From 0ed57d64c44e1c80f14661e76453a6aa59ea684f Mon Sep 17 00:00:00 2001 From: Felipe Esteves Date: Sun, 29 Jan 2023 10:03:40 -0300 Subject: [PATCH] feat: add blue/green upgrade strategy settings --- README.md | 5 ++ autogen/main/README.md | 5 ++ autogen/main/cluster.tf.tmpl | 27 ++++++++-- autogen/main/variables.tf.tmpl | 43 ++++++++++++++- cluster.tf | 54 ++++++++++++++++--- .../README.md | 12 +++++ .../cluster.tf | 54 ++++++++++++++++--- .../variables.tf | 43 ++++++++++++++- modules/beta-private-cluster/README.md | 12 +++++ modules/beta-private-cluster/cluster.tf | 54 ++++++++++++++++--- modules/beta-private-cluster/variables.tf | 43 ++++++++++++++- .../README.md | 12 +++++ .../cluster.tf | 54 ++++++++++++++++--- .../variables.tf | 43 ++++++++++++++- modules/beta-public-cluster/README.md | 12 +++++ modules/beta-public-cluster/cluster.tf | 54 ++++++++++++++++--- modules/beta-public-cluster/variables.tf | 43 ++++++++++++++- .../private-cluster-update-variant/README.md | 5 ++ .../private-cluster-update-variant/cluster.tf | 54 ++++++++++++++++--- modules/private-cluster/README.md | 5 ++ modules/private-cluster/cluster.tf | 54 ++++++++++++++++--- 21 files changed, 638 insertions(+), 50 deletions(-) diff --git a/README.md b/README.md index 2212da8113..74b029c309 100644 --- a/README.md +++ b/README.md @@ -276,8 +276,13 @@ The node_pools variable takes the following parameters: | min_cpu_platform | Minimum CPU platform to be used by the nodes in the pool. The nodes may be scheduled on the specified or newer CPU platform. | " " | Optional | | max_count | Maximum number of nodes in the NodePool. Must be >= min_count | 100 | Optional | | max_pods_per_node | The maximum number of pods per node in this cluster | null | Optional | +| strategy | The upgrade stragey to be used for upgrading the nodes. Valid values of state are: `SURGE` or `BLUE_GREEN` | "SURGE" | Optional | | max_surge | The number of additional nodes that can be added to the node pool during an upgrade. Increasing max_surge raises the number of nodes that can be upgraded simultaneously. Can be set to 0 or greater. | 1 | Optional | | max_unavailable | The number of nodes that can be simultaneously unavailable during an upgrade. Increasing max_unavailable raises the number of nodes that can be upgraded in parallel. Can be set to 0 or greater. | 0 | Optional | +| node_pool_soak_duration | Time needed after draining the entire blue pool. After this period, the blue pool will be cleaned up. By default, it is set to one hour (3600 seconds). The maximum length of the soak time is 7 days (604,800 seconds). | "3600s" | Optional | +| batch_soak_duration | Soak time after each batch gets drained, with the default being zero seconds. | "0s" | Optional | +| batch_node_count | Absolute number of nodes to drain in a batch. If it is set to zero, this phase will be skipped. | null | Optional | +| batch_percentage | Percentage of nodes to drain in a batch. Must be in the range of [0.0, 1.0]. If it is set to zero, this phase will be skipped. | null | Optional | | min_count | Minimum number of nodes in the NodePool. Must be >=0 and <= max_count. Should be used when autoscaling is true | 1 | Optional | | name | The name of the node pool | | Required | | node_count | The number of nodes in the nodepool when autoscaling is false. Otherwise defaults to 1. 
Only valid for non-autoscaling clusters | | Required | diff --git a/autogen/main/README.md b/autogen/main/README.md index 40182f7c34..1bd5984aa5 100644 --- a/autogen/main/README.md +++ b/autogen/main/README.md @@ -212,8 +212,13 @@ The node_pools variable takes the following parameters: | min_cpu_platform | Minimum CPU platform to be used by the nodes in the pool. The nodes may be scheduled on the specified or newer CPU platform. | " " | Optional | | max_count | Maximum number of nodes in the NodePool. Must be >= min_count | 100 | Optional | | max_pods_per_node | The maximum number of pods per node in this cluster | null | Optional | +| strategy | The upgrade stragey to be used for upgrading the nodes. Valid values of state are: `SURGE` or `BLUE_GREEN` | "SURGE" | Optional | | max_surge | The number of additional nodes that can be added to the node pool during an upgrade. Increasing max_surge raises the number of nodes that can be upgraded simultaneously. Can be set to 0 or greater. | 1 | Optional | | max_unavailable | The number of nodes that can be simultaneously unavailable during an upgrade. Increasing max_unavailable raises the number of nodes that can be upgraded in parallel. Can be set to 0 or greater. | 0 | Optional | +| node_pool_soak_duration | Time needed after draining the entire blue pool. After this period, the blue pool will be cleaned up. By default, it is set to one hour (3600 seconds). The maximum length of the soak time is 7 days (604,800 seconds). | "3600s" | Optional | +| batch_soak_duration | Soak time after each batch gets drained, with the default being zero seconds. | "0s" | Optional | +| batch_node_count | Absolute number of nodes to drain in a batch. If it is set to zero, this phase will be skipped. | null | Optional | +| batch_percentage | Percentage of nodes to drain in a batch. Must be in the range of [0.0, 1.0]. If it is set to zero, this phase will be skipped. | null | Optional | | min_count | Minimum number of nodes in the NodePool. Must be >=0 and <= max_count. Should be used when autoscaling is true | 1 | Optional | | name | The name of the node pool | | Required | {% if beta_cluster %} diff --git a/autogen/main/cluster.tf.tmpl b/autogen/main/cluster.tf.tmpl index 5d2b3cc9ff..5cea26579c 100644 --- a/autogen/main/cluster.tf.tmpl +++ b/autogen/main/cluster.tf.tmpl @@ -684,9 +684,30 @@ resource "google_container_node_pool" "windows_pools" { auto_upgrade = lookup(each.value, "auto_upgrade", local.default_auto_upgrade) } - upgrade_settings { - max_surge = lookup(each.value, "max_surge", 1) - max_unavailable = lookup(each.value, "max_unavailable", 0) + dynamic "upgrade_settings" { + for_each = lookup(each.value, "strategy", var.strategy) == "SURGE" ? [each.value] : [] + content { + strategy = lookup(each.value, "strategy", "SURGE") + max_surge = lookup(each.value, "max_surge", 1) + max_unavailable = lookup(each.value, "max_unavailable", 0) + } + } + + dynamic "upgrade_settings" { + for_each = lookup(each.value, "strategy", var.strategy) == "BLUE_GREEN" ? 
[each.value] : [] + content { + strategy = lookup(each.value, "strategy", "BLUE_GREEN") + + blue_green_settings { + node_pool_soak_duration = lookup(each.value, "node_pool_soak_duration", "3600s") + + standard_rollout_policy { + batch_soak_duration = lookup(each.value, "batch_soak_duration", "60s") + batch_percentage = lookup(each.value, "batch_percentage", null) + batch_node_count = lookup(each.value, "batch_node_count", null) + } + } + } } node_config { diff --git a/autogen/main/variables.tf.tmpl b/autogen/main/variables.tf.tmpl index 188f90c356..446f684943 100644 --- a/autogen/main/variables.tf.tmpl +++ b/autogen/main/variables.tf.tmpl @@ -727,7 +727,6 @@ variable "enable_pod_security_policy" { default = false } - variable "enable_l4_ilb_subsetting" { type = bool description = "Enable L4 ILB Subsetting on the cluster" @@ -751,5 +750,47 @@ variable "enable_identity_service" { description = "Enable the Identity Service component, which allows customers to use external identity providers with the K8S API." default = false } + +variable "strategy" { + type = string + description = "The upgrade stragey to be used for upgrading the nodes. Valid values of state are: `SURGE`; `BLUE_GREEN`. By default strategy is `SURGE` (Optional)" + default = "SURGE" +} + +variable "max_surge" { + type = number + description = "The number of additional nodes that can be added to the node pool during an upgrade. Increasing max_surge raises the number of nodes that can be upgraded simultaneously. Can be set to 0 or greater (Optional)" + default = null +} + +variable "max_unavailable" { + type = number + description = "The number of nodes that can be simultaneously unavailable during an upgrade. Increasing max_unavailable raises the number of nodes that can be upgraded in parallel. Can be set to 0 or greater (Optional)" + default = null +} + +variable "node_pool_soak_duration" { + type = string + description = "Time needed after draining the entire blue pool. After this period, the blue pool will be cleaned up (Optional)" + default = "3600s" +} + +variable "batch_soak_duration" { + type = string + description = "Soak time after each batch gets drained (Optionial)" + default = "0s" +} + +variable "batch_percentage" { + type = string + description = "Percentage of the blue pool nodes to drain in a batch (Optional)" + default = null +} + +variable "batch_node_count" { + type = number + description = "The number of blue nodes to drain in a batch (Optional)" + default = null +} {% endif %} {% endif %} diff --git a/cluster.tf b/cluster.tf index 49520613ee..c86070e9a0 100644 --- a/cluster.tf +++ b/cluster.tf @@ -405,9 +405,30 @@ resource "google_container_node_pool" "pools" { auto_upgrade = lookup(each.value, "auto_upgrade", local.default_auto_upgrade) } - upgrade_settings { - max_surge = lookup(each.value, "max_surge", 1) - max_unavailable = lookup(each.value, "max_unavailable", 0) + dynamic "upgrade_settings" { + for_each = lookup(each.value, "strategy", var.strategy) == "SURGE" ? [each.value] : [] + content { + strategy = lookup(each.value, "strategy", "SURGE") + max_surge = lookup(each.value, "max_surge", 1) + max_unavailable = lookup(each.value, "max_unavailable", 0) + } + } + + dynamic "upgrade_settings" { + for_each = lookup(each.value, "strategy", var.strategy) == "BLUE_GREEN" ? 
[each.value] : [] + content { + strategy = lookup(each.value, "strategy", "BLUE_GREEN") + + blue_green_settings { + node_pool_soak_duration = lookup(each.value, "node_pool_soak_duration", "3600s") + + standard_rollout_policy { + batch_soak_duration = lookup(each.value, "batch_soak_duration", "60s") + batch_percentage = lookup(each.value, "batch_percentage", null) + batch_node_count = lookup(each.value, "batch_node_count", null) + } + } + } } node_config { @@ -577,9 +598,30 @@ resource "google_container_node_pool" "windows_pools" { auto_upgrade = lookup(each.value, "auto_upgrade", local.default_auto_upgrade) } - upgrade_settings { - max_surge = lookup(each.value, "max_surge", 1) - max_unavailable = lookup(each.value, "max_unavailable", 0) + dynamic "upgrade_settings" { + for_each = lookup(each.value, "strategy", var.strategy) == "SURGE" ? [each.value] : [] + content { + strategy = lookup(each.value, "strategy", "SURGE") + max_surge = lookup(each.value, "max_surge", 1) + max_unavailable = lookup(each.value, "max_unavailable", 0) + } + } + + dynamic "upgrade_settings" { + for_each = lookup(each.value, "strategy", var.strategy) == "BLUE_GREEN" ? [each.value] : [] + content { + strategy = lookup(each.value, "strategy", "BLUE_GREEN") + + blue_green_settings { + node_pool_soak_duration = lookup(each.value, "node_pool_soak_duration", "3600s") + + standard_rollout_policy { + batch_soak_duration = lookup(each.value, "batch_soak_duration", "60s") + batch_percentage = lookup(each.value, "batch_percentage", null) + batch_node_count = lookup(each.value, "batch_node_count", null) + } + } + } } node_config { diff --git a/modules/beta-private-cluster-update-variant/README.md b/modules/beta-private-cluster-update-variant/README.md index d084f718f8..8157436571 100644 --- a/modules/beta-private-cluster-update-variant/README.md +++ b/modules/beta-private-cluster-update-variant/README.md @@ -163,6 +163,9 @@ Then perform the following commands on the root folder: | add\_master\_webhook\_firewall\_rules | Create master\_webhook firewall rules for ports defined in `firewall_inbound_ports` | `bool` | `false` | no | | add\_shadow\_firewall\_rules | Create GKE shadow firewall (the same as default firewall rules with firewall logs enabled). | `bool` | `false` | no | | authenticator\_security\_group | The name of the RBAC security group for use with Google security groups in Kubernetes RBAC. Group name must be in format gke-security-groups@yourdomain.com | `string` | `null` | no | +| batch\_node\_count | The number of blue nodes to drain in a batch (Optional) | `number` | `null` | no | +| batch\_percentage | Percentage of the blue pool nodes to drain in a batch (Optional) | `string` | `null` | no | +| batch\_soak\_duration | Soak time after each batch gets drained (Optionial) | `string` | `"0s"` | no | | cloudrun | (Beta) Enable CloudRun addon | `bool` | `false` | no | | cloudrun\_load\_balancer\_type | (Beta) Configure the Cloud Run load balancer type. External by default. Set to `LOAD_BALANCER_TYPE_INTERNAL` to configure as an internal load balancer. | `string` | `""` | no | | cluster\_autoscaling | Cluster autoscaling configuration. See [more details](https://cloud.google.com/kubernetes-engine/docs/reference/rest/v1beta1/projects.locations.clusters#clusterautoscaling) |
object({
enabled = bool
autoscaling_profile = string
min_cpu_cores = number
max_cpu_cores = number
min_memory_gb = number
max_memory_gb = number
gpu_resources = list(object({ resource_type = string, minimum = number, maximum = number }))
auto_repair = bool
auto_upgrade = bool
})
|
{
"auto_repair": true,
"auto_upgrade": true,
"autoscaling_profile": "BALANCED",
"enabled": false,
"gpu_resources": [],
"max_cpu_cores": 0,
"max_memory_gb": 0,
"min_cpu_cores": 0,
"min_memory_gb": 0
}
| no | @@ -227,6 +230,8 @@ Then perform the following commands on the root folder: | master\_authorized\_networks | List of master authorized networks. If none are provided, disallow external access (except the cluster node IPs, which GKE automatically whitelists). | `list(object({ cidr_block = string, display_name = string }))` | `[]` | no | | master\_global\_access\_enabled | Whether the cluster master is accessible globally (from any region) or only within the same region as the private endpoint. | `bool` | `true` | no | | master\_ipv4\_cidr\_block | (Beta) The IP range in CIDR notation to use for the hosted master network | `string` | `"10.0.0.0/28"` | no | +| max\_surge | The number of additional nodes that can be added to the node pool during an upgrade. Increasing max\_surge raises the number of nodes that can be upgraded simultaneously. Can be set to 0 or greater (Optional) | `number` | `null` | no | +| max\_unavailable | The number of nodes that can be simultaneously unavailable during an upgrade. Increasing max\_unavailable raises the number of nodes that can be upgraded in parallel. Can be set to 0 or greater (Optional) | `number` | `null` | no | | monitoring\_enable\_managed\_prometheus | Configuration for Managed Service for Prometheus. Whether or not the managed collection is enabled. | `bool` | `false` | no | | monitoring\_enabled\_components | List of services to monitor: SYSTEM\_COMPONENTS, WORKLOADS (provider version >= 3.89.0). Empty list is default GKE configuration. | `list(string)` | `[]` | no | | monitoring\_service | The monitoring service that the cluster should write metrics to. Automatically send metrics from pods in the cluster to the Google Cloud Monitoring API. VM metrics will be collected by Google Compute Engine regardless of this setting Available options include monitoring.googleapis.com, monitoring.googleapis.com/kubernetes (beta) and none | `string` | `"monitoring.googleapis.com/kubernetes"` | no | @@ -236,6 +241,7 @@ Then perform the following commands on the root folder: | network\_policy\_provider | The network policy provider. | `string` | `"CALICO"` | no | | network\_project\_id | The project ID of the shared VPC's host (for shared vpc support) | `string` | `""` | no | | node\_metadata | Specifies how node metadata is exposed to the workload running on the node | `string` | `"GKE_METADATA"` | no | +| node\_pool\_soak\_duration | Time needed after draining the entire blue pool. After this period, the blue pool will be cleaned up (Optional) | `string` | `"3600s"` | no | | node\_pools | List of maps containing node pools | `list(map(any))` |
[
{
"name": "default-node-pool"
}
]
| no | | node\_pools\_labels | Map of maps containing node labels by node-pool name | `map(map(string))` |
{
"all": {},
"default-node-pool": {}
}
| no | | node\_pools\_linux\_node\_configs\_sysctls | Map of maps containing linux node config sysctls by node-pool name | `map(map(string))` |
{
"all": {},
"default-node-pool": {}
}
| no | @@ -259,6 +265,7 @@ Then perform the following commands on the root folder: | shadow\_firewall\_rules\_log\_config | The log\_config for shadow firewall rules. You can set this variable to `null` to disable logging. |
object({
metadata = string
})
|
{
"metadata": "INCLUDE_ALL_METADATA"
}
| no | | shadow\_firewall\_rules\_priority | The firewall priority of GKE shadow firewall rules. The priority should be less than default firewall, which is 1000. | `number` | `999` | no | | skip\_provisioners | Flag to skip all local-exec provisioners. It breaks `stub_domains` and `upstream_nameservers` variables functionality. | `bool` | `false` | no | +| strategy | The upgrade stragey to be used for upgrading the nodes. Valid values of state are: `SURGE`; `BLUE_GREEN`. By default strategy is `SURGE` (Optional) | `string` | `"SURGE"` | no | | stub\_domains | Map of stub domains and their resolvers to forward DNS queries for a certain domain to an external DNS server | `map(list(string))` | `{}` | no | | subnetwork | The subnetwork to host the cluster in (required) | `string` | n/a | yes | | timeouts | Timeout for cluster operations. | `map(string)` | `{}` | no | @@ -341,8 +348,13 @@ The node_pools variable takes the following parameters: | min_cpu_platform | Minimum CPU platform to be used by the nodes in the pool. The nodes may be scheduled on the specified or newer CPU platform. | " " | Optional | | max_count | Maximum number of nodes in the NodePool. Must be >= min_count | 100 | Optional | | max_pods_per_node | The maximum number of pods per node in this cluster | null | Optional | +| strategy | The upgrade stragey to be used for upgrading the nodes. Valid values of state are: `SURGE` or `BLUE_GREEN` | "SURGE" | Optional | | max_surge | The number of additional nodes that can be added to the node pool during an upgrade. Increasing max_surge raises the number of nodes that can be upgraded simultaneously. Can be set to 0 or greater. | 1 | Optional | | max_unavailable | The number of nodes that can be simultaneously unavailable during an upgrade. Increasing max_unavailable raises the number of nodes that can be upgraded in parallel. Can be set to 0 or greater. | 0 | Optional | +| node_pool_soak_duration | Time needed after draining the entire blue pool. After this period, the blue pool will be cleaned up. By default, it is set to one hour (3600 seconds). The maximum length of the soak time is 7 days (604,800 seconds). | "3600s" | Optional | +| batch_soak_duration | Soak time after each batch gets drained, with the default being zero seconds. | "0s" | Optional | +| batch_node_count | Absolute number of nodes to drain in a batch. If it is set to zero, this phase will be skipped. | null | Optional | +| batch_percentage | Percentage of nodes to drain in a batch. Must be in the range of [0.0, 1.0]. If it is set to zero, this phase will be skipped. | null | Optional | | min_count | Minimum number of nodes in the NodePool. Must be >=0 and <= max_count. Should be used when autoscaling is true | 1 | Optional | | name | The name of the node pool | | Required | | placement_policy | Placement type to set for nodes in a node pool. 
Can be set as [COMPACT](https://cloud.google.com/kubernetes-engine/docs/how-to/compact-placement#overview) if desired | Optional | diff --git a/modules/beta-private-cluster-update-variant/cluster.tf b/modules/beta-private-cluster-update-variant/cluster.tf index 5eb7470a18..044dcd3e19 100644 --- a/modules/beta-private-cluster-update-variant/cluster.tf +++ b/modules/beta-private-cluster-update-variant/cluster.tf @@ -594,9 +594,30 @@ resource "google_container_node_pool" "pools" { auto_upgrade = lookup(each.value, "auto_upgrade", local.default_auto_upgrade) } - upgrade_settings { - max_surge = lookup(each.value, "max_surge", 1) - max_unavailable = lookup(each.value, "max_unavailable", 0) + dynamic "upgrade_settings" { + for_each = lookup(each.value, "strategy", var.strategy) == "SURGE" ? [each.value] : [] + content { + strategy = lookup(each.value, "strategy", "SURGE") + max_surge = lookup(each.value, "max_surge", 1) + max_unavailable = lookup(each.value, "max_unavailable", 0) + } + } + + dynamic "upgrade_settings" { + for_each = lookup(each.value, "strategy", var.strategy) == "BLUE_GREEN" ? [each.value] : [] + content { + strategy = lookup(each.value, "strategy", "BLUE_GREEN") + + blue_green_settings { + node_pool_soak_duration = lookup(each.value, "node_pool_soak_duration", "3600s") + + standard_rollout_policy { + batch_soak_duration = lookup(each.value, "batch_soak_duration", "60s") + batch_percentage = lookup(each.value, "batch_percentage", null) + batch_node_count = lookup(each.value, "batch_node_count", null) + } + } + } } node_config { @@ -806,9 +827,30 @@ resource "google_container_node_pool" "windows_pools" { auto_upgrade = lookup(each.value, "auto_upgrade", local.default_auto_upgrade) } - upgrade_settings { - max_surge = lookup(each.value, "max_surge", 1) - max_unavailable = lookup(each.value, "max_unavailable", 0) + dynamic "upgrade_settings" { + for_each = lookup(each.value, "strategy", var.strategy) == "SURGE" ? [each.value] : [] + content { + strategy = lookup(each.value, "strategy", "SURGE") + max_surge = lookup(each.value, "max_surge", 1) + max_unavailable = lookup(each.value, "max_unavailable", 0) + } + } + + dynamic "upgrade_settings" { + for_each = lookup(each.value, "strategy", var.strategy) == "BLUE_GREEN" ? [each.value] : [] + content { + strategy = lookup(each.value, "strategy", "BLUE_GREEN") + + blue_green_settings { + node_pool_soak_duration = lookup(each.value, "node_pool_soak_duration", "3600s") + + standard_rollout_policy { + batch_soak_duration = lookup(each.value, "batch_soak_duration", "60s") + batch_percentage = lookup(each.value, "batch_percentage", null) + batch_node_count = lookup(each.value, "batch_node_count", null) + } + } + } } node_config { diff --git a/modules/beta-private-cluster-update-variant/variables.tf b/modules/beta-private-cluster-update-variant/variables.tf index 7fa57dfe6d..df3bb71e20 100644 --- a/modules/beta-private-cluster-update-variant/variables.tf +++ b/modules/beta-private-cluster-update-variant/variables.tf @@ -697,7 +697,6 @@ variable "enable_pod_security_policy" { default = false } - variable "enable_l4_ilb_subsetting" { type = bool description = "Enable L4 ILB Subsetting on the cluster" @@ -721,3 +720,45 @@ variable "enable_identity_service" { description = "Enable the Identity Service component, which allows customers to use external identity providers with the K8S API." default = false } + +variable "strategy" { + type = string + description = "The upgrade stragey to be used for upgrading the nodes. 
Valid values of state are: `SURGE`; `BLUE_GREEN`. By default strategy is `SURGE` (Optional)" + default = "SURGE" +} + +variable "max_surge" { + type = number + description = "The number of additional nodes that can be added to the node pool during an upgrade. Increasing max_surge raises the number of nodes that can be upgraded simultaneously. Can be set to 0 or greater (Optional)" + default = null +} + +variable "max_unavailable" { + type = number + description = "The number of nodes that can be simultaneously unavailable during an upgrade. Increasing max_unavailable raises the number of nodes that can be upgraded in parallel. Can be set to 0 or greater (Optional)" + default = null +} + +variable "node_pool_soak_duration" { + type = string + description = "Time needed after draining the entire blue pool. After this period, the blue pool will be cleaned up (Optional)" + default = "3600s" +} + +variable "batch_soak_duration" { + type = string + description = "Soak time after each batch gets drained (Optionial)" + default = "0s" +} + +variable "batch_percentage" { + type = string + description = "Percentage of the blue pool nodes to drain in a batch (Optional)" + default = null +} + +variable "batch_node_count" { + type = number + description = "The number of blue nodes to drain in a batch (Optional)" + default = null +} diff --git a/modules/beta-private-cluster/README.md b/modules/beta-private-cluster/README.md index dde80c4623..8f708d87c6 100644 --- a/modules/beta-private-cluster/README.md +++ b/modules/beta-private-cluster/README.md @@ -141,6 +141,9 @@ Then perform the following commands on the root folder: | add\_master\_webhook\_firewall\_rules | Create master\_webhook firewall rules for ports defined in `firewall_inbound_ports` | `bool` | `false` | no | | add\_shadow\_firewall\_rules | Create GKE shadow firewall (the same as default firewall rules with firewall logs enabled). | `bool` | `false` | no | | authenticator\_security\_group | The name of the RBAC security group for use with Google security groups in Kubernetes RBAC. Group name must be in format gke-security-groups@yourdomain.com | `string` | `null` | no | +| batch\_node\_count | The number of blue nodes to drain in a batch (Optional) | `number` | `null` | no | +| batch\_percentage | Percentage of the blue pool nodes to drain in a batch (Optional) | `string` | `null` | no | +| batch\_soak\_duration | Soak time after each batch gets drained (Optionial) | `string` | `"0s"` | no | | cloudrun | (Beta) Enable CloudRun addon | `bool` | `false` | no | | cloudrun\_load\_balancer\_type | (Beta) Configure the Cloud Run load balancer type. External by default. Set to `LOAD_BALANCER_TYPE_INTERNAL` to configure as an internal load balancer. | `string` | `""` | no | | cluster\_autoscaling | Cluster autoscaling configuration. See [more details](https://cloud.google.com/kubernetes-engine/docs/reference/rest/v1beta1/projects.locations.clusters#clusterautoscaling) |
object({
enabled = bool
autoscaling_profile = string
min_cpu_cores = number
max_cpu_cores = number
min_memory_gb = number
max_memory_gb = number
gpu_resources = list(object({ resource_type = string, minimum = number, maximum = number }))
auto_repair = bool
auto_upgrade = bool
})
|
{
"auto_repair": true,
"auto_upgrade": true,
"autoscaling_profile": "BALANCED",
"enabled": false,
"gpu_resources": [],
"max_cpu_cores": 0,
"max_memory_gb": 0,
"min_cpu_cores": 0,
"min_memory_gb": 0
}
| no | @@ -205,6 +208,8 @@ Then perform the following commands on the root folder: | master\_authorized\_networks | List of master authorized networks. If none are provided, disallow external access (except the cluster node IPs, which GKE automatically whitelists). | `list(object({ cidr_block = string, display_name = string }))` | `[]` | no | | master\_global\_access\_enabled | Whether the cluster master is accessible globally (from any region) or only within the same region as the private endpoint. | `bool` | `true` | no | | master\_ipv4\_cidr\_block | (Beta) The IP range in CIDR notation to use for the hosted master network | `string` | `"10.0.0.0/28"` | no | +| max\_surge | The number of additional nodes that can be added to the node pool during an upgrade. Increasing max\_surge raises the number of nodes that can be upgraded simultaneously. Can be set to 0 or greater (Optional) | `number` | `null` | no | +| max\_unavailable | The number of nodes that can be simultaneously unavailable during an upgrade. Increasing max\_unavailable raises the number of nodes that can be upgraded in parallel. Can be set to 0 or greater (Optional) | `number` | `null` | no | | monitoring\_enable\_managed\_prometheus | Configuration for Managed Service for Prometheus. Whether or not the managed collection is enabled. | `bool` | `false` | no | | monitoring\_enabled\_components | List of services to monitor: SYSTEM\_COMPONENTS, WORKLOADS (provider version >= 3.89.0). Empty list is default GKE configuration. | `list(string)` | `[]` | no | | monitoring\_service | The monitoring service that the cluster should write metrics to. Automatically send metrics from pods in the cluster to the Google Cloud Monitoring API. VM metrics will be collected by Google Compute Engine regardless of this setting Available options include monitoring.googleapis.com, monitoring.googleapis.com/kubernetes (beta) and none | `string` | `"monitoring.googleapis.com/kubernetes"` | no | @@ -214,6 +219,7 @@ Then perform the following commands on the root folder: | network\_policy\_provider | The network policy provider. | `string` | `"CALICO"` | no | | network\_project\_id | The project ID of the shared VPC's host (for shared vpc support) | `string` | `""` | no | | node\_metadata | Specifies how node metadata is exposed to the workload running on the node | `string` | `"GKE_METADATA"` | no | +| node\_pool\_soak\_duration | Time needed after draining the entire blue pool. After this period, the blue pool will be cleaned up (Optional) | `string` | `"3600s"` | no | | node\_pools | List of maps containing node pools | `list(map(any))` |
[
{
"name": "default-node-pool"
}
]
| no | | node\_pools\_labels | Map of maps containing node labels by node-pool name | `map(map(string))` |
{
"all": {},
"default-node-pool": {}
}
| no | | node\_pools\_linux\_node\_configs\_sysctls | Map of maps containing linux node config sysctls by node-pool name | `map(map(string))` |
{
"all": {},
"default-node-pool": {}
}
| no | @@ -237,6 +243,7 @@ Then perform the following commands on the root folder: | shadow\_firewall\_rules\_log\_config | The log\_config for shadow firewall rules. You can set this variable to `null` to disable logging. |
object({
metadata = string
})
|
{
"metadata": "INCLUDE_ALL_METADATA"
}
| no | | shadow\_firewall\_rules\_priority | The firewall priority of GKE shadow firewall rules. The priority should be less than default firewall, which is 1000. | `number` | `999` | no | | skip\_provisioners | Flag to skip all local-exec provisioners. It breaks `stub_domains` and `upstream_nameservers` variables functionality. | `bool` | `false` | no | +| strategy | The upgrade stragey to be used for upgrading the nodes. Valid values of state are: `SURGE`; `BLUE_GREEN`. By default strategy is `SURGE` (Optional) | `string` | `"SURGE"` | no | | stub\_domains | Map of stub domains and their resolvers to forward DNS queries for a certain domain to an external DNS server | `map(list(string))` | `{}` | no | | subnetwork | The subnetwork to host the cluster in (required) | `string` | n/a | yes | | timeouts | Timeout for cluster operations. | `map(string)` | `{}` | no | @@ -319,8 +326,13 @@ The node_pools variable takes the following parameters: | min_cpu_platform | Minimum CPU platform to be used by the nodes in the pool. The nodes may be scheduled on the specified or newer CPU platform. | " " | Optional | | max_count | Maximum number of nodes in the NodePool. Must be >= min_count | 100 | Optional | | max_pods_per_node | The maximum number of pods per node in this cluster | null | Optional | +| strategy | The upgrade stragey to be used for upgrading the nodes. Valid values of state are: `SURGE` or `BLUE_GREEN` | "SURGE" | Optional | | max_surge | The number of additional nodes that can be added to the node pool during an upgrade. Increasing max_surge raises the number of nodes that can be upgraded simultaneously. Can be set to 0 or greater. | 1 | Optional | | max_unavailable | The number of nodes that can be simultaneously unavailable during an upgrade. Increasing max_unavailable raises the number of nodes that can be upgraded in parallel. Can be set to 0 or greater. | 0 | Optional | +| node_pool_soak_duration | Time needed after draining the entire blue pool. After this period, the blue pool will be cleaned up. By default, it is set to one hour (3600 seconds). The maximum length of the soak time is 7 days (604,800 seconds). | "3600s" | Optional | +| batch_soak_duration | Soak time after each batch gets drained, with the default being zero seconds. | "0s" | Optional | +| batch_node_count | Absolute number of nodes to drain in a batch. If it is set to zero, this phase will be skipped. | null | Optional | +| batch_percentage | Percentage of nodes to drain in a batch. Must be in the range of [0.0, 1.0]. If it is set to zero, this phase will be skipped. | null | Optional | | min_count | Minimum number of nodes in the NodePool. Must be >=0 and <= max_count. Should be used when autoscaling is true | 1 | Optional | | name | The name of the node pool | | Required | | placement_policy | Placement type to set for nodes in a node pool. 
Can be set as [COMPACT](https://cloud.google.com/kubernetes-engine/docs/how-to/compact-placement#overview) if desired | Optional | diff --git a/modules/beta-private-cluster/cluster.tf b/modules/beta-private-cluster/cluster.tf index bd72dc97c9..f1e168e447 100644 --- a/modules/beta-private-cluster/cluster.tf +++ b/modules/beta-private-cluster/cluster.tf @@ -500,9 +500,30 @@ resource "google_container_node_pool" "pools" { auto_upgrade = lookup(each.value, "auto_upgrade", local.default_auto_upgrade) } - upgrade_settings { - max_surge = lookup(each.value, "max_surge", 1) - max_unavailable = lookup(each.value, "max_unavailable", 0) + dynamic "upgrade_settings" { + for_each = lookup(each.value, "strategy", var.strategy) == "SURGE" ? [each.value] : [] + content { + strategy = lookup(each.value, "strategy", "SURGE") + max_surge = lookup(each.value, "max_surge", 1) + max_unavailable = lookup(each.value, "max_unavailable", 0) + } + } + + dynamic "upgrade_settings" { + for_each = lookup(each.value, "strategy", var.strategy) == "BLUE_GREEN" ? [each.value] : [] + content { + strategy = lookup(each.value, "strategy", "BLUE_GREEN") + + blue_green_settings { + node_pool_soak_duration = lookup(each.value, "node_pool_soak_duration", "3600s") + + standard_rollout_policy { + batch_soak_duration = lookup(each.value, "batch_soak_duration", "60s") + batch_percentage = lookup(each.value, "batch_percentage", null) + batch_node_count = lookup(each.value, "batch_node_count", null) + } + } + } } node_config { @@ -711,9 +732,30 @@ resource "google_container_node_pool" "windows_pools" { auto_upgrade = lookup(each.value, "auto_upgrade", local.default_auto_upgrade) } - upgrade_settings { - max_surge = lookup(each.value, "max_surge", 1) - max_unavailable = lookup(each.value, "max_unavailable", 0) + dynamic "upgrade_settings" { + for_each = lookup(each.value, "strategy", var.strategy) == "SURGE" ? [each.value] : [] + content { + strategy = lookup(each.value, "strategy", "SURGE") + max_surge = lookup(each.value, "max_surge", 1) + max_unavailable = lookup(each.value, "max_unavailable", 0) + } + } + + dynamic "upgrade_settings" { + for_each = lookup(each.value, "strategy", var.strategy) == "BLUE_GREEN" ? [each.value] : [] + content { + strategy = lookup(each.value, "strategy", "BLUE_GREEN") + + blue_green_settings { + node_pool_soak_duration = lookup(each.value, "node_pool_soak_duration", "3600s") + + standard_rollout_policy { + batch_soak_duration = lookup(each.value, "batch_soak_duration", "60s") + batch_percentage = lookup(each.value, "batch_percentage", null) + batch_node_count = lookup(each.value, "batch_node_count", null) + } + } + } } node_config { diff --git a/modules/beta-private-cluster/variables.tf b/modules/beta-private-cluster/variables.tf index 7fa57dfe6d..df3bb71e20 100644 --- a/modules/beta-private-cluster/variables.tf +++ b/modules/beta-private-cluster/variables.tf @@ -697,7 +697,6 @@ variable "enable_pod_security_policy" { default = false } - variable "enable_l4_ilb_subsetting" { type = bool description = "Enable L4 ILB Subsetting on the cluster" @@ -721,3 +720,45 @@ variable "enable_identity_service" { description = "Enable the Identity Service component, which allows customers to use external identity providers with the K8S API." default = false } + +variable "strategy" { + type = string + description = "The upgrade stragey to be used for upgrading the nodes. Valid values of state are: `SURGE`; `BLUE_GREEN`. 
By default strategy is `SURGE` (Optional)" + default = "SURGE" +} + +variable "max_surge" { + type = number + description = "The number of additional nodes that can be added to the node pool during an upgrade. Increasing max_surge raises the number of nodes that can be upgraded simultaneously. Can be set to 0 or greater (Optional)" + default = null +} + +variable "max_unavailable" { + type = number + description = "The number of nodes that can be simultaneously unavailable during an upgrade. Increasing max_unavailable raises the number of nodes that can be upgraded in parallel. Can be set to 0 or greater (Optional)" + default = null +} + +variable "node_pool_soak_duration" { + type = string + description = "Time needed after draining the entire blue pool. After this period, the blue pool will be cleaned up (Optional)" + default = "3600s" +} + +variable "batch_soak_duration" { + type = string + description = "Soak time after each batch gets drained (Optionial)" + default = "0s" +} + +variable "batch_percentage" { + type = string + description = "Percentage of the blue pool nodes to drain in a batch (Optional)" + default = null +} + +variable "batch_node_count" { + type = number + description = "The number of blue nodes to drain in a batch (Optional)" + default = null +} diff --git a/modules/beta-public-cluster-update-variant/README.md b/modules/beta-public-cluster-update-variant/README.md index b366957d8f..531b45b1a0 100644 --- a/modules/beta-public-cluster-update-variant/README.md +++ b/modules/beta-public-cluster-update-variant/README.md @@ -157,6 +157,9 @@ Then perform the following commands on the root folder: | add\_master\_webhook\_firewall\_rules | Create master\_webhook firewall rules for ports defined in `firewall_inbound_ports` | `bool` | `false` | no | | add\_shadow\_firewall\_rules | Create GKE shadow firewall (the same as default firewall rules with firewall logs enabled). | `bool` | `false` | no | | authenticator\_security\_group | The name of the RBAC security group for use with Google security groups in Kubernetes RBAC. Group name must be in format gke-security-groups@yourdomain.com | `string` | `null` | no | +| batch\_node\_count | The number of blue nodes to drain in a batch (Optional) | `number` | `null` | no | +| batch\_percentage | Percentage of the blue pool nodes to drain in a batch (Optional) | `string` | `null` | no | +| batch\_soak\_duration | Soak time after each batch gets drained (Optionial) | `string` | `"0s"` | no | | cloudrun | (Beta) Enable CloudRun addon | `bool` | `false` | no | | cloudrun\_load\_balancer\_type | (Beta) Configure the Cloud Run load balancer type. External by default. Set to `LOAD_BALANCER_TYPE_INTERNAL` to configure as an internal load balancer. | `string` | `""` | no | | cluster\_autoscaling | Cluster autoscaling configuration. See [more details](https://cloud.google.com/kubernetes-engine/docs/reference/rest/v1beta1/projects.locations.clusters#clusterautoscaling) |
object({
enabled = bool
autoscaling_profile = string
min_cpu_cores = number
max_cpu_cores = number
min_memory_gb = number
max_memory_gb = number
gpu_resources = list(object({ resource_type = string, minimum = number, maximum = number }))
auto_repair = bool
auto_upgrade = bool
})
|
{
"auto_repair": true,
"auto_upgrade": true,
"autoscaling_profile": "BALANCED",
"enabled": false,
"gpu_resources": [],
"max_cpu_cores": 0,
"max_memory_gb": 0,
"min_cpu_cores": 0,
"min_memory_gb": 0
}
| no | @@ -216,6 +219,8 @@ Then perform the following commands on the root folder: | maintenance\_recurrence | Frequency of the recurring maintenance window in RFC5545 format. | `string` | `""` | no | | maintenance\_start\_time | Time window specified for daily or recurring maintenance operations in RFC3339 format | `string` | `"05:00"` | no | | master\_authorized\_networks | List of master authorized networks. If none are provided, disallow external access (except the cluster node IPs, which GKE automatically whitelists). | `list(object({ cidr_block = string, display_name = string }))` | `[]` | no | +| max\_surge | The number of additional nodes that can be added to the node pool during an upgrade. Increasing max\_surge raises the number of nodes that can be upgraded simultaneously. Can be set to 0 or greater (Optional) | `number` | `null` | no | +| max\_unavailable | The number of nodes that can be simultaneously unavailable during an upgrade. Increasing max\_unavailable raises the number of nodes that can be upgraded in parallel. Can be set to 0 or greater (Optional) | `number` | `null` | no | | monitoring\_enable\_managed\_prometheus | Configuration for Managed Service for Prometheus. Whether or not the managed collection is enabled. | `bool` | `false` | no | | monitoring\_enabled\_components | List of services to monitor: SYSTEM\_COMPONENTS, WORKLOADS (provider version >= 3.89.0). Empty list is default GKE configuration. | `list(string)` | `[]` | no | | monitoring\_service | The monitoring service that the cluster should write metrics to. Automatically send metrics from pods in the cluster to the Google Cloud Monitoring API. VM metrics will be collected by Google Compute Engine regardless of this setting Available options include monitoring.googleapis.com, monitoring.googleapis.com/kubernetes (beta) and none | `string` | `"monitoring.googleapis.com/kubernetes"` | no | @@ -225,6 +230,7 @@ Then perform the following commands on the root folder: | network\_policy\_provider | The network policy provider. | `string` | `"CALICO"` | no | | network\_project\_id | The project ID of the shared VPC's host (for shared vpc support) | `string` | `""` | no | | node\_metadata | Specifies how node metadata is exposed to the workload running on the node | `string` | `"GKE_METADATA"` | no | +| node\_pool\_soak\_duration | Time needed after draining the entire blue pool. After this period, the blue pool will be cleaned up (Optional) | `string` | `"3600s"` | no | | node\_pools | List of maps containing node pools | `list(map(any))` |
[
{
"name": "default-node-pool"
}
]
| no | | node\_pools\_labels | Map of maps containing node labels by node-pool name | `map(map(string))` |
{
"all": {},
"default-node-pool": {}
}
| no | | node\_pools\_linux\_node\_configs\_sysctls | Map of maps containing linux node config sysctls by node-pool name | `map(map(string))` |
{
"all": {},
"default-node-pool": {}
}
| no | @@ -248,6 +254,7 @@ Then perform the following commands on the root folder: | shadow\_firewall\_rules\_log\_config | The log\_config for shadow firewall rules. You can set this variable to `null` to disable logging. |
object({
metadata = string
})
|
{
"metadata": "INCLUDE_ALL_METADATA"
}
| no | | shadow\_firewall\_rules\_priority | The firewall priority of GKE shadow firewall rules. The priority should be less than default firewall, which is 1000. | `number` | `999` | no | | skip\_provisioners | Flag to skip all local-exec provisioners. It breaks `stub_domains` and `upstream_nameservers` variables functionality. | `bool` | `false` | no | +| strategy | The upgrade stragey to be used for upgrading the nodes. Valid values of state are: `SURGE`; `BLUE_GREEN`. By default strategy is `SURGE` (Optional) | `string` | `"SURGE"` | no | | stub\_domains | Map of stub domains and their resolvers to forward DNS queries for a certain domain to an external DNS server | `map(list(string))` | `{}` | no | | subnetwork | The subnetwork to host the cluster in (required) | `string` | n/a | yes | | timeouts | Timeout for cluster operations. | `map(string)` | `{}` | no | @@ -328,8 +335,13 @@ The node_pools variable takes the following parameters: | min_cpu_platform | Minimum CPU platform to be used by the nodes in the pool. The nodes may be scheduled on the specified or newer CPU platform. | " " | Optional | | max_count | Maximum number of nodes in the NodePool. Must be >= min_count | 100 | Optional | | max_pods_per_node | The maximum number of pods per node in this cluster | null | Optional | +| strategy | The upgrade stragey to be used for upgrading the nodes. Valid values of state are: `SURGE` or `BLUE_GREEN` | "SURGE" | Optional | | max_surge | The number of additional nodes that can be added to the node pool during an upgrade. Increasing max_surge raises the number of nodes that can be upgraded simultaneously. Can be set to 0 or greater. | 1 | Optional | | max_unavailable | The number of nodes that can be simultaneously unavailable during an upgrade. Increasing max_unavailable raises the number of nodes that can be upgraded in parallel. Can be set to 0 or greater. | 0 | Optional | +| node_pool_soak_duration | Time needed after draining the entire blue pool. After this period, the blue pool will be cleaned up. By default, it is set to one hour (3600 seconds). The maximum length of the soak time is 7 days (604,800 seconds). | "3600s" | Optional | +| batch_soak_duration | Soak time after each batch gets drained, with the default being zero seconds. | "0s" | Optional | +| batch_node_count | Absolute number of nodes to drain in a batch. If it is set to zero, this phase will be skipped. | null | Optional | +| batch_percentage | Percentage of nodes to drain in a batch. Must be in the range of [0.0, 1.0]. If it is set to zero, this phase will be skipped. | null | Optional | | min_count | Minimum number of nodes in the NodePool. Must be >=0 and <= max_count. Should be used when autoscaling is true | 1 | Optional | | name | The name of the node pool | | Required | | placement_policy | Placement type to set for nodes in a node pool. 
Can be set as [COMPACT](https://cloud.google.com/kubernetes-engine/docs/how-to/compact-placement#overview) if desired | Optional | diff --git a/modules/beta-public-cluster-update-variant/cluster.tf b/modules/beta-public-cluster-update-variant/cluster.tf index 6b348b6111..e5e1b28b55 100644 --- a/modules/beta-public-cluster-update-variant/cluster.tf +++ b/modules/beta-public-cluster-update-variant/cluster.tf @@ -574,9 +574,30 @@ resource "google_container_node_pool" "pools" { auto_upgrade = lookup(each.value, "auto_upgrade", local.default_auto_upgrade) } - upgrade_settings { - max_surge = lookup(each.value, "max_surge", 1) - max_unavailable = lookup(each.value, "max_unavailable", 0) + dynamic "upgrade_settings" { + for_each = lookup(each.value, "strategy", var.strategy) == "SURGE" ? [each.value] : [] + content { + strategy = lookup(each.value, "strategy", "SURGE") + max_surge = lookup(each.value, "max_surge", 1) + max_unavailable = lookup(each.value, "max_unavailable", 0) + } + } + + dynamic "upgrade_settings" { + for_each = lookup(each.value, "strategy", var.strategy) == "BLUE_GREEN" ? [each.value] : [] + content { + strategy = lookup(each.value, "strategy", "BLUE_GREEN") + + blue_green_settings { + node_pool_soak_duration = lookup(each.value, "node_pool_soak_duration", "3600s") + + standard_rollout_policy { + batch_soak_duration = lookup(each.value, "batch_soak_duration", "60s") + batch_percentage = lookup(each.value, "batch_percentage", null) + batch_node_count = lookup(each.value, "batch_node_count", null) + } + } + } } node_config { @@ -785,9 +806,30 @@ resource "google_container_node_pool" "windows_pools" { auto_upgrade = lookup(each.value, "auto_upgrade", local.default_auto_upgrade) } - upgrade_settings { - max_surge = lookup(each.value, "max_surge", 1) - max_unavailable = lookup(each.value, "max_unavailable", 0) + dynamic "upgrade_settings" { + for_each = lookup(each.value, "strategy", var.strategy) == "SURGE" ? [each.value] : [] + content { + strategy = lookup(each.value, "strategy", "SURGE") + max_surge = lookup(each.value, "max_surge", 1) + max_unavailable = lookup(each.value, "max_unavailable", 0) + } + } + + dynamic "upgrade_settings" { + for_each = lookup(each.value, "strategy", var.strategy) == "BLUE_GREEN" ? [each.value] : [] + content { + strategy = lookup(each.value, "strategy", "BLUE_GREEN") + + blue_green_settings { + node_pool_soak_duration = lookup(each.value, "node_pool_soak_duration", "3600s") + + standard_rollout_policy { + batch_soak_duration = lookup(each.value, "batch_soak_duration", "60s") + batch_percentage = lookup(each.value, "batch_percentage", null) + batch_node_count = lookup(each.value, "batch_node_count", null) + } + } + } } node_config { diff --git a/modules/beta-public-cluster-update-variant/variables.tf b/modules/beta-public-cluster-update-variant/variables.tf index 970d25f40d..6a1e56e88f 100644 --- a/modules/beta-public-cluster-update-variant/variables.tf +++ b/modules/beta-public-cluster-update-variant/variables.tf @@ -667,7 +667,6 @@ variable "enable_pod_security_policy" { default = false } - variable "enable_l4_ilb_subsetting" { type = bool description = "Enable L4 ILB Subsetting on the cluster" @@ -691,3 +690,45 @@ variable "enable_identity_service" { description = "Enable the Identity Service component, which allows customers to use external identity providers with the K8S API." default = false } + +variable "strategy" { + type = string + description = "The upgrade stragey to be used for upgrading the nodes. 
Valid values of state are: `SURGE`; `BLUE_GREEN`. By default strategy is `SURGE` (Optional)" + default = "SURGE" +} + +variable "max_surge" { + type = number + description = "The number of additional nodes that can be added to the node pool during an upgrade. Increasing max_surge raises the number of nodes that can be upgraded simultaneously. Can be set to 0 or greater (Optional)" + default = null +} + +variable "max_unavailable" { + type = number + description = "The number of nodes that can be simultaneously unavailable during an upgrade. Increasing max_unavailable raises the number of nodes that can be upgraded in parallel. Can be set to 0 or greater (Optional)" + default = null +} + +variable "node_pool_soak_duration" { + type = string + description = "Time needed after draining the entire blue pool. After this period, the blue pool will be cleaned up (Optional)" + default = "3600s" +} + +variable "batch_soak_duration" { + type = string + description = "Soak time after each batch gets drained (Optionial)" + default = "0s" +} + +variable "batch_percentage" { + type = string + description = "Percentage of the blue pool nodes to drain in a batch (Optional)" + default = null +} + +variable "batch_node_count" { + type = number + description = "The number of blue nodes to drain in a batch (Optional)" + default = null +} diff --git a/modules/beta-public-cluster/README.md b/modules/beta-public-cluster/README.md index c018d01fd6..817c291c67 100644 --- a/modules/beta-public-cluster/README.md +++ b/modules/beta-public-cluster/README.md @@ -135,6 +135,9 @@ Then perform the following commands on the root folder: | add\_master\_webhook\_firewall\_rules | Create master\_webhook firewall rules for ports defined in `firewall_inbound_ports` | `bool` | `false` | no | | add\_shadow\_firewall\_rules | Create GKE shadow firewall (the same as default firewall rules with firewall logs enabled). | `bool` | `false` | no | | authenticator\_security\_group | The name of the RBAC security group for use with Google security groups in Kubernetes RBAC. Group name must be in format gke-security-groups@yourdomain.com | `string` | `null` | no | +| batch\_node\_count | The number of blue nodes to drain in a batch (Optional) | `number` | `null` | no | +| batch\_percentage | Percentage of the blue pool nodes to drain in a batch (Optional) | `string` | `null` | no | +| batch\_soak\_duration | Soak time after each batch gets drained (Optionial) | `string` | `"0s"` | no | | cloudrun | (Beta) Enable CloudRun addon | `bool` | `false` | no | | cloudrun\_load\_balancer\_type | (Beta) Configure the Cloud Run load balancer type. External by default. Set to `LOAD_BALANCER_TYPE_INTERNAL` to configure as an internal load balancer. | `string` | `""` | no | | cluster\_autoscaling | Cluster autoscaling configuration. See [more details](https://cloud.google.com/kubernetes-engine/docs/reference/rest/v1beta1/projects.locations.clusters#clusterautoscaling) |
object({
enabled = bool
autoscaling_profile = string
min_cpu_cores = number
max_cpu_cores = number
min_memory_gb = number
max_memory_gb = number
gpu_resources = list(object({ resource_type = string, minimum = number, maximum = number }))
auto_repair = bool
auto_upgrade = bool
})
|
{
"auto_repair": true,
"auto_upgrade": true,
"autoscaling_profile": "BALANCED",
"enabled": false,
"gpu_resources": [],
"max_cpu_cores": 0,
"max_memory_gb": 0,
"min_cpu_cores": 0,
"min_memory_gb": 0
}
| no | @@ -194,6 +197,8 @@ Then perform the following commands on the root folder: | maintenance\_recurrence | Frequency of the recurring maintenance window in RFC5545 format. | `string` | `""` | no | | maintenance\_start\_time | Time window specified for daily or recurring maintenance operations in RFC3339 format | `string` | `"05:00"` | no | | master\_authorized\_networks | List of master authorized networks. If none are provided, disallow external access (except the cluster node IPs, which GKE automatically whitelists). | `list(object({ cidr_block = string, display_name = string }))` | `[]` | no | +| max\_surge | The number of additional nodes that can be added to the node pool during an upgrade. Increasing max\_surge raises the number of nodes that can be upgraded simultaneously. Can be set to 0 or greater (Optional) | `number` | `null` | no | +| max\_unavailable | The number of nodes that can be simultaneously unavailable during an upgrade. Increasing max\_unavailable raises the number of nodes that can be upgraded in parallel. Can be set to 0 or greater (Optional) | `number` | `null` | no | | monitoring\_enable\_managed\_prometheus | Configuration for Managed Service for Prometheus. Whether or not the managed collection is enabled. | `bool` | `false` | no | | monitoring\_enabled\_components | List of services to monitor: SYSTEM\_COMPONENTS, WORKLOADS (provider version >= 3.89.0). Empty list is default GKE configuration. | `list(string)` | `[]` | no | | monitoring\_service | The monitoring service that the cluster should write metrics to. Automatically send metrics from pods in the cluster to the Google Cloud Monitoring API. VM metrics will be collected by Google Compute Engine regardless of this setting Available options include monitoring.googleapis.com, monitoring.googleapis.com/kubernetes (beta) and none | `string` | `"monitoring.googleapis.com/kubernetes"` | no | @@ -203,6 +208,7 @@ Then perform the following commands on the root folder: | network\_policy\_provider | The network policy provider. | `string` | `"CALICO"` | no | | network\_project\_id | The project ID of the shared VPC's host (for shared vpc support) | `string` | `""` | no | | node\_metadata | Specifies how node metadata is exposed to the workload running on the node | `string` | `"GKE_METADATA"` | no | +| node\_pool\_soak\_duration | Time needed after draining the entire blue pool. After this period, the blue pool will be cleaned up (Optional) | `string` | `"3600s"` | no | | node\_pools | List of maps containing node pools | `list(map(any))` |
[
{
"name": "default-node-pool"
}
]
| no | | node\_pools\_labels | Map of maps containing node labels by node-pool name | `map(map(string))` |
{
"all": {},
"default-node-pool": {}
}
| no | | node\_pools\_linux\_node\_configs\_sysctls | Map of maps containing linux node config sysctls by node-pool name | `map(map(string))` |
{
"all": {},
"default-node-pool": {}
}
| no | @@ -226,6 +232,7 @@ Then perform the following commands on the root folder: | shadow\_firewall\_rules\_log\_config | The log\_config for shadow firewall rules. You can set this variable to `null` to disable logging. |
object({
metadata = string
})
|
{
"metadata": "INCLUDE_ALL_METADATA"
}
| no | | shadow\_firewall\_rules\_priority | The firewall priority of GKE shadow firewall rules. The priority should be less than default firewall, which is 1000. | `number` | `999` | no | | skip\_provisioners | Flag to skip all local-exec provisioners. It breaks `stub_domains` and `upstream_nameservers` variables functionality. | `bool` | `false` | no | +| strategy | The upgrade stragey to be used for upgrading the nodes. Valid values of state are: `SURGE`; `BLUE_GREEN`. By default strategy is `SURGE` (Optional) | `string` | `"SURGE"` | no | | stub\_domains | Map of stub domains and their resolvers to forward DNS queries for a certain domain to an external DNS server | `map(list(string))` | `{}` | no | | subnetwork | The subnetwork to host the cluster in (required) | `string` | n/a | yes | | timeouts | Timeout for cluster operations. | `map(string)` | `{}` | no | @@ -306,8 +313,13 @@ The node_pools variable takes the following parameters: | min_cpu_platform | Minimum CPU platform to be used by the nodes in the pool. The nodes may be scheduled on the specified or newer CPU platform. | " " | Optional | | max_count | Maximum number of nodes in the NodePool. Must be >= min_count | 100 | Optional | | max_pods_per_node | The maximum number of pods per node in this cluster | null | Optional | +| strategy | The upgrade stragey to be used for upgrading the nodes. Valid values of state are: `SURGE` or `BLUE_GREEN` | "SURGE" | Optional | | max_surge | The number of additional nodes that can be added to the node pool during an upgrade. Increasing max_surge raises the number of nodes that can be upgraded simultaneously. Can be set to 0 or greater. | 1 | Optional | | max_unavailable | The number of nodes that can be simultaneously unavailable during an upgrade. Increasing max_unavailable raises the number of nodes that can be upgraded in parallel. Can be set to 0 or greater. | 0 | Optional | +| node_pool_soak_duration | Time needed after draining the entire blue pool. After this period, the blue pool will be cleaned up. By default, it is set to one hour (3600 seconds). The maximum length of the soak time is 7 days (604,800 seconds). | "3600s" | Optional | +| batch_soak_duration | Soak time after each batch gets drained, with the default being zero seconds. | "0s" | Optional | +| batch_node_count | Absolute number of nodes to drain in a batch. If it is set to zero, this phase will be skipped. | null | Optional | +| batch_percentage | Percentage of nodes to drain in a batch. Must be in the range of [0.0, 1.0]. If it is set to zero, this phase will be skipped. | null | Optional | | min_count | Minimum number of nodes in the NodePool. Must be >=0 and <= max_count. Should be used when autoscaling is true | 1 | Optional | | name | The name of the node pool | | Required | | placement_policy | Placement type to set for nodes in a node pool. 
diff --git a/modules/beta-public-cluster/cluster.tf b/modules/beta-public-cluster/cluster.tf
index d21864f65b..3188c6a41d 100644
--- a/modules/beta-public-cluster/cluster.tf
+++ b/modules/beta-public-cluster/cluster.tf
@@ -480,9 +480,30 @@ resource "google_container_node_pool" "pools" {
auto_upgrade = lookup(each.value, "auto_upgrade", local.default_auto_upgrade)
}

- upgrade_settings {
- max_surge = lookup(each.value, "max_surge", 1)
- max_unavailable = lookup(each.value, "max_unavailable", 0)
+ dynamic "upgrade_settings" {
+ for_each = lookup(each.value, "strategy", var.strategy) == "SURGE" ? [each.value] : []
+ content {
+ strategy = lookup(each.value, "strategy", "SURGE")
+ max_surge = lookup(each.value, "max_surge", 1)
+ max_unavailable = lookup(each.value, "max_unavailable", 0)
+ }
+ }
+
+ dynamic "upgrade_settings" {
+ for_each = lookup(each.value, "strategy", var.strategy) == "BLUE_GREEN" ? [each.value] : []
+ content {
+ strategy = lookup(each.value, "strategy", "BLUE_GREEN")
+
+ blue_green_settings {
+ node_pool_soak_duration = lookup(each.value, "node_pool_soak_duration", "3600s")
+
+ standard_rollout_policy {
+ batch_soak_duration = lookup(each.value, "batch_soak_duration", "0s")
+ batch_percentage = lookup(each.value, "batch_percentage", null)
+ batch_node_count = lookup(each.value, "batch_node_count", null)
+ }
+ }
+ }
}

node_config {
@@ -690,9 +711,30 @@ resource "google_container_node_pool" "windows_pools" {
auto_upgrade = lookup(each.value, "auto_upgrade", local.default_auto_upgrade)
}

- upgrade_settings {
- max_surge = lookup(each.value, "max_surge", 1)
- max_unavailable = lookup(each.value, "max_unavailable", 0)
+ dynamic "upgrade_settings" {
+ for_each = lookup(each.value, "strategy", var.strategy) == "SURGE" ? [each.value] : []
+ content {
+ strategy = lookup(each.value, "strategy", "SURGE")
+ max_surge = lookup(each.value, "max_surge", 1)
+ max_unavailable = lookup(each.value, "max_unavailable", 0)
+ }
+ }
+
+ dynamic "upgrade_settings" {
+ for_each = lookup(each.value, "strategy", var.strategy) == "BLUE_GREEN" ? [each.value] : []
+ content {
+ strategy = lookup(each.value, "strategy", "BLUE_GREEN")
+
+ blue_green_settings {
+ node_pool_soak_duration = lookup(each.value, "node_pool_soak_duration", "3600s")
+
+ standard_rollout_policy {
+ batch_soak_duration = lookup(each.value, "batch_soak_duration", "0s")
+ batch_percentage = lookup(each.value, "batch_percentage", null)
+ batch_node_count = lookup(each.value, "batch_node_count", null)
+ }
+ }
+ }
}

node_config {
diff --git a/modules/beta-public-cluster/variables.tf b/modules/beta-public-cluster/variables.tf
index 970d25f40d..6a1e56e88f 100644
--- a/modules/beta-public-cluster/variables.tf
+++ b/modules/beta-public-cluster/variables.tf
@@ -667,7 +667,6 @@ variable "enable_pod_security_policy" {
default = false
}

-
variable "enable_l4_ilb_subsetting" {
type = bool
description = "Enable L4 ILB Subsetting on the cluster"
@@ -691,3 +690,45 @@ variable "enable_identity_service" {
description = "Enable the Identity Service component, which allows customers to use external identity providers with the K8S API."
default = false
}
+
+variable "strategy" {
+ type = string
+ description = "The upgrade strategy to be used for upgrading the nodes. Valid values are: `SURGE`; `BLUE_GREEN`. By default strategy is `SURGE` (Optional)"
+ default = "SURGE"
+}
+
+variable "max_surge" {
+ type = number
+ description = "The number of additional nodes that can be added to the node pool during an upgrade. Increasing max_surge raises the number of nodes that can be upgraded simultaneously. Can be set to 0 or greater (Optional)"
+ default = null
+}
+
+variable "max_unavailable" {
+ type = number
+ description = "The number of nodes that can be simultaneously unavailable during an upgrade. Increasing max_unavailable raises the number of nodes that can be upgraded in parallel. Can be set to 0 or greater (Optional)"
+ default = null
+}
+
+variable "node_pool_soak_duration" {
+ type = string
+ description = "Time needed after draining the entire blue pool. After this period, the blue pool will be cleaned up (Optional)"
+ default = "3600s"
+}
+
+variable "batch_soak_duration" {
+ type = string
+ description = "Soak time after each batch gets drained (Optional)"
+ default = "0s"
+}
+
+variable "batch_percentage" {
+ type = string
+ description = "Percentage of the blue pool nodes to drain in a batch (Optional)"
+ default = null
+}
+
+variable "batch_node_count" {
+ type = number
+ description = "The number of blue nodes to drain in a batch (Optional)"
+ default = null
+}
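The new module inputs mirror the per-pool keys, and `strategy` also serves as the fallback when a pool does not set one. A minimal sketch of wiring the module-wide default is shown below; the module source is written as it would typically be referenced from the registry, and the surrounding required arguments (project, network, and so on) are omitted, so treat it as an illustration rather than a complete configuration.

```hcl
module "gke" {
  source = "terraform-google-modules/kubernetes-engine/google//modules/beta-public-cluster"
  # project_id, name, region, network, subnetwork, ip ranges, etc. omitted for brevity

  # Module-wide fallback: pools that do not set `strategy` themselves use BLUE_GREEN.
  strategy = "BLUE_GREEN"

  node_pools = [
    {
      name = "default-node-pool"
      # This pool overrides the module-wide fallback and keeps surge upgrades.
      strategy = "SURGE"
    },
  ]
}
```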
diff --git a/modules/private-cluster-update-variant/README.md b/modules/private-cluster-update-variant/README.md
index 8d952ab9c1..36f54fb399 100644
--- a/modules/private-cluster-update-variant/README.md
+++ b/modules/private-cluster-update-variant/README.md
@@ -311,8 +311,13 @@ The node_pools variable takes the following parameters:
| min_cpu_platform | Minimum CPU platform to be used by the nodes in the pool. The nodes may be scheduled on the specified or newer CPU platform. | " " | Optional |
| max_count | Maximum number of nodes in the NodePool. Must be >= min_count | 100 | Optional |
| max_pods_per_node | The maximum number of pods per node in this cluster | null | Optional |
+| strategy | The upgrade strategy to be used for upgrading the nodes. Valid values are: `SURGE` or `BLUE_GREEN` | "SURGE" | Optional |
| max_surge | The number of additional nodes that can be added to the node pool during an upgrade. Increasing max_surge raises the number of nodes that can be upgraded simultaneously. Can be set to 0 or greater. | 1 | Optional |
| max_unavailable | The number of nodes that can be simultaneously unavailable during an upgrade. Increasing max_unavailable raises the number of nodes that can be upgraded in parallel. Can be set to 0 or greater. | 0 | Optional |
+| node_pool_soak_duration | Time needed after draining the entire blue pool. After this period, the blue pool will be cleaned up. By default, it is set to one hour (3600 seconds). The maximum length of the soak time is 7 days (604,800 seconds). | "3600s" | Optional |
+| batch_soak_duration | Soak time after each batch gets drained, with the default being zero seconds. | "0s" | Optional |
+| batch_node_count | Absolute number of nodes to drain in a batch. If it is set to zero, this phase will be skipped. | null | Optional |
+| batch_percentage | Percentage of nodes to drain in a batch. Must be in the range of [0.0, 1.0]. If it is set to zero, this phase will be skipped. | null | Optional |
| min_count | Minimum number of nodes in the NodePool. Must be >=0 and <= max_count. Should be used when autoscaling is true | 1 | Optional |
| name | The name of the node pool | | Required |
| node_count | The number of nodes in the nodepool when autoscaling is false. Otherwise defaults to 1. Only valid for non-autoscaling clusters | | Required |
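Pools that stay on the default SURGE strategy keep the existing behaviour, with the per-pool keys mapping onto max_surge and max_unavailable as before. A small sketch for comparison; the pool name and counts are placeholders:

```hcl
node_pools = [
  {
    name = "surge-pool" # placeholder name
    # SURGE is already the default; it is spelled out here only for clarity.
    strategy        = "SURGE"
    max_surge       = 2 # up to two extra nodes may be created during the upgrade
    max_unavailable = 0 # existing nodes are only removed once replacements are ready
  },
]
```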
diff --git a/modules/private-cluster-update-variant/cluster.tf b/modules/private-cluster-update-variant/cluster.tf
index eafefad05f..7e7e329dc5 100644
--- a/modules/private-cluster-update-variant/cluster.tf
+++ b/modules/private-cluster-update-variant/cluster.tf
@@ -518,9 +518,30 @@ resource "google_container_node_pool" "pools" {
auto_upgrade = lookup(each.value, "auto_upgrade", local.default_auto_upgrade)
}

- upgrade_settings {
- max_surge = lookup(each.value, "max_surge", 1)
- max_unavailable = lookup(each.value, "max_unavailable", 0)
+ dynamic "upgrade_settings" {
+ for_each = lookup(each.value, "strategy", var.strategy) == "SURGE" ? [each.value] : []
+ content {
+ strategy = lookup(each.value, "strategy", "SURGE")
+ max_surge = lookup(each.value, "max_surge", 1)
+ max_unavailable = lookup(each.value, "max_unavailable", 0)
+ }
+ }
+
+ dynamic "upgrade_settings" {
+ for_each = lookup(each.value, "strategy", var.strategy) == "BLUE_GREEN" ? [each.value] : []
+ content {
+ strategy = lookup(each.value, "strategy", "BLUE_GREEN")
+
+ blue_green_settings {
+ node_pool_soak_duration = lookup(each.value, "node_pool_soak_duration", "3600s")
+
+ standard_rollout_policy {
+ batch_soak_duration = lookup(each.value, "batch_soak_duration", "0s")
+ batch_percentage = lookup(each.value, "batch_percentage", null)
+ batch_node_count = lookup(each.value, "batch_node_count", null)
+ }
+ }
+ }
}

node_config {
@@ -691,9 +712,30 @@ resource "google_container_node_pool" "windows_pools" {
auto_upgrade = lookup(each.value, "auto_upgrade", local.default_auto_upgrade)
}

- upgrade_settings {
- max_surge = lookup(each.value, "max_surge", 1)
- max_unavailable = lookup(each.value, "max_unavailable", 0)
+ dynamic "upgrade_settings" {
+ for_each = lookup(each.value, "strategy", var.strategy) == "SURGE" ? [each.value] : []
+ content {
+ strategy = lookup(each.value, "strategy", "SURGE")
+ max_surge = lookup(each.value, "max_surge", 1)
+ max_unavailable = lookup(each.value, "max_unavailable", 0)
+ }
+ }
+
+ dynamic "upgrade_settings" {
+ for_each = lookup(each.value, "strategy", var.strategy) == "BLUE_GREEN" ? [each.value] : []
+ content {
+ strategy = lookup(each.value, "strategy", "BLUE_GREEN")
+
+ blue_green_settings {
+ node_pool_soak_duration = lookup(each.value, "node_pool_soak_duration", "3600s")
+
+ standard_rollout_policy {
+ batch_soak_duration = lookup(each.value, "batch_soak_duration", "0s")
+ batch_percentage = lookup(each.value, "batch_percentage", null)
+ batch_node_count = lookup(each.value, "batch_node_count", null)
+ }
+ }
+ }
}

node_config {
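Since the provider only accepts `SURGE` or `BLUE_GREEN` here, a `validation` block on the new variable could surface typos at plan time rather than at apply time. This is a sketch of a possible follow-up hardening, not part of this change:

```hcl
variable "strategy" {
  type        = string
  description = "The upgrade strategy to be used for upgrading the nodes. Valid values are: `SURGE`; `BLUE_GREEN`. By default strategy is `SURGE` (Optional)"
  default     = "SURGE"

  validation {
    # Reject anything other than the two values GKE understands.
    condition     = contains(["SURGE", "BLUE_GREEN"], var.strategy)
    error_message = "strategy must be either SURGE or BLUE_GREEN."
  }
}
```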
diff --git a/modules/private-cluster/README.md b/modules/private-cluster/README.md
index e82ae76c2b..061b7566cb 100644
--- a/modules/private-cluster/README.md
+++ b/modules/private-cluster/README.md
@@ -289,8 +289,13 @@ The node_pools variable takes the following parameters:
| min_cpu_platform | Minimum CPU platform to be used by the nodes in the pool. The nodes may be scheduled on the specified or newer CPU platform. | " " | Optional |
| max_count | Maximum number of nodes in the NodePool. Must be >= min_count | 100 | Optional |
| max_pods_per_node | The maximum number of pods per node in this cluster | null | Optional |
+| strategy | The upgrade strategy to be used for upgrading the nodes. Valid values are: `SURGE` or `BLUE_GREEN` | "SURGE" | Optional |
| max_surge | The number of additional nodes that can be added to the node pool during an upgrade. Increasing max_surge raises the number of nodes that can be upgraded simultaneously. Can be set to 0 or greater. | 1 | Optional |
| max_unavailable | The number of nodes that can be simultaneously unavailable during an upgrade. Increasing max_unavailable raises the number of nodes that can be upgraded in parallel. Can be set to 0 or greater. | 0 | Optional |
+| node_pool_soak_duration | Time needed after draining the entire blue pool. After this period, the blue pool will be cleaned up. By default, it is set to one hour (3600 seconds). The maximum length of the soak time is 7 days (604,800 seconds). | "3600s" | Optional |
+| batch_soak_duration | Soak time after each batch gets drained, with the default being zero seconds. | "0s" | Optional |
+| batch_node_count | Absolute number of nodes to drain in a batch. If it is set to zero, this phase will be skipped. | null | Optional |
+| batch_percentage | Percentage of nodes to drain in a batch. Must be in the range of [0.0, 1.0]. If it is set to zero, this phase will be skipped. | null | Optional |
| min_count | Minimum number of nodes in the NodePool. Must be >=0 and <= max_count. Should be used when autoscaling is true | 1 | Optional |
| name | The name of the node pool | | Required |
| node_count | The number of nodes in the nodepool when autoscaling is false. Otherwise defaults to 1. Only valid for non-autoscaling clusters | | Required |
diff --git a/modules/private-cluster/cluster.tf b/modules/private-cluster/cluster.tf
index 9711d27d98..0333154d67 100644
--- a/modules/private-cluster/cluster.tf
+++ b/modules/private-cluster/cluster.tf
@@ -424,9 +424,30 @@ resource "google_container_node_pool" "pools" {
auto_upgrade = lookup(each.value, "auto_upgrade", local.default_auto_upgrade)
}

- upgrade_settings {
- max_surge = lookup(each.value, "max_surge", 1)
- max_unavailable = lookup(each.value, "max_unavailable", 0)
+ dynamic "upgrade_settings" {
+ for_each = lookup(each.value, "strategy", var.strategy) == "SURGE" ? [each.value] : []
+ content {
+ strategy = lookup(each.value, "strategy", "SURGE")
+ max_surge = lookup(each.value, "max_surge", 1)
+ max_unavailable = lookup(each.value, "max_unavailable", 0)
+ }
+ }
+
+ dynamic "upgrade_settings" {
+ for_each = lookup(each.value, "strategy", var.strategy) == "BLUE_GREEN" ? [each.value] : []
+ content {
+ strategy = lookup(each.value, "strategy", "BLUE_GREEN")
+
+ blue_green_settings {
+ node_pool_soak_duration = lookup(each.value, "node_pool_soak_duration", "3600s")
+
+ standard_rollout_policy {
+ batch_soak_duration = lookup(each.value, "batch_soak_duration", "0s")
+ batch_percentage = lookup(each.value, "batch_percentage", null)
+ batch_node_count = lookup(each.value, "batch_node_count", null)
+ }
+ }
+ }
}

node_config {
@@ -596,9 +617,30 @@ resource "google_container_node_pool" "windows_pools" {
auto_upgrade = lookup(each.value, "auto_upgrade", local.default_auto_upgrade)
}

- upgrade_settings {
- max_surge = lookup(each.value, "max_surge", 1)
- max_unavailable = lookup(each.value, "max_unavailable", 0)
+ dynamic "upgrade_settings" {
+ for_each = lookup(each.value, "strategy", var.strategy) == "SURGE" ? [each.value] : []
+ content {
+ strategy = lookup(each.value, "strategy", "SURGE")
+ max_surge = lookup(each.value, "max_surge", 1)
+ max_unavailable = lookup(each.value, "max_unavailable", 0)
+ }
+ }
+
+ dynamic "upgrade_settings" {
+ for_each = lookup(each.value, "strategy", var.strategy) == "BLUE_GREEN" ? [each.value] : []
+ content {
+ strategy = lookup(each.value, "strategy", "BLUE_GREEN")
+
+ blue_green_settings {
+ node_pool_soak_duration = lookup(each.value, "node_pool_soak_duration", "3600s")
+
+ standard_rollout_policy {
+ batch_soak_duration = lookup(each.value, "batch_soak_duration", "0s")
+ batch_percentage = lookup(each.value, "batch_percentage", null)
+ batch_node_count = lookup(each.value, "batch_node_count", null)
+ }
+ }
+ }
}

node_config {