From 1d73e9d56ee0026aa09ddc4f8e6b2a2c325c1c93 Mon Sep 17 00:00:00 2001 From: Daniel Seymour Date: Wed, 20 Mar 2019 10:31:41 -0700 Subject: [PATCH] Fix issue with regional cluster rollouts causing version skews Regional clusters are created using the newest version of GKE that is available across all zones in which the masters live. When a GKE version rollout occurs, the versions available to zonal clusters can become skewed across zones: version x.y.z-gke.a may be the only zonal version available in one zone while version x.y[+1].z[+1]-gke.a[+1] is the only zonal version available in another zone. The Terraform module only checks the version available in the first zone returned by the google_compute_zones data source. Consequently, during a rollout the module will fail to create a regional cluster, because the version available in that one zone is not available across all the zones used by regional clusters. To fix this, look up the available versions by region for regional clusters and by zone for zonal clusters. --- auth.tf | 2 +- autogen/cluster_regional.tf | 4 ++-- autogen/cluster_zonal.tf | 4 ++-- autogen/main.tf | 24 ++++++++++++++----- cluster_regional.tf | 4 ++-- cluster_zonal.tf | 6 ++--- examples/deploy_service/main.tf | 5 ++++ examples/node_pool/main.tf | 7 +++++- examples/shared_vpc/main.tf | 5 ++++ examples/simple_regional/main.tf | 5 ++++ examples/simple_zonal/main.tf | 5 ++++ examples/stub_domains/main.tf | 5 ++++ main.tf | 26 +++++++++++++++------ modules/private-cluster/cluster_regional.tf | 4 ++-- modules/private-cluster/cluster_zonal.tf | 4 ++-- modules/private-cluster/main.tf | 22 +++++++++++++---- modules/private-cluster/variables.tf | 2 +- variables.tf | 2 +- 18 files changed, 101 insertions(+), 35 deletions(-) diff --git a/auth.tf b/auth.tf index 5ad4160145..4ec9fefa18 100644 --- a/auth.tf +++ b/auth.tf @@ -31,4 +31,4 @@ provider "kubernetes" { host = "https://${local.cluster_endpoint}" token = "${data.google_client_config.default.access_token}" cluster_ca_certificate = "${base64decode(local.cluster_ca_certificate)}" -} +} \ No 
newline at end of file diff --git a/autogen/cluster_regional.tf b/autogen/cluster_regional.tf index a9462ed12d..9cbad9ecb1 100644 --- a/autogen/cluster_regional.tf +++ b/autogen/cluster_regional.tf @@ -31,7 +31,7 @@ resource "google_container_cluster" "primary" { network = "${replace(data.google_compute_network.gke_network.self_link, "https://www.googleapis.com/compute/v1/", "")}" subnetwork = "${replace(data.google_compute_subnetwork.gke_subnetwork.self_link, "https://www.googleapis.com/compute/v1/", "")}" - min_master_version = "${local.kubernetes_version}" + min_master_version = "${local.kubernetes_version_regional}" logging_service = "${var.logging_service}" monitoring_service = "${var.monitoring_service}" @@ -104,7 +104,7 @@ resource "google_container_node_pool" "pools" { project = "${var.project_id}" region = "${var.region}" cluster = "${var.name}" - version = "${lookup(var.node_pools[count.index], "auto_upgrade", false) ? "" : lookup(var.node_pools[count.index], "version", local.node_version)}" + version = "${lookup(var.node_pools[count.index], "auto_upgrade", false) ? 
"" : lookup(var.node_pools[count.index], "version", local.node_version_regional)}" initial_node_count = "${lookup(var.node_pools[count.index], "initial_node_count", lookup(var.node_pools[count.index], "min_count", 1))}" autoscaling { diff --git a/autogen/cluster_zonal.tf b/autogen/cluster_zonal.tf index 20a72ec582..1c2b7096a9 100644 --- a/autogen/cluster_zonal.tf +++ b/autogen/cluster_zonal.tf @@ -31,7 +31,7 @@ resource "google_container_cluster" "zonal_primary" { network = "${replace(data.google_compute_network.gke_network.self_link, "https://www.googleapis.com/compute/v1/", "")}" subnetwork = "${replace(data.google_compute_subnetwork.gke_subnetwork.self_link, "https://www.googleapis.com/compute/v1/", "")}" - min_master_version = "${local.kubernetes_version}" + min_master_version = "${local.kubernetes_version_zonal}" logging_service = "${var.logging_service}" monitoring_service = "${var.monitoring_service}" @@ -104,7 +104,7 @@ resource "google_container_node_pool" "zonal_pools" { project = "${var.project_id}" zone = "${var.zones[0]}" cluster = "${var.name}" - version = "${lookup(var.node_pools[count.index], "auto_upgrade", false) ? "" : lookup(var.node_pools[count.index], "version", local.node_version)}" + version = "${lookup(var.node_pools[count.index], "auto_upgrade", false) ? "" : lookup(var.node_pools[count.index], "version", local.node_version_zonal)}" initial_node_count = "${lookup(var.node_pools[count.index], "initial_node_count", lookup(var.node_pools[count.index], "min_count", 1))}" autoscaling { diff --git a/autogen/main.tf b/autogen/main.tf index e3f405bbdd..18187d2ac0 100644 --- a/autogen/main.tf +++ b/autogen/main.tf @@ -31,10 +31,12 @@ resource "random_shuffle" "available_zones" { } locals { - kubernetes_version = "${var.kubernetes_version != "latest" ? var.kubernetes_version : data.google_container_engine_versions.region.latest_node_version}" - node_version = "${var.node_version != "" ? 
var.node_version : local.kubernetes_version}" - custom_kube_dns_config = "${length(keys(var.stub_domains)) > 0 ? true : false}" - network_project_id = "${var.network_project_id != "" ? var.network_project_id : var.project_id}" + kubernetes_version_regional = "${var.kubernetes_version != "latest" ? var.kubernetes_version : data.google_container_engine_versions.region.latest_master_version}" + kubernetes_version_zonal = "${var.kubernetes_version != "latest" ? var.kubernetes_version : data.google_container_engine_versions.zone.latest_master_version}" + node_version_regional = "${var.node_version != "" && var.regional ? var.node_version : local.kubernetes_version_regional}" + node_version_zonal = "${var.node_version != "" && !var.regional ? var.node_version : local.kubernetes_version_zonal}" + custom_kube_dns_config = "${length(keys(var.stub_domains)) > 0 ? true : false}" + network_project_id = "${var.network_project_id != "" ? var.network_project_id : var.project_id}" cluster_type = "${var.regional ? "regional" : "zonal"}" @@ -149,7 +151,17 @@ locals { Get available container engine versions *****************************************/ data "google_container_engine_versions" "region" { - provider = "{% if private_cluster %}google-beta{%else %}google{% endif %}" - zone = "${data.google_compute_zones.available.names[0]}" + provider = "google-beta" + region = "${var.region}" + project = "${var.project_id}" +} + +data "google_container_engine_versions" "zone" { + provider = "google-beta" + // Work around to prevent a lack of zone declaration from causing regional cluster creation from erroring out due to error + // + // data.google_container_engine_versions.zone: Cannot determine zone: set in this resource, or set provider-level zone. + // + zone = "${var.zones[0] == "" ? 
data.google_compute_zones.available.names[0] : var.zones[0]}" project = "${var.project_id}" } diff --git a/cluster_regional.tf b/cluster_regional.tf index 3b66761494..69d6ede11d 100644 --- a/cluster_regional.tf +++ b/cluster_regional.tf @@ -31,7 +31,7 @@ resource "google_container_cluster" "primary" { network = "${replace(data.google_compute_network.gke_network.self_link, "https://www.googleapis.com/compute/v1/", "")}" subnetwork = "${replace(data.google_compute_subnetwork.gke_subnetwork.self_link, "https://www.googleapis.com/compute/v1/", "")}" - min_master_version = "${local.kubernetes_version}" + min_master_version = "${local.kubernetes_version_regional}" logging_service = "${var.logging_service}" monitoring_service = "${var.monitoring_service}" @@ -97,7 +97,7 @@ resource "google_container_node_pool" "pools" { project = "${var.project_id}" region = "${var.region}" cluster = "${var.name}" - version = "${lookup(var.node_pools[count.index], "auto_upgrade", false) ? "" : lookup(var.node_pools[count.index], "version", local.node_version)}" + version = "${lookup(var.node_pools[count.index], "auto_upgrade", false) ? 
"" : lookup(var.node_pools[count.index], "version", local.node_version_regional)}" initial_node_count = "${lookup(var.node_pools[count.index], "initial_node_count", lookup(var.node_pools[count.index], "min_count", 1))}" autoscaling { diff --git a/cluster_zonal.tf b/cluster_zonal.tf index 9f074650c7..f038ca643a 100644 --- a/cluster_zonal.tf +++ b/cluster_zonal.tf @@ -31,7 +31,7 @@ resource "google_container_cluster" "zonal_primary" { network = "${replace(data.google_compute_network.gke_network.self_link, "https://www.googleapis.com/compute/v1/", "")}" subnetwork = "${replace(data.google_compute_subnetwork.gke_subnetwork.self_link, "https://www.googleapis.com/compute/v1/", "")}" - min_master_version = "${local.kubernetes_version}" + min_master_version = "${local.kubernetes_version_zonal}" logging_service = "${var.logging_service}" monitoring_service = "${var.monitoring_service}" @@ -97,7 +97,7 @@ resource "google_container_node_pool" "zonal_pools" { project = "${var.project_id}" zone = "${var.zones[0]}" cluster = "${var.name}" - version = "${lookup(var.node_pools[count.index], "auto_upgrade", false) ? "" : lookup(var.node_pools[count.index], "version", local.node_version)}" + version = "${lookup(var.node_pools[count.index], "auto_upgrade", false) ? 
"" : lookup(var.node_pools[count.index], "version", local.node_version_zonal)}" initial_node_count = "${lookup(var.node_pools[count.index], "initial_node_count", lookup(var.node_pools[count.index], "min_count", 1))}" autoscaling { @@ -107,7 +107,7 @@ resource "google_container_node_pool" "zonal_pools" { management { auto_repair = "${lookup(var.node_pools[count.index], "auto_repair", true)}" - auto_upgrade = "${lookup(var.node_pools[count.index], "auto_upgrade", true)}" + auto_upgrade = "${lookup(var.node_pools[count.index], "auto_upgrade", false)}" } node_config { diff --git a/examples/deploy_service/main.tf b/examples/deploy_service/main.tf index 82b824ff45..aea7dea29b 100644 --- a/examples/deploy_service/main.tf +++ b/examples/deploy_service/main.tf @@ -24,6 +24,11 @@ provider "google" { version = "~> 1.20" } +provider "google-beta" { + credentials = "${file(var.credentials_path)}" + region = "${var.region}" +} + provider "kubernetes" { load_config_file = false host = "https://${module.gke.endpoint}" diff --git a/examples/node_pool/main.tf b/examples/node_pool/main.tf index 14fdd9e3a3..cc901d5403 100644 --- a/examples/node_pool/main.tf +++ b/examples/node_pool/main.tf @@ -24,6 +24,11 @@ provider "google" { region = "${var.region}" } +provider "google-beta" { + credentials = "${file(var.credentials_path)}" + region = "${var.region}" +} + module "gke" { source = "../../" project_id = "${var.project_id}" @@ -43,6 +48,7 @@ module "gke" { min_count = 1 max_count = 2 service_account = "${var.compute_engine_service_account}" + auto_upgrade = true }, { name = "pool-02" @@ -53,7 +59,6 @@ module "gke" { disk_type = "pd-standard" image_type = "COS" auto_repair = false - auto_upgrade = false service_account = "${var.compute_engine_service_account}" }, ] diff --git a/examples/shared_vpc/main.tf b/examples/shared_vpc/main.tf index 75877b31a6..5d177d2c1c 100644 --- a/examples/shared_vpc/main.tf +++ b/examples/shared_vpc/main.tf @@ -24,6 +24,11 @@ provider "google" { region = 
"${var.region}" } +provider "google-beta" { + credentials = "${file(var.credentials_path)}" + region = "${var.region}" +} + module "gke" { source = "../../" project_id = "${var.project_id}" diff --git a/examples/simple_regional/main.tf b/examples/simple_regional/main.tf index 5694ace8e4..864e70b32e 100644 --- a/examples/simple_regional/main.tf +++ b/examples/simple_regional/main.tf @@ -23,6 +23,11 @@ provider "google" { region = "${var.region}" } +provider "google-beta" { + credentials = "${file(var.credentials_path)}" + region = "${var.region}" +} + module "gke" { source = "../../" project_id = "${var.project_id}" diff --git a/examples/simple_zonal/main.tf b/examples/simple_zonal/main.tf index a0dc9ffa97..d08c0da413 100644 --- a/examples/simple_zonal/main.tf +++ b/examples/simple_zonal/main.tf @@ -24,6 +24,11 @@ provider "google" { region = "${var.region}" } +provider "google-beta" { + credentials = "${file(var.credentials_path)}" + region = "${var.region}" +} + module "gke" { source = "../../" project_id = "${var.project_id}" diff --git a/examples/stub_domains/main.tf b/examples/stub_domains/main.tf index 71dbb19e7e..e95e2f10d2 100644 --- a/examples/stub_domains/main.tf +++ b/examples/stub_domains/main.tf @@ -24,6 +24,11 @@ provider "google" { region = "${var.region}" } +provider "google-beta" { + credentials = "${file(var.credentials_path)}" + region = "${var.region}" +} + module "gke" { source = "../../" project_id = "${var.project_id}" diff --git a/main.tf b/main.tf index 7ba3caf19b..ca2ae78efd 100644 --- a/main.tf +++ b/main.tf @@ -31,10 +31,12 @@ resource "random_shuffle" "available_zones" { } locals { - kubernetes_version = "${var.kubernetes_version != "latest" ? var.kubernetes_version : data.google_container_engine_versions.region.latest_node_version}" - node_version = "${var.node_version != "" ? var.node_version : local.kubernetes_version}" - custom_kube_dns_config = "${length(keys(var.stub_domains)) > 0 ? 
true : false}" - network_project_id = "${var.network_project_id != "" ? var.network_project_id : var.project_id}" + kubernetes_version_regional = "${var.kubernetes_version != "latest" ? var.kubernetes_version : data.google_container_engine_versions.region.latest_master_version}" + kubernetes_version_zonal = "${var.kubernetes_version != "latest" ? var.kubernetes_version : data.google_container_engine_versions.zone.latest_master_version}" + node_version_regional = "${var.node_version != "" && var.regional ? var.node_version : local.kubernetes_version_regional}" + node_version_zonal = "${var.node_version != "" && !var.regional ? var.node_version : local.kubernetes_version_zonal}" + custom_kube_dns_config = "${length(keys(var.stub_domains)) > 0 ? true : false}" + network_project_id = "${var.network_project_id != "" ? var.network_project_id : var.project_id}" cluster_type = "${var.regional ? "regional" : "zonal"}" @@ -149,7 +151,17 @@ locals { Get available container engine versions *****************************************/ data "google_container_engine_versions" "region" { - provider = "google" - zone = "${data.google_compute_zones.available.names[0]}" + provider = "google-beta" + region = "${var.region}" project = "${var.project_id}" -} \ No newline at end of file +} + +data "google_container_engine_versions" "zone" { + // Work around to prevent a lack of zone declaration from causing regional cluster creation from erroring out due to error + // + // data.google_container_engine_versions.zone: Cannot determine zone: set in this resource, or set provider-level zone. + // + zone = "${var.zones[0] == "" ? 
data.google_compute_zones.available.names[0] : var.zones[0]}" + + project = "${var.project_id}" +} diff --git a/modules/private-cluster/cluster_regional.tf b/modules/private-cluster/cluster_regional.tf index 4904c5ed83..7603aba582 100644 --- a/modules/private-cluster/cluster_regional.tf +++ b/modules/private-cluster/cluster_regional.tf @@ -31,7 +31,7 @@ resource "google_container_cluster" "primary" { network = "${replace(data.google_compute_network.gke_network.self_link, "https://www.googleapis.com/compute/v1/", "")}" subnetwork = "${replace(data.google_compute_subnetwork.gke_subnetwork.self_link, "https://www.googleapis.com/compute/v1/", "")}" - min_master_version = "${local.kubernetes_version}" + min_master_version = "${local.kubernetes_version_regional}" logging_service = "${var.logging_service}" monitoring_service = "${var.monitoring_service}" @@ -102,7 +102,7 @@ resource "google_container_node_pool" "pools" { project = "${var.project_id}" region = "${var.region}" cluster = "${var.name}" - version = "${lookup(var.node_pools[count.index], "auto_upgrade", false) ? "" : lookup(var.node_pools[count.index], "version", local.node_version)}" + version = "${lookup(var.node_pools[count.index], "auto_upgrade", false) ? 
"" : lookup(var.node_pools[count.index], "version", local.node_version_regional)}" initial_node_count = "${lookup(var.node_pools[count.index], "initial_node_count", lookup(var.node_pools[count.index], "min_count", 1))}" autoscaling { diff --git a/modules/private-cluster/cluster_zonal.tf b/modules/private-cluster/cluster_zonal.tf index 05ad8035a1..b0a25c394b 100644 --- a/modules/private-cluster/cluster_zonal.tf +++ b/modules/private-cluster/cluster_zonal.tf @@ -31,7 +31,7 @@ resource "google_container_cluster" "zonal_primary" { network = "${replace(data.google_compute_network.gke_network.self_link, "https://www.googleapis.com/compute/v1/", "")}" subnetwork = "${replace(data.google_compute_subnetwork.gke_subnetwork.self_link, "https://www.googleapis.com/compute/v1/", "")}" - min_master_version = "${local.kubernetes_version}" + min_master_version = "${local.kubernetes_version_zonal}" logging_service = "${var.logging_service}" monitoring_service = "${var.monitoring_service}" @@ -102,7 +102,7 @@ resource "google_container_node_pool" "zonal_pools" { project = "${var.project_id}" zone = "${var.zones[0]}" cluster = "${var.name}" - version = "${lookup(var.node_pools[count.index], "auto_upgrade", false) ? "" : lookup(var.node_pools[count.index], "version", local.node_version)}" + version = "${lookup(var.node_pools[count.index], "auto_upgrade", false) ? "" : lookup(var.node_pools[count.index], "version", local.node_version_zonal)}" initial_node_count = "${lookup(var.node_pools[count.index], "initial_node_count", lookup(var.node_pools[count.index], "min_count", 1))}" autoscaling { diff --git a/modules/private-cluster/main.tf b/modules/private-cluster/main.tf index 57d98dd2b0..3e93de103c 100644 --- a/modules/private-cluster/main.tf +++ b/modules/private-cluster/main.tf @@ -31,10 +31,12 @@ resource "random_shuffle" "available_zones" { } locals { - kubernetes_version = "${var.kubernetes_version != "latest" ? 
var.kubernetes_version : data.google_container_engine_versions.region.latest_node_version}" - node_version = "${var.node_version != "" ? var.node_version : local.kubernetes_version}" - custom_kube_dns_config = "${length(keys(var.stub_domains)) > 0 ? true : false}" - network_project_id = "${var.network_project_id != "" ? var.network_project_id : var.project_id}" + kubernetes_version_regional = "${var.kubernetes_version != "latest" ? var.kubernetes_version : data.google_container_engine_versions.region.latest_master_version}" + kubernetes_version_zonal = "${var.kubernetes_version != "latest" ? var.kubernetes_version : data.google_container_engine_versions.zone.latest_master_version}" + node_version_regional = "${var.node_version != "" && var.regional ? var.node_version : local.kubernetes_version_regional}" + node_version_zonal = "${var.node_version != "" && !var.regional ? var.node_version : local.kubernetes_version_zonal}" + custom_kube_dns_config = "${length(keys(var.stub_domains)) > 0 ? true : false}" + network_project_id = "${var.network_project_id != "" ? var.network_project_id : var.project_id}" cluster_type = "${var.regional ? "regional" : "zonal"}" @@ -150,6 +152,16 @@ locals { *****************************************/ data "google_container_engine_versions" "region" { provider = "google-beta" - zone = "${data.google_compute_zones.available.names[0]}" + region = "${var.region}" project = "${var.project_id}" } + +data "google_container_engine_versions" "zone" { + // Work around to prevent a lack of zone declaration from causing regional cluster creation from erroring out due to error + // + // data.google_container_engine_versions.zone: Cannot determine zone: set in this resource, or set provider-level zone. + // + zone = "${var.zones[0] == "" ? 
data.google_compute_zones.available.names[0] : var.zones[0]}" + + project = "${var.project_id}" +} diff --git a/modules/private-cluster/variables.tf b/modules/private-cluster/variables.tf index 6225ae94c9..d46391599c 100644 --- a/modules/private-cluster/variables.tf +++ b/modules/private-cluster/variables.tf @@ -116,7 +116,7 @@ variable "ip_range_pods" { } variable "ip_range_services" { - description = "The _name_ of the secondary subnet ip range to use for services" + description = "The _name_ of the secondary subnet range to use for services" } variable "remove_default_node_pool" { diff --git a/variables.tf b/variables.tf index f37c36be1b..d5b571491a 100644 --- a/variables.tf +++ b/variables.tf @@ -116,7 +116,7 @@ variable "ip_range_pods" { } variable "ip_range_services" { - description = "The _name_ of the secondary subnet ip range to use for services" + description = "The _name_ of the secondary subnet range to use for services" } variable "remove_default_node_pool" {