diff --git a/blueprints/gke/autopilot/cluster.tf b/blueprints/gke/autopilot/cluster.tf index a823a894ca..db4f29cc32 100644 --- a/blueprints/gke/autopilot/cluster.tf +++ b/blueprints/gke/autopilot/cluster.tf @@ -30,8 +30,9 @@ module "cluster" { # autopilot = true # } # monitoring_config = { - # enenable_components = ["SYSTEM_COMPONENTS"] - # managed_prometheus = true + # enable_api_server_metrics = true + # enable_controller_manager_metrics = true + # enable_scheduler_metrics = true # } # cluster_autoscaling = { # auto_provisioning_defaults = { diff --git a/blueprints/gke/multitenant-fleet/README.md b/blueprints/gke/multitenant-fleet/README.md index baaf288f0e..8af50bc33e 100644 --- a/blueprints/gke/multitenant-fleet/README.md +++ b/blueprints/gke/multitenant-fleet/README.md @@ -244,21 +244,21 @@ module "gke" { | name | description | type | required | default | |---|---|:---:|:---:|:---:| -| [billing_account_id](variables.tf#L17) | Billing account id. | string | ✓ | | -| [folder_id](variables.tf#L138) | Folder used for the GKE project in folders/nnnnnnnnnnn format. | string | ✓ | | -| [prefix](variables.tf#L189) | Prefix used for resource names. | string | ✓ | | -| [project_id](variables.tf#L198) | ID of the project that will contain all the clusters. | string | ✓ | | -| [vpc_config](variables.tf#L210) | Shared VPC project and VPC details. | object({…}) | ✓ | | -| [clusters](variables.tf#L22) | Clusters configuration. Refer to the gke-cluster module for type details. | map(object({…})) | | {} | -| [fleet_configmanagement_clusters](variables.tf#L76) | Config management features enabled on specific sets of member clusters, in config name => [cluster name] format. | map(list(string)) | | {} | -| [fleet_configmanagement_templates](variables.tf#L83) | Sets of config management configurations that can be applied to member clusters, in config name => {options} format. | map(object({…})) | | {} | -| [fleet_features](variables.tf#L118) | Enable and configure fleet features. 
Set to null to disable GKE Hub if fleet workload identity is not used. | object({…}) | | null | -| [fleet_workload_identity](variables.tf#L131) | Use Fleet Workload Identity for clusters. Enables GKE Hub if set to true. | bool | | false | -| [group_iam](variables.tf#L143) | Project-level IAM bindings for groups. Use group emails as keys, list of roles as values. | map(list(string)) | | {} | -| [iam](variables.tf#L150) | Project-level authoritative IAM bindings for users and service accounts in {ROLE => [MEMBERS]} format. | map(list(string)) | | {} | -| [labels](variables.tf#L157) | Project-level labels. | map(string) | | {} | -| [nodepools](variables.tf#L163) | Nodepools configuration. Refer to the gke-nodepool module for type details. | map(map(object({…}))) | | {} | -| [project_services](variables.tf#L203) | Additional project services to enable. | list(string) | | [] | +| [billing_account_id](variables.tf#L17) | Billing account ID. | string | ✓ | | +| [folder_id](variables.tf#L148) | Folder used for the GKE project in folders/nnnnnnnnnnn format. | string | ✓ | | +| [prefix](variables.tf#L199) | Prefix used for resource names. | string | ✓ | | +| [project_id](variables.tf#L208) | ID of the project that will contain all the clusters. | string | ✓ | | +| [vpc_config](variables.tf#L220) | Shared VPC project and VPC details. | object({…}) | ✓ | | +| [clusters](variables.tf#L22) | Clusters configuration. Refer to the gke-cluster module for type details. | map(object({…})) | | {} | +| [fleet_configmanagement_clusters](variables.tf#L86) | Config management features enabled on specific sets of member clusters, in config name => [cluster name] format. | map(list(string)) | | {} | +| [fleet_configmanagement_templates](variables.tf#L93) | Sets of config management configurations that can be applied to member clusters, in config name => {options} format. | map(object({…})) | | {} | +| [fleet_features](variables.tf#L128) | Enable and configure fleet features. 
Set to null to disable GKE Hub if fleet workload identity is not used. | object({…}) | | null | +| [fleet_workload_identity](variables.tf#L141) | Use Fleet Workload Identity for clusters. Enables GKE Hub if set to true. | bool | | false | +| [group_iam](variables.tf#L153) | Project-level IAM bindings for groups. Use group emails as keys, list of roles as values. | map(list(string)) | | {} | +| [iam](variables.tf#L160) | Project-level authoritative IAM bindings for users and service accounts in {ROLE => [MEMBERS]} format. | map(list(string)) | | {} | +| [labels](variables.tf#L167) | Project-level labels. | map(string) | | {} | +| [nodepools](variables.tf#L173) | Nodepools configuration. Refer to the gke-nodepool module for type details. | map(map(object({…}))) | | {} | +| [project_services](variables.tf#L213) | Additional project services to enable. | list(string) | | [] | ## Outputs diff --git a/blueprints/gke/multitenant-fleet/variables.tf b/blueprints/gke/multitenant-fleet/variables.tf index 2461ea8a41..6f10080254 100644 --- a/blueprints/gke/multitenant-fleet/variables.tf +++ b/blueprints/gke/multitenant-fleet/variables.tf @@ -15,7 +15,7 @@ */ variable "billing_account_id" { - description = "Billing account id." + description = "Billing account ID." 
type = string } @@ -48,9 +48,19 @@ variable "clusters" { max_pods_per_node = optional(number, 110) min_master_version = optional(string) monitoring_config = optional(object({ - enable_components = optional(list(string), ["SYSTEM_COMPONENTS"]) - managed_prometheus = optional(bool) - })) + enable_system_metrics = optional(bool, true) + + # Control plane metrics + enable_api_server_metrics = optional(bool, false) + enable_controller_manager_metrics = optional(bool, false) + enable_scheduler_metrics = optional(bool, false) + + # TODO add kube state metrics + + # Google Cloud Managed Service for Prometheus + enable_managed_prometheus = optional(bool, true) + }), {}) + node_locations = optional(list(string)) private_cluster_config = optional(any) release_channel = optional(string) diff --git a/fast/stages/3-gke-multitenant/dev/README.md b/fast/stages/3-gke-multitenant/dev/README.md index 411c4bc410..105add9997 100644 --- a/fast/stages/3-gke-multitenant/dev/README.md +++ b/fast/stages/3-gke-multitenant/dev/README.md @@ -163,21 +163,21 @@ Leave all these variables unset (or set to `null`) to disable fleet management. |---|---|:---:|:---:|:---:|:---:| | [automation](variables.tf#L21) | Automation resources created by the bootstrap stage. | object({…}) | ✓ | | 0-bootstrap | | [billing_account](variables.tf#L29) | Billing account id. If billing account is not part of the same org set `is_org_level` to false. | object({…}) | ✓ | | 0-bootstrap | -| [folder_ids](variables.tf#L159) | Folders to be used for the networking resources in folders/nnnnnnnnnnn format. If null, folder will be created. | object({…}) | ✓ | | 1-resman | -| [host_project_ids](variables.tf#L174) | Host project for the shared VPC. | object({…}) | ✓ | | 2-networking | -| [prefix](variables.tf#L227) | Prefix used for resources that need unique names. | string | ✓ | | | -| [vpc_self_links](variables.tf#L243) | Self link for the shared VPC. 
| object({…}) | ✓ | | 2-networking | -| [clusters](variables.tf#L42) | Clusters configuration. Refer to the gke-cluster module for type details. | map(object({…})) | | {} | | -| [fleet_configmanagement_clusters](variables.tf#L96) | Config management features enabled on specific sets of member clusters, in config name => [cluster name] format. | map(list(string)) | | {} | | -| [fleet_configmanagement_templates](variables.tf#L104) | Sets of config management configurations that can be applied to member clusters, in config name => {options} format. | map(object({…})) | | {} | | -| [fleet_features](variables.tf#L139) | Enable and configure fleet features. Set to null to disable GKE Hub if fleet workload identity is not used. | object({…}) | | null | | -| [fleet_workload_identity](variables.tf#L152) | Use Fleet Workload Identity for clusters. Enables GKE Hub if set to true. | bool | | false | | -| [group_iam](variables.tf#L167) | Project-level authoritative IAM bindings for groups in {GROUP_EMAIL => [ROLES]} format. Use group emails as keys, list of roles as values. | map(list(string)) | | {} | | -| [iam](variables.tf#L182) | Project-level authoritative IAM bindings for users and service accounts in {ROLE => [MEMBERS]} format. | map(list(string)) | | {} | | -| [labels](variables.tf#L189) | Project-level labels. | map(string) | | {} | | -| [nodepools](variables.tf#L195) | Nodepools configuration. Refer to the gke-nodepool module for type details. | map(map(object({…}))) | | {} | | -| [outputs_location](variables.tf#L221) | Path where providers, tfvars files, and lists for the following stages are written. Leave empty to disable. | string | | null | | -| [project_services](variables.tf#L236) | Additional project services to enable. | list(string) | | [] | | +| [folder_ids](variables.tf#L168) | Folders to be used for the networking resources in folders/nnnnnnnnnnn format. If null, folder will be created. 
| object({…}) | ✓ | | 1-resman | +| [host_project_ids](variables.tf#L183) | Host project for the shared VPC. | object({…}) | ✓ | | 2-networking | +| [prefix](variables.tf#L236) | Prefix used for resources that need unique names. | string | ✓ | | | +| [vpc_self_links](variables.tf#L252) | Self link for the shared VPC. | object({…}) | ✓ | | 2-networking | +| [clusters](variables.tf#L42) | Clusters configuration. Refer to the gke-cluster-standard module for type details. | map(object({…})) | | {} | | +| [fleet_configmanagement_clusters](variables.tf#L105) | Config management features enabled on specific sets of member clusters, in config name => [cluster name] format. | map(list(string)) | | {} | | +| [fleet_configmanagement_templates](variables.tf#L113) | Sets of config management configurations that can be applied to member clusters, in config name => {options} format. | map(object({…})) | | {} | | +| [fleet_features](variables.tf#L148) | Enable and configure fleet features. Set to null to disable GKE Hub if fleet workload identity is not used. | object({…}) | | null | | +| [fleet_workload_identity](variables.tf#L161) | Use Fleet Workload Identity for clusters. Enables GKE Hub if set to true. | bool | | false | | +| [group_iam](variables.tf#L176) | Project-level authoritative IAM bindings for groups in {GROUP_EMAIL => [ROLES]} format. Use group emails as keys, list of roles as values. | map(list(string)) | | {} | | +| [iam](variables.tf#L191) | Project-level authoritative IAM bindings for users and service accounts in {ROLE => [MEMBERS]} format. | map(list(string)) | | {} | | +| [labels](variables.tf#L198) | Project-level labels. | map(string) | | {} | | +| [nodepools](variables.tf#L204) | Nodepools configuration. Refer to the gke-nodepool module for type details. | map(map(object({…}))) | | {} | | +| [outputs_location](variables.tf#L230) | Path where providers, tfvars files, and lists for the following stages are written. Leave empty to disable. 
| string | | null | | +| [project_services](variables.tf#L245) | Additional project services to enable. | list(string) | | [] | | ## Outputs diff --git a/fast/stages/3-gke-multitenant/dev/variables.tf b/fast/stages/3-gke-multitenant/dev/variables.tf index dffca0bf22..4d1779386f 100644 --- a/fast/stages/3-gke-multitenant/dev/variables.tf +++ b/fast/stages/3-gke-multitenant/dev/variables.tf @@ -40,7 +40,7 @@ variable "billing_account" { } variable "clusters" { - description = "Clusters configuration. Refer to the gke-cluster module for type details." + description = "Clusters configuration. Refer to the gke-cluster-standard module for type details." type = map(object({ cluster_autoscaling = optional(any) description = optional(string) @@ -68,9 +68,18 @@ variable "clusters" { max_pods_per_node = optional(number, 110) min_master_version = optional(string) monitoring_config = optional(object({ - enable_components = optional(list(string), ["SYSTEM_COMPONENTS"]) - managed_prometheus = optional(bool) - })) + enable_system_metrics = optional(bool, true) + + # Control plane metrics + enable_api_server_metrics = optional(bool, false) + enable_controller_manager_metrics = optional(bool, false) + enable_scheduler_metrics = optional(bool, false) + + # TODO add kube state metrics + + # Google Cloud Managed Service for Prometheus + enable_managed_prometheus = optional(bool, true) + }), {}) node_locations = optional(list(string)) private_cluster_config = optional(any) release_channel = optional(string) diff --git a/modules/gke-cluster-autopilot/README.md b/modules/gke-cluster-autopilot/README.md index 2c0083108f..e5677a5895 100644 --- a/modules/gke-cluster-autopilot/README.md +++ b/modules/gke-cluster-autopilot/README.md @@ -50,11 +50,11 @@ module "cluster-1" { ### Cloud DNS -This example shows how to [use Cloud DNS as a Kubernetes DNS provider](https://cloud.google.com/kubernetes-engine/docs/how-to/cloud-dns). 
- -> **Warning** +> [!WARNING] > [Cloud DNS is the only DNS provider for Autopilot clusters](https://cloud.google.com/kubernetes-engine/docs/concepts/service-discovery#cloud_dns) running version `1.25.9-gke.400` and later, and version `1.26.4-gke.500` and later. It is [pre-configured](https://cloud.google.com/kubernetes-engine/docs/resources/autopilot-standard-feature-comparison#feature-comparison) for those clusters. The following example *only* applies to Autopilot clusters running *earlier* versions. +This example shows how to [use Cloud DNS as a Kubernetes DNS provider](https://cloud.google.com/kubernetes-engine/docs/how-to/cloud-dns). + ```hcl module "cluster-1" { source = "./fabric/modules/gke-cluster-autopilot" @@ -79,11 +79,11 @@ module "cluster-1" { ### Logging configuration -This example shows how to [collect logs for the Kubernetes control plane components](https://cloud.google.com/stackdriver/docs/solutions/gke/installing). The logs for these components are not collected by default. - -> **Note** +> [!NOTE] > System and workload logs collection is pre-configured for Autopilot clusters and cannot be disabled. +This example shows how to [collect logs for the Kubernetes control plane components](https://cloud.google.com/stackdriver/docs/solutions/gke/installing). The logs for these components are not collected by default. + ```hcl module "cluster-1" { source = "./fabric/modules/gke-cluster-autopilot" @@ -106,14 +106,14 @@ module "cluster-1" { ### Monitoring configuration -This example shows how to [configure collection of Kubernetes control plane metrics](https://cloud.google.com/stackdriver/docs/solutions/gke/managing-metrics#enable-control-plane-metrics). The metrics for these components are not collected by default. - -> **Note** +> [!NOTE] > System metrics collection is pre-configured for Autopilot clusters and cannot be disabled. -> **Warning** +> [!WARNING] > GKE **workload metrics** is deprecated and removed in GKE 1.24 and later. 
Workload metrics is replaced by [Google Cloud Managed Service for Prometheus](https://cloud.google.com/stackdriver/docs/managed-prometheus), which is Google's recommended way to monitor Kubernetes applications by using Cloud Monitoring. +This example shows how to [configure collection of Kubernetes control plane metrics](https://cloud.google.com/stackdriver/docs/solutions/gke/managing-metrics#enable-control-plane-metrics). The metrics for these components are not collected by default. + ```hcl module "cluster-1" { source = "./fabric/modules/gke-cluster-autopilot" @@ -136,14 +136,14 @@ module "cluster-1" { ### Backup for GKE +> [!NOTE] +> Although Backup for GKE can be enabled as an add-on when configuring your GKE clusters, it is a separate service from GKE. + [Backup for GKE](https://cloud.google.com/kubernetes-engine/docs/add-on/backup-for-gke/concepts/backup-for-gke) is a service for backing up and restoring workloads in GKE clusters. It has two components: * A [Google Cloud API](https://cloud.google.com/kubernetes-engine/docs/add-on/backup-for-gke/reference/rest) that serves as the control plane for the service. * A GKE add-on (the [Backup for GKE agent](https://cloud.google.com/kubernetes-engine/docs/add-on/backup-for-gke/concepts/backup-for-gke#agent_overview)) that must be enabled in each cluster for which you wish to perform backup and restore operations. -> **Note** -> Although Backup for GKE can be enabled as an add-on when configuring your GKE clusters, it is a separate service from GKE. - Backup for GKE is supported in GKE Autopilot clusters with [some restrictions](https://cloud.google.com/kubernetes-engine/docs/add-on/backup-for-gke/concepts/about-autopilot). 
This example shows how to [enable Backup for GKE on a new Autopilot cluster](https://cloud.google.com/kubernetes-engine/docs/add-on/backup-for-gke/how-to/install#enable_on_a_new_cluster_optional) and [plan a set of backups](https://cloud.google.com/kubernetes-engine/docs/add-on/backup-for-gke/how-to/backup-plan). @@ -176,9 +176,9 @@ module "cluster-1" { | name | description | type | required | default | |---|---|:---:|:---:|:---:| -| [location](variables.tf#L110) | Autopilot cluster are always regional. | string | ✓ | | +| [location](variables.tf#L110) | Autopilot clusters are always regional. | string | ✓ | | | [name](variables.tf#L170) | Cluster name. | string | ✓ | | -| [project_id](variables.tf#L196) | Cluster project id. | string | ✓ | | +| [project_id](variables.tf#L196) | Cluster project ID. | string | ✓ | | | [vpc_config](variables.tf#L225) | VPC-level configuration. | object({…}) | ✓ | | | [backup_configs](variables.tf#L17) | Configuration for Backup for GKE. | object({…}) | | {} | | [description](variables.tf#L37) | Cluster description. | string | | null | @@ -203,7 +203,7 @@ module "cluster-1" { | [ca_certificate](outputs.tf#L17) | Public certificate of the cluster (base64-encoded). | ✓ | | [cluster](outputs.tf#L23) | Cluster resource. | ✓ | | [endpoint](outputs.tf#L29) | Cluster endpoint. | | -| [id](outputs.tf#L34) | Fully qualified cluster id. | | +| [id](outputs.tf#L34) | Fully qualified cluster ID. | | | [location](outputs.tf#L39) | Cluster location. | | | [master_version](outputs.tf#L44) | Master version. | | | [name](outputs.tf#L49) | Cluster name. | | diff --git a/modules/gke-cluster-autopilot/main.tf b/modules/gke-cluster-autopilot/main.tf index 9af26acdf8..7948fd58a1 100644 --- a/modules/gke-cluster-autopilot/main.tf +++ b/modules/gke-cluster-autopilot/main.tf @@ -103,6 +103,13 @@ resource "google_container_cluster" "cluster" { } } + dynamic "gateway_api_config" { + for_each = var.enable_features.gateway_api ? 
[""] : [] + content { + channel = "CHANNEL_STANDARD" + } + } + dynamic "ip_allocation_policy" { for_each = var.vpc_config.secondary_range_blocks != null ? [""] : [] content { @@ -131,13 +138,6 @@ resource "google_container_cluster" "cluster" { ])) } - dynamic "gateway_api_config" { - for_each = var.enable_features.gateway_api ? [""] : [] - content { - channel = "CHANNEL_STANDARD" - } - } - maintenance_policy { dynamic "daily_maintenance_window" { for_each = ( @@ -207,7 +207,7 @@ resource "google_container_cluster" "cluster" { enable_components = toset(compact([ # System metrics collection cannot be disabled for Autopilot clusters. "SYSTEM_COMPONENTS", - # Control plane metrics. + # Control plane metrics: var.monitoring_config.enable_api_server_metrics ? "APISERVER" : null, var.monitoring_config.enable_controller_manager_metrics ? "CONTROLLER_MANAGER" : null, var.monitoring_config.enable_scheduler_metrics ? "SCHEDULER" : null, diff --git a/modules/gke-cluster-autopilot/outputs.tf b/modules/gke-cluster-autopilot/outputs.tf index 029ab06a22..7978e55b03 100644 --- a/modules/gke-cluster-autopilot/outputs.tf +++ b/modules/gke-cluster-autopilot/outputs.tf @@ -32,7 +32,7 @@ output "endpoint" { } output "id" { - description = "Fully qualified cluster id." + description = "Fully qualified cluster ID." value = google_container_cluster.cluster.id } diff --git a/modules/gke-cluster-autopilot/variables.tf b/modules/gke-cluster-autopilot/variables.tf index 37c054e344..bf4102223e 100644 --- a/modules/gke-cluster-autopilot/variables.tf +++ b/modules/gke-cluster-autopilot/variables.tf @@ -108,7 +108,7 @@ variable "labels" { } variable "location" { - description = "Autopilot cluster are always regional." + description = "Autopilot clusters are always regional." type = string } @@ -194,7 +194,7 @@ variable "private_cluster_config" { } variable "project_id" { - description = "Cluster project id." + description = "Cluster project ID." 
type = string } diff --git a/modules/gke-cluster-standard/README.md b/modules/gke-cluster-standard/README.md index 0dfd6636f3..da5a8897a1 100644 --- a/modules/gke-cluster-standard/README.md +++ b/modules/gke-cluster-standard/README.md @@ -1,10 +1,29 @@ -# GKE cluster Standard module +# GKE Standard cluster module -This module allows simplified creation and management of GKE Standard clusters and should be used together with the GKE nodepool module, as the default nodepool is turned off here and cannot be re-enabled. Some sensible defaults are set initially, in order to allow less verbose usage for most use cases. +This module offers a way to create and manage Google Kubernetes Engine (GKE) [Standard clusters](https://cloud.google.com/kubernetes-engine/docs/concepts/choose-cluster-mode#why-standard). With its sensible default settings based on best practices and the authors' experience as Google Cloud practitioners, the module accommodates many common use cases out of the box, without relying on verbose configuration. + +> [!IMPORTANT] +> This module should be used together with the [`gke-nodepool`](../gke-nodepool/) module because the default node pool is deleted upon cluster creation and cannot be re-created. + + +- [Example](#example) + - [GKE Standard cluster](#gke-standard-cluster) + - [Enable Dataplane V2](#enable-dataplane-v2) + - [Managing GKE logs](#managing-gke-logs) + - [Monitoring configuration](#monitoring-configuration) + - [Disable GKE logs or metrics collection](#disable-gke-logs-or-metrics-collection) + - [Cloud DNS](#cloud-dns) + - [Backup for GKE](#backup-for-gke) + - [Automatic creation of new secondary ranges](#automatic-creation-of-new-secondary-ranges) +- [Variables](#variables) +- [Outputs](#outputs) + ## Example -### GKE Cluster +### GKE Standard cluster + +This example shows how to [create a zonal GKE cluster in Standard mode](https://cloud.google.com/kubernetes-engine/docs/how-to/creating-a-zonal-cluster). 
```hcl module "cluster-1" { @@ -36,7 +55,9 @@ module "cluster-1" { # tftest modules=1 resources=1 inventory=basic.yaml ``` -### GKE Cluster with Dataplane V2 enabled +### Enable Dataplane V2 + +This example shows how to [create a zonal GKE Cluster with Dataplane V2 enabled](https://cloud.google.com/kubernetes-engine/docs/how-to/dataplane-v2). ```hcl module "cluster-1" { @@ -95,15 +116,40 @@ module "cluster-1" { # tftest modules=1 resources=1 inventory=logging-config-enable-all.yaml ``` -### Disable GKE logs collection +### Monitoring configuration + +This example shows how to [configure collection of Kubernetes control plane metrics](https://cloud.google.com/stackdriver/docs/solutions/gke/managing-metrics#enable-control-plane-metrics). The metrics for these components are not collected by default. + +```hcl +module "cluster-1" { + source = "./fabric/modules/gke-cluster-standard" + project_id = "myproject" + name = "cluster-1" + location = "europe-west1-b" + vpc_config = { + network = var.vpc.self_link + subnetwork = var.subnet.self_link + secondary_range_names = {} + } + monitoring_config = { + enable_api_server_metrics = true + enable_controller_manager_metrics = true + enable_scheduler_metrics = true + } +} +# tftest modules=1 resources=1 inventory=monitoring-config-control-plane.yaml +``` + -This example shows how to fully disable logs collection on a GKE Standard cluster. This is not recommended. +### Disable GKE logs or metrics collection -> **Warning** +> [!WARNING] > If you've disabled Cloud Logging or Cloud Monitoring, GKE customer support > is offered on a best-effort basis and might require additional effort > from your engineering team. +This example shows how to fully disable logs collection on a zonal GKE Standard cluster. This is not recommended. 
+ ```hcl module "cluster-1" { source = "./fabric/modules/gke-cluster-standard" @@ -122,6 +168,27 @@ module "cluster-1" { # tftest modules=1 resources=1 inventory=logging-config-disable-all.yaml ``` +This example shows how to fully disable metrics collection on a zonal GKE Standard cluster. This is not recommended. + +```hcl +module "cluster-1" { + source = "./fabric/modules/gke-cluster-standard" + project_id = "myproject" + name = "cluster-1" + location = "europe-west1-b" + vpc_config = { + network = var.vpc.self_link + subnetwork = var.subnet.self_link + secondary_range_names = {} + } + monitoring_config = { + enable_system_metrics = false + enable_managed_prometheus = false + } +} +# tftest modules=1 resources=1 inventory=monitoring-config-disable-all.yaml +``` + ### Cloud DNS This example shows how to [use Cloud DNS as a Kubernetes DNS provider](https://cloud.google.com/kubernetes-engine/docs/how-to/cloud-dns) for GKE Standard clusters. @@ -150,7 +217,15 @@ module "cluster-1" { ### Backup for GKE -This example shows how to [enable the Backup for GKE agent and configure a Backup Plan](https://cloud.google.com/kubernetes-engine/docs/add-on/backup-for-gke/concepts/backup-for-gke) for GKE Standard clusters. +> [!NOTE] +> Although Backup for GKE can be enabled as an add-on when configuring your GKE clusters, it is a separate service from GKE. + +[Backup for GKE](https://cloud.google.com/kubernetes-engine/docs/add-on/backup-for-gke/concepts/backup-for-gke) is a service for backing up and restoring workloads in GKE clusters. It has two components: + +* A [Google Cloud API](https://cloud.google.com/kubernetes-engine/docs/add-on/backup-for-gke/reference/rest) that serves as the control plane for the service. +* A GKE add-on (the [Backup for GKE agent](https://cloud.google.com/kubernetes-engine/docs/add-on/backup-for-gke/concepts/backup-for-gke#agent_overview)) that must be enabled in each cluster for which you wish to perform backup and restore operations. 
+ +This example shows how to [enable Backup for GKE on a new zonal GKE Standard cluster](https://cloud.google.com/kubernetes-engine/docs/add-on/backup-for-gke/how-to/install#enable_on_a_new_cluster_optional) and [plan a set of backups](https://cloud.google.com/kubernetes-engine/docs/add-on/backup-for-gke/how-to/backup-plan). ```hcl module "cluster-1" { @@ -197,16 +272,15 @@ module "cluster-1" { } # tftest modules=1 resources=1 ``` - ## Variables | name | description | type | required | default | |---|---|:---:|:---:|:---:| | [location](variables.tf#L138) | Cluster zone or region. | string | ✓ | | -| [name](variables.tf#L210) | Cluster name. | string | ✓ | | -| [project_id](variables.tf#L236) | Cluster project id. | string | ✓ | | -| [vpc_config](variables.tf#L253) | VPC-level configuration. | object({…}) | ✓ | | +| [name](variables.tf#L226) | Cluster name. | string | ✓ | | +| [project_id](variables.tf#L252) | Cluster project id. | string | ✓ | | +| [vpc_config](variables.tf#L269) | VPC-level configuration. | object({…}) | ✓ | | | [backup_configs](variables.tf#L17) | Configuration for Backup for GKE. | object({…}) | | {} | | [cluster_autoscaling](variables.tf#L37) | Enable and configure limits for Node Auto-Provisioning with Cluster Autoscaler. | object({…}) | | null | | [description](variables.tf#L58) | Cluster description. | string | | null | @@ -218,11 +292,11 @@ module "cluster-1" { | [maintenance_config](variables.tf#L164) | Maintenance window configuration. | object({…}) | | {…} | | [max_pods_per_node](variables.tf#L187) | Maximum number of pods per node in this cluster. | number | | 110 | | [min_master_version](variables.tf#L193) | Minimum version of the master, defaults to the version of the most recent official release. | string | | null | -| [monitoring_config](variables.tf#L199) | Monitoring components. | object({…}) | | {…} | -| [node_locations](variables.tf#L215) | Zones in which the cluster's nodes are located. 
| list(string) | | [] | -| [private_cluster_config](variables.tf#L222) | Private cluster configuration. | object({…}) | | null | -| [release_channel](variables.tf#L241) | Release channel for GKE upgrades. | string | | null | -| [tags](variables.tf#L247) | Network tags applied to nodes. | list(string) | | null | +| [monitoring_config](variables.tf#L199) | Monitoring configuration. Google Cloud Managed Service for Prometheus is enabled by default. | object({…}) | | {} | +| [node_locations](variables.tf#L231) | Zones in which the cluster's nodes are located. | list(string) | | [] | +| [private_cluster_config](variables.tf#L238) | Private cluster configuration. | object({…}) | | null | +| [release_channel](variables.tf#L257) | Release channel for GKE upgrades. | string | | null | +| [tags](variables.tf#L263) | Network tags applied to nodes. | list(string) | | null | ## Outputs diff --git a/modules/gke-cluster-standard/main.tf b/modules/gke-cluster-standard/main.tf index 57d5454f38..666de53fa6 100644 --- a/modules/gke-cluster-standard/main.tf +++ b/modules/gke-cluster-standard/main.tf @@ -40,8 +40,8 @@ resource "google_container_cluster" "cluster" { : "DATAPATH_PROVIDER_UNSPECIFIED" ) - # the default nodepool is deleted here, use the gke-nodepool module instead - # default nodepool configuration based on a shielded_nodes variable + # the default node pool is deleted here, use the gke-nodepool module instead. + # the default node pool configuration is based on a shielded_nodes variable. node_config { dynamic "shielded_instance_config" { for_each = var.enable_features.shielded_nodes ? [""] : [] @@ -164,6 +164,13 @@ resource "google_container_cluster" "cluster" { } } + dynamic "gateway_api_config" { + for_each = var.enable_features.gateway_api ? [""] : [] + content { + channel = "CHANNEL_STANDARD" + } + } + dynamic "ip_allocation_policy" { for_each = var.vpc_config.secondary_range_blocks != null ? 
[""] : [] content { @@ -205,13 +212,6 @@ resource "google_container_cluster" "cluster" { } } - dynamic "gateway_api_config" { - for_each = var.enable_features.gateway_api ? [""] : [] - content { - channel = "CHANNEL_STANDARD" - } - } - maintenance_policy { dynamic "daily_maintenance_window" { for_each = ( @@ -277,22 +277,21 @@ resource "google_container_cluster" "cluster" { } } - dynamic "monitoring_config" { - for_each = var.monitoring_config != null ? [""] : [] - content { - enable_components = var.monitoring_config.enable_components - dynamic "managed_prometheus" { - for_each = ( - try(var.monitoring_config.managed_prometheus, null) == true ? [""] : [] - ) - content { - enabled = true - } - } + monitoring_config { + enable_components = toset(compact([ + # System metrics is the minimum requirement if any other metrics are enabled. This is checked by input var validation. + var.monitoring_config.enable_system_metrics ? "SYSTEM_COMPONENTS" : null, + # Control plane metrics: + var.monitoring_config.enable_api_server_metrics ? "APISERVER" : null, + var.monitoring_config.enable_controller_manager_metrics ? "CONTROLLER_MANAGER" : null, + var.monitoring_config.enable_scheduler_metrics ? "SCHEDULER" : null, + ])) + managed_prometheus { + enabled = var.monitoring_config.enable_managed_prometheus } } - # dataplane v2 has built-in network policies + # Dataplane V2 has built-in network policies dynamic "network_policy" { for_each = ( var.enable_addons.network_policy && !var.enable_features.dataplane_v2 diff --git a/modules/gke-cluster-standard/variables.tf b/modules/gke-cluster-standard/variables.tf index cc1cb63f17..5387596142 100644 --- a/modules/gke-cluster-standard/variables.tf +++ b/modules/gke-cluster-standard/variables.tf @@ -197,13 +197,29 @@ variable "min_master_version" { } variable "monitoring_config" { - description = "Monitoring components." + description = "Monitoring configuration. Google Cloud Managed Service for Prometheus is enabled by default." 
type = object({ - enable_components = optional(list(string)) - managed_prometheus = optional(bool) + enable_system_metrics = optional(bool, true) + + # Control plane metrics + enable_api_server_metrics = optional(bool, false) + enable_controller_manager_metrics = optional(bool, false) + enable_scheduler_metrics = optional(bool, false) + + # TODO add kube state metrics and validation + + # Google Cloud Managed Service for Prometheus + enable_managed_prometheus = optional(bool, true) }) - default = { - enable_components = ["SYSTEM_COMPONENTS"] + default = {} + nullable = false + validation { + condition = anytrue([ + var.monitoring_config.enable_api_server_metrics, + var.monitoring_config.enable_controller_manager_metrics, + var.monitoring_config.enable_scheduler_metrics, + ]) ? var.monitoring_config.enable_system_metrics : true + error_message = "System metrics are the minimum required component for enabling metrics collection." } } diff --git a/tests/modules/gke_cluster_standard/examples/monitoring-config-control-plane.yaml b/tests/modules/gke_cluster_standard/examples/monitoring-config-control-plane.yaml new file mode 100644 index 0000000000..b31087701d --- /dev/null +++ b/tests/modules/gke_cluster_standard/examples/monitoring-config-control-plane.yaml @@ -0,0 +1,27 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +values: + module.cluster-1.google_container_cluster.cluster: + monitoring_config: + - enable_components: + - APISERVER + - CONTROLLER_MANAGER + - SCHEDULER + - SYSTEM_COMPONENTS + managed_prometheus: + - enabled: true + +counts: + google_container_cluster: 1 diff --git a/tests/modules/gke_cluster_standard/examples/monitoring-config-disable-all.yaml b/tests/modules/gke_cluster_standard/examples/monitoring-config-disable-all.yaml new file mode 100644 index 0000000000..1b5576a4d0 --- /dev/null +++ b/tests/modules/gke_cluster_standard/examples/monitoring-config-disable-all.yaml @@ -0,0 +1,23 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +values: + module.cluster-1.google_container_cluster.cluster: + monitoring_config: + - enable_components: [] + managed_prometheus: + - enabled: false + +counts: + google_container_cluster: 1