diff --git a/modules/gke-cluster-autopilot/README.md b/modules/gke-cluster-autopilot/README.md
index c4176a1df8..da6390661e 100644
--- a/modules/gke-cluster-autopilot/README.md
+++ b/modules/gke-cluster-autopilot/README.md
@@ -87,6 +87,35 @@ module "cluster-1" {
# tftest modules=1 resources=1 inventory=logging-config.yaml
```
+### Monitoring configuration
+
+This example shows how to [configure collection of Kubernetes control plane metrics](https://cloud.google.com/stackdriver/docs/solutions/gke/managing-metrics#enable-control-plane-metrics). Metrics for the control plane components (API server, controller manager, and scheduler) are not collected by default and must be enabled explicitly.
+
+> **Note**
+> System metrics collection is pre-configured for Autopilot clusters and cannot be disabled.
+
+> **Warning**
+> GKE **workload metrics** is deprecated and has been removed in GKE 1.24 and later. It is replaced by [Google Cloud Managed Service for Prometheus](https://cloud.google.com/stackdriver/docs/managed-prometheus), which is Google's recommended way to monitor Kubernetes applications by using Cloud Monitoring.
+
+```hcl
+module "cluster-1" {
+ source = "./fabric/modules/gke-cluster-autopilot"
+ project_id = var.project_id
+ name = "cluster-1"
+ location = "europe-west1"
+ vpc_config = {
+ network = var.vpc.self_link
+ subnetwork = var.subnet.self_link
+ }
+ monitoring_config = {
+ enable_api_server_metrics = true
+ enable_controller_manager_metrics = true
+ enable_scheduler_metrics = true
+ }
+}
+# tftest modules=1 resources=1 inventory=monitoring-config-control-plane.yaml
+```
+
### Backup for GKE
This example shows how to [enable the Backup for GKE agent and configure a Backup Plan](https://cloud.google.com/kubernetes-engine/docs/add-on/backup-for-gke/concepts/backup-for-gke) for GKE Standard clusters.
@@ -120,9 +149,9 @@ module "cluster-1" {
| name | description | type | required | default |
|---|---|:---:|:---:|:---:|
| [location](variables.tf#L110) | Autopilot cluster are always regional. | string
| ✓ | |
-| [name](variables.tf#L155) | Cluster name. | string
| ✓ | |
-| [project_id](variables.tf#L181) | Cluster project id. | string
| ✓ | |
-| [vpc_config](variables.tf#L209) | VPC-level configuration. | object({…})
| ✓ | |
+| [name](variables.tf#L170) | Cluster name. | string
| ✓ | |
+| [project_id](variables.tf#L196) | Cluster project id. | string
| ✓ | |
+| [vpc_config](variables.tf#L224) | VPC-level configuration. | object({…})
| ✓ | |
| [backup_configs](variables.tf#L17) | Configuration for Backup for GKE. | object({…})
| | {}
|
| [description](variables.tf#L37) | Cluster description. | string
| | null
|
| [enable_addons](variables.tf#L43) | Addons enabled in the cluster (true means enabled). | object({…})
| | {…}
|
@@ -132,11 +161,12 @@ module "cluster-1" {
| [logging_config](variables.tf#L115) | Logging configuration. | object({…})
| | {}
|
| [maintenance_config](variables.tf#L126) | Maintenance window configuration. | object({…})
| | {…}
|
| [min_master_version](variables.tf#L149) | Minimum version of the master, defaults to the version of the most recent official release. | string
| | null
|
-| [node_locations](variables.tf#L160) | Zones in which the cluster's nodes are located. | list(string)
| | []
|
-| [private_cluster_config](variables.tf#L167) | Private cluster configuration. | object({…})
| | null
|
-| [release_channel](variables.tf#L186) | Release channel for GKE upgrades. Clusters created in the Autopilot mode must use a release channel. Choose between \"RAPID\", \"REGULAR\", and \"STABLE\". | string
| | "REGULAR"
|
-| [service_account](variables.tf#L197) | The Google Cloud Platform Service Account to be used by the node VMs created by GKE Autopilot. | string
| | null
|
-| [tags](variables.tf#L203) | Network tags applied to nodes. | list(string)
| | null
|
+| [monitoring_config](variables.tf#L155) | Monitoring configuration. System metrics collection cannot be disabled for Autopilot clusters. Control plane metrics are optional. Google Cloud Managed Service for Prometheus is enabled by default. | object({…})
| | {}
|
+| [node_locations](variables.tf#L175) | Zones in which the cluster's nodes are located. | list(string)
| | []
|
+| [private_cluster_config](variables.tf#L182) | Private cluster configuration. | object({…})
| | null
|
+| [release_channel](variables.tf#L201) | Release channel for GKE upgrades. Clusters created in the Autopilot mode must use a release channel. Choose between \"RAPID\", \"REGULAR\", and \"STABLE\". | string
| | "REGULAR"
|
+| [service_account](variables.tf#L212) | The Google Cloud Platform Service Account to be used by the node VMs created by GKE Autopilot. | string
| | null
|
+| [tags](variables.tf#L218) | Network tags applied to nodes. | list(string)
| | null
|
## Outputs
diff --git a/modules/gke-cluster-autopilot/main.tf b/modules/gke-cluster-autopilot/main.tf
index 7ae4d04490..330c499326 100644
--- a/modules/gke-cluster-autopilot/main.tf
+++ b/modules/gke-cluster-autopilot/main.tf
@@ -203,6 +203,20 @@ resource "google_container_cluster" "cluster" {
}
}
+ monitoring_config {
+ enable_components = toset(compact([
+ # System metrics collection cannot be disabled for Autopilot clusters, so this component is always included.
+ "SYSTEM_COMPONENTS",
+ # Control plane metrics: each flag yields its component name or null, and compact() drops the nulls.
+ var.monitoring_config.enable_api_server_metrics ? "APISERVER" : null,
+ var.monitoring_config.enable_controller_manager_metrics ? "CONTROLLER_MANAGER" : null,
+ var.monitoring_config.enable_scheduler_metrics ? "SCHEDULER" : null,
+ ]))
+ managed_prometheus {
+ enabled = var.monitoring_config.enable_managed_prometheus
+ }
+ }
+
dynamic "notification_config" {
for_each = var.enable_features.upgrade_notifications != null ? [""] : []
content {
@@ -305,7 +319,6 @@ resource "google_gke_backup_backup_plan" "backup_plan" {
}
}
-
resource "google_compute_network_peering_routes_config" "gke_master" {
count = (
try(var.private_cluster_config.peering_config, null) != null ? 1 : 0
diff --git a/modules/gke-cluster-autopilot/variables.tf b/modules/gke-cluster-autopilot/variables.tf
index 9a30b5bf3e..52896bbdc9 100644
--- a/modules/gke-cluster-autopilot/variables.tf
+++ b/modules/gke-cluster-autopilot/variables.tf
@@ -152,6 +152,21 @@ variable "min_master_version" {
default = null
}
+variable "monitoring_config" {
+ description = "Monitoring configuration. System metrics collection cannot be disabled for Autopilot clusters. Control plane metrics are optional. Google Cloud Managed Service for Prometheus is enabled by default."
+ type = object({
+ # Control plane metrics; each one is opt-in (defaults to false).
+ enable_api_server_metrics = optional(bool, false)
+ enable_controller_manager_metrics = optional(bool, false)
+ enable_scheduler_metrics = optional(bool, false)
+ # Google Cloud Managed Service for Prometheus; enabled unless explicitly set to false.
+ # GKE Autopilot clusters running GKE version 1.25 or greater must have this on.
+ enable_managed_prometheus = optional(bool, true)
+ })
+ default = {}
+ nullable = false
+}
+
variable "name" {
description = "Cluster name."
type = string
diff --git a/tests/modules/gke_cluster_autopilot/examples/monitoring-config-control-plane.yaml b/tests/modules/gke_cluster_autopilot/examples/monitoring-config-control-plane.yaml
new file mode 100644
index 0000000000..b31087701d
--- /dev/null
+++ b/tests/modules/gke_cluster_autopilot/examples/monitoring-config-control-plane.yaml
@@ -0,0 +1,27 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+values:
+ module.cluster-1.google_container_cluster.cluster:
+ monitoring_config:
+ - enable_components:
+ - APISERVER
+ - CONTROLLER_MANAGER
+ - SCHEDULER
+ - SYSTEM_COMPONENTS
+ managed_prometheus:
+ - enabled: true
+
+counts:
+ google_container_cluster: 1