From 3265a94032d745ca65e03bd125e91ccc55d5ad6f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Legrand?= <aurelien.legrand01@gmail.com>
Date: Wed, 3 Jul 2024 12:27:03 +0200
Subject: [PATCH 1/7] Adding TPU limits for GKE cluster node auto-provisioning
 (NAP)

---
 modules/gke-cluster-standard/README.md    | 42 +++++++++++------------
 modules/gke-cluster-standard/main.tf      | 13 +++++++
 modules/gke-cluster-standard/variables.tf |  5 +++
 3 files changed, 39 insertions(+), 21 deletions(-)
diff --git a/modules/gke-cluster-standard/README.md b/modules/gke-cluster-standard/README.md
index 40a6cdc6f0..27cb36a41c 100644
--- a/modules/gke-cluster-standard/README.md
+++ b/modules/gke-cluster-standard/README.md
@@ -310,28 +310,28 @@ module "cluster-1" {
 
 | name | description | type | required | default |
 |---|---|:---:|:---:|:---:|
-| [location](variables.tf#L237) | Cluster zone or region. | <code>string</code> | ✓ |  |
-| [name](variables.tf#L371) | Cluster name. | <code>string</code> | ✓ |  |
-| [project_id](variables.tf#L410) | Cluster project id. | <code>string</code> | ✓ |  |
-| [vpc_config](variables.tf#L421) | VPC-level configuration. | <code title="object&#40;&#123;&#10;  network                    &#61; string&#10;  subnetwork                 &#61; string&#10;  master_ipv4_cidr_block     &#61; optional&#40;string&#41;&#10;  master_endpoint_subnetwork &#61; optional&#40;string&#41;&#10;  secondary_range_blocks &#61; optional&#40;object&#40;&#123;&#10;    pods     &#61; string&#10;    services &#61; string&#10;  &#125;&#41;&#41;&#10;  secondary_range_names &#61; optional&#40;object&#40;&#123;&#10;    pods     &#61; optional&#40;string, &#34;pods&#34;&#41;&#10;    services &#61; optional&#40;string, &#34;services&#34;&#41;&#10;  &#125;&#41;&#41;&#10;  additional_ranges        &#61; optional&#40;list&#40;string&#41;&#41;&#10;  master_authorized_ranges &#61; optional&#40;map&#40;string&#41;&#41;&#10;  stack_type               &#61; optional&#40;string&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | ✓ |  |
+| [location](variables.tf#L242) | Cluster zone or region. | <code>string</code> | ✓ |  |
+| [name](variables.tf#L376) | Cluster name. | <code>string</code> | ✓ |  |
+| [project_id](variables.tf#L415) | Cluster project id. | <code>string</code> | ✓ |  |
+| [vpc_config](variables.tf#L426) | VPC-level configuration. | <code title="object&#40;&#123;&#10;  network                    &#61; string&#10;  subnetwork                 &#61; string&#10;  master_ipv4_cidr_block     &#61; optional&#40;string&#41;&#10;  master_endpoint_subnetwork &#61; optional&#40;string&#41;&#10;  secondary_range_blocks &#61; optional&#40;object&#40;&#123;&#10;    pods     &#61; string&#10;    services &#61; string&#10;  &#125;&#41;&#41;&#10;  secondary_range_names &#61; optional&#40;object&#40;&#123;&#10;    pods     &#61; optional&#40;string, &#34;pods&#34;&#41;&#10;    services &#61; optional&#40;string, &#34;services&#34;&#41;&#10;  &#125;&#41;&#41;&#10;  additional_ranges        &#61; optional&#40;list&#40;string&#41;&#41;&#10;  master_authorized_ranges &#61; optional&#40;map&#40;string&#41;&#41;&#10;  stack_type               &#61; optional&#40;string&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | ✓ |  |
 | [backup_configs](variables.tf#L17) | Configuration for Backup for GKE. | <code title="object&#40;&#123;&#10;  enable_backup_agent &#61; optional&#40;bool, false&#41;&#10;  backup_plans &#61; optional&#40;map&#40;object&#40;&#123;&#10;    region                            &#61; string&#10;    applications                      &#61; optional&#40;map&#40;list&#40;string&#41;&#41;&#41;&#10;    encryption_key                    &#61; optional&#40;string&#41;&#10;    include_secrets                   &#61; optional&#40;bool, true&#41;&#10;    include_volume_data               &#61; optional&#40;bool, true&#41;&#10;    labels                            &#61; optional&#40;map&#40;string&#41;&#41;&#10;    namespaces                        &#61; optional&#40;list&#40;string&#41;&#41;&#10;    schedule                          &#61; optional&#40;string&#41;&#10;    retention_policy_days             &#61; optional&#40;number&#41;&#10;    retention_policy_lock             &#61; optional&#40;bool, false&#41;&#10;    retention_policy_delete_lock_days &#61; optional&#40;number&#41;&#10;  &#125;&#41;&#41;, &#123;&#125;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>&#123;&#125;</code> |
-| [cluster_autoscaling](variables.tf#L39) | Enable and configure limits for Node Auto-Provisioning with Cluster Autoscaler. | <code title="object&#40;&#123;&#10;  enabled             &#61; optional&#40;bool, true&#41;&#10;  autoscaling_profile &#61; optional&#40;string, &#34;BALANCED&#34;&#41;&#10;  auto_provisioning_defaults &#61; optional&#40;object&#40;&#123;&#10;    boot_disk_kms_key &#61; optional&#40;string&#41;&#10;    disk_size         &#61; optional&#40;number&#41;&#10;    disk_type         &#61; optional&#40;string, &#34;pd-standard&#34;&#41;&#10;    image_type        &#61; optional&#40;string&#41;&#10;    oauth_scopes      &#61; optional&#40;list&#40;string&#41;&#41;&#10;    service_account   &#61; optional&#40;string&#41;&#10;    management &#61; optional&#40;object&#40;&#123;&#10;      auto_repair  &#61; optional&#40;bool, true&#41;&#10;      auto_upgrade &#61; optional&#40;bool, true&#41;&#10;    &#125;&#41;&#41;&#10;    shielded_instance_config &#61; optional&#40;object&#40;&#123;&#10;      integrity_monitoring &#61; optional&#40;bool, true&#41;&#10;      secure_boot          &#61; optional&#40;bool, false&#41;&#10;    &#125;&#41;&#41;&#10;    upgrade_settings &#61; optional&#40;object&#40;&#123;&#10;      blue_green &#61; optional&#40;object&#40;&#123;&#10;        node_pool_soak_duration &#61; optional&#40;string&#41;&#10;        standard_rollout_policy &#61; optional&#40;object&#40;&#123;&#10;          batch_percentage    &#61; optional&#40;number&#41;&#10;          batch_node_count    &#61; optional&#40;number&#41;&#10;          batch_soak_duration &#61; optional&#40;string&#41;&#10;        &#125;&#41;&#41;&#10;      &#125;&#41;&#41;&#10;      surge &#61; optional&#40;object&#40;&#123;&#10;        max         &#61; optional&#40;number&#41;&#10;        unavailable &#61; optional&#40;number&#41;&#10;      &#125;&#41;&#41;&#10;    &#125;&#41;&#41;&#10;  &#125;&#41;&#41;&#10;  cpu_limits &#61; optional&#40;object&#40;&#123;&#10;    min &#61; 
number&#10;    max &#61; number&#10;  &#125;&#41;&#41;&#10;  mem_limits &#61; optional&#40;object&#40;&#123;&#10;    min &#61; number&#10;    max &#61; number&#10;  &#125;&#41;&#41;&#10;  gpu_resources &#61; optional&#40;list&#40;object&#40;&#123;&#10;    resource_type &#61; string&#10;    min           &#61; number&#10;    max           &#61; number&#10;  &#125;&#41;&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>null</code> |
-| [default_nodepool](variables.tf#L118) | Enable default nodepool. | <code title="object&#40;&#123;&#10;  remove_pool        &#61; optional&#40;bool, true&#41;&#10;  initial_node_count &#61; optional&#40;number, 1&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>&#123;&#125;</code> |
-| [deletion_protection](variables.tf#L136) | Whether or not to allow Terraform to destroy the cluster. Unless this field is set to false in Terraform state, a terraform destroy or terraform apply that would delete the cluster will fail. | <code>bool</code> |  | <code>true</code> |
-| [description](variables.tf#L143) | Cluster description. | <code>string</code> |  | <code>null</code> |
-| [enable_addons](variables.tf#L149) | Addons enabled in the cluster (true means enabled). | <code title="object&#40;&#123;&#10;  cloudrun                       &#61; optional&#40;bool, false&#41;&#10;  config_connector               &#61; optional&#40;bool, false&#41;&#10;  dns_cache                      &#61; optional&#40;bool, false&#41;&#10;  gce_persistent_disk_csi_driver &#61; optional&#40;bool, false&#41;&#10;  gcp_filestore_csi_driver       &#61; optional&#40;bool, false&#41;&#10;  gcs_fuse_csi_driver            &#61; optional&#40;bool, false&#41;&#10;  horizontal_pod_autoscaling     &#61; optional&#40;bool, false&#41;&#10;  http_load_balancing            &#61; optional&#40;bool, false&#41;&#10;  istio &#61; optional&#40;object&#40;&#123;&#10;    enable_tls &#61; bool&#10;  &#125;&#41;&#41;&#10;  kalm           &#61; optional&#40;bool, false&#41;&#10;  network_policy &#61; optional&#40;bool, false&#41;&#10;  stateful_ha    &#61; optional&#40;bool, false&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code title="&#123;&#10;  horizontal_pod_autoscaling &#61; true&#10;  http_load_balancing        &#61; true&#10;&#125;">&#123;&#8230;&#125;</code> |
-| [enable_features](variables.tf#L174) | Enable cluster-level features. Certain features allow configuration. | <code title="object&#40;&#123;&#10;  beta_apis                         &#61; optional&#40;list&#40;string&#41;&#41;&#10;  binary_authorization              &#61; optional&#40;bool, false&#41;&#10;  cilium_clusterwide_network_policy &#61; optional&#40;bool, false&#41;&#10;  cost_management                   &#61; optional&#40;bool, false&#41;&#10;  dns &#61; optional&#40;object&#40;&#123;&#10;    provider &#61; optional&#40;string&#41;&#10;    scope    &#61; optional&#40;string&#41;&#10;    domain   &#61; optional&#40;string&#41;&#10;  &#125;&#41;&#41;&#10;  database_encryption &#61; optional&#40;object&#40;&#123;&#10;    state    &#61; string&#10;    key_name &#61; string&#10;  &#125;&#41;&#41;&#10;  dataplane_v2         &#61; optional&#40;bool, false&#41;&#10;  fqdn_network_policy  &#61; optional&#40;bool, false&#41;&#10;  gateway_api          &#61; optional&#40;bool, false&#41;&#10;  groups_for_rbac      &#61; optional&#40;string&#41;&#10;  image_streaming      &#61; optional&#40;bool, false&#41;&#10;  intranode_visibility &#61; optional&#40;bool, false&#41;&#10;  l4_ilb_subsetting    &#61; optional&#40;bool, false&#41;&#10;  mesh_certificates    &#61; optional&#40;bool&#41;&#10;  pod_security_policy  &#61; optional&#40;bool, false&#41;&#10;  resource_usage_export &#61; optional&#40;object&#40;&#123;&#10;    dataset                              &#61; string&#10;    enable_network_egress_metering       &#61; optional&#40;bool&#41;&#10;    enable_resource_consumption_metering &#61; optional&#40;bool&#41;&#10;  &#125;&#41;&#41;&#10;  service_external_ips &#61; optional&#40;bool, true&#41;&#10;  shielded_nodes       &#61; optional&#40;bool, false&#41;&#10;  tpu                  &#61; optional&#40;bool, false&#41;&#10;  upgrade_notifications &#61; optional&#40;object&#40;&#123;&#10;    topic_id &#61; optional&#40;string&#41;&#10;  &#125;&#41;&#41;&#10;  
vertical_pod_autoscaling &#61; optional&#40;bool, false&#41;&#10;  workload_identity        &#61; optional&#40;bool, true&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code title="&#123;&#10;  workload_identity &#61; true&#10;&#125;">&#123;&#8230;&#125;</code> |
-| [issue_client_certificate](variables.tf#L224) | Enable issuing client certificate. | <code>bool</code> |  | <code>false</code> |
-| [labels](variables.tf#L230) | Cluster resource labels. | <code>map&#40;string&#41;</code> |  | <code>&#123;&#125;</code> |
-| [logging_config](variables.tf#L242) | Logging configuration. | <code title="object&#40;&#123;&#10;  enable_system_logs             &#61; optional&#40;bool, true&#41;&#10;  enable_workloads_logs          &#61; optional&#40;bool, false&#41;&#10;  enable_api_server_logs         &#61; optional&#40;bool, false&#41;&#10;  enable_scheduler_logs          &#61; optional&#40;bool, false&#41;&#10;  enable_controller_manager_logs &#61; optional&#40;bool, false&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>&#123;&#125;</code> |
-| [maintenance_config](variables.tf#L263) | Maintenance window configuration. | <code title="object&#40;&#123;&#10;  daily_window_start_time &#61; optional&#40;string&#41;&#10;  recurring_window &#61; optional&#40;object&#40;&#123;&#10;    start_time &#61; string&#10;    end_time   &#61; string&#10;    recurrence &#61; string&#10;  &#125;&#41;&#41;&#10;  maintenance_exclusions &#61; optional&#40;list&#40;object&#40;&#123;&#10;    name       &#61; string&#10;    start_time &#61; string&#10;    end_time   &#61; string&#10;    scope      &#61; optional&#40;string&#41;&#10;  &#125;&#41;&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code title="&#123;&#10;  daily_window_start_time &#61; &#34;03:00&#34;&#10;  recurring_window        &#61; null&#10;  maintenance_exclusion   &#61; &#91;&#93;&#10;&#125;">&#123;&#8230;&#125;</code> |
-| [max_pods_per_node](variables.tf#L286) | Maximum number of pods per node in this cluster. | <code>number</code> |  | <code>110</code> |
-| [min_master_version](variables.tf#L292) | Minimum version of the master, defaults to the version of the most recent official release. | <code>string</code> |  | <code>null</code> |
-| [monitoring_config](variables.tf#L298) | Monitoring configuration. Google Cloud Managed Service for Prometheus is enabled by default. | <code title="object&#40;&#123;&#10;  enable_system_metrics &#61; optional&#40;bool, true&#41;&#10;  enable_api_server_metrics         &#61; optional&#40;bool, false&#41;&#10;  enable_controller_manager_metrics &#61; optional&#40;bool, false&#41;&#10;  enable_scheduler_metrics          &#61; optional&#40;bool, false&#41;&#10;  enable_daemonset_metrics   &#61; optional&#40;bool, false&#41;&#10;  enable_deployment_metrics  &#61; optional&#40;bool, false&#41;&#10;  enable_hpa_metrics         &#61; optional&#40;bool, false&#41;&#10;  enable_pod_metrics         &#61; optional&#40;bool, false&#41;&#10;  enable_statefulset_metrics &#61; optional&#40;bool, false&#41;&#10;  enable_storage_metrics     &#61; optional&#40;bool, false&#41;&#10;  enable_managed_prometheus &#61; optional&#40;bool, true&#41;&#10;  advanced_datapath_observability &#61; optional&#40;object&#40;&#123;&#10;    enable_metrics &#61; bool&#10;    enable_relay   &#61; optional&#40;bool&#41;&#10;  &#125;&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>&#123;&#125;</code> |
-| [node_config](variables.tf#L376) | Node-level configuration. | <code title="object&#40;&#123;&#10;  boot_disk_kms_key &#61; optional&#40;string&#41;&#10;  k8s_labels        &#61; optional&#40;map&#40;string&#41;&#41;&#10;  labels            &#61; optional&#40;map&#40;string&#41;&#41;&#10;  service_account   &#61; optional&#40;string&#41;&#10;  tags              &#61; optional&#40;list&#40;string&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>&#123;&#125;</code> |
-| [node_locations](variables.tf#L389) | Zones in which the cluster's nodes are located. | <code>list&#40;string&#41;</code> |  | <code>&#91;&#93;</code> |
-| [private_cluster_config](variables.tf#L396) | Private cluster configuration. | <code title="object&#40;&#123;&#10;  enable_private_endpoint &#61; optional&#40;bool&#41;&#10;  master_global_access    &#61; optional&#40;bool&#41;&#10;  peering_config &#61; optional&#40;object&#40;&#123;&#10;    export_routes &#61; optional&#40;bool&#41;&#10;    import_routes &#61; optional&#40;bool&#41;&#10;    project_id    &#61; optional&#40;string&#41;&#10;  &#125;&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>null</code> |
-| [release_channel](variables.tf#L415) | Release channel for GKE upgrades. | <code>string</code> |  | <code>null</code> |
+| [cluster_autoscaling](variables.tf#L39) | Enable and configure limits for Node Auto-Provisioning with Cluster Autoscaler. | <code title="object&#40;&#123;&#10;  enabled             &#61; optional&#40;bool, true&#41;&#10;  autoscaling_profile &#61; optional&#40;string, &#34;BALANCED&#34;&#41;&#10;  auto_provisioning_defaults &#61; optional&#40;object&#40;&#123;&#10;    boot_disk_kms_key &#61; optional&#40;string&#41;&#10;    disk_size         &#61; optional&#40;number&#41;&#10;    disk_type         &#61; optional&#40;string, &#34;pd-standard&#34;&#41;&#10;    image_type        &#61; optional&#40;string&#41;&#10;    oauth_scopes      &#61; optional&#40;list&#40;string&#41;&#41;&#10;    service_account   &#61; optional&#40;string&#41;&#10;    management &#61; optional&#40;object&#40;&#123;&#10;      auto_repair  &#61; optional&#40;bool, true&#41;&#10;      auto_upgrade &#61; optional&#40;bool, true&#41;&#10;    &#125;&#41;&#41;&#10;    shielded_instance_config &#61; optional&#40;object&#40;&#123;&#10;      integrity_monitoring &#61; optional&#40;bool, true&#41;&#10;      secure_boot          &#61; optional&#40;bool, false&#41;&#10;    &#125;&#41;&#41;&#10;    upgrade_settings &#61; optional&#40;object&#40;&#123;&#10;      blue_green &#61; optional&#40;object&#40;&#123;&#10;        node_pool_soak_duration &#61; optional&#40;string&#41;&#10;        standard_rollout_policy &#61; optional&#40;object&#40;&#123;&#10;          batch_percentage    &#61; optional&#40;number&#41;&#10;          batch_node_count    &#61; optional&#40;number&#41;&#10;          batch_soak_duration &#61; optional&#40;string&#41;&#10;        &#125;&#41;&#41;&#10;      &#125;&#41;&#41;&#10;      surge &#61; optional&#40;object&#40;&#123;&#10;        max         &#61; optional&#40;number&#41;&#10;        unavailable &#61; optional&#40;number&#41;&#10;      &#125;&#41;&#41;&#10;    &#125;&#41;&#41;&#10;  &#125;&#41;&#41;&#10;  cpu_limits &#61; optional&#40;object&#40;&#123;&#10;    min &#61; 
number&#10;    max &#61; number&#10;  &#125;&#41;&#41;&#10;  mem_limits &#61; optional&#40;object&#40;&#123;&#10;    min &#61; number&#10;    max &#61; number&#10;  &#125;&#41;&#41;&#10;  gpu_resources &#61; optional&#40;list&#40;object&#40;&#123;&#10;    resource_type &#61; string&#10;    min           &#61; number&#10;    max           &#61; number&#10;  &#125;&#41;&#41;&#41;&#10;  tpu_resources &#61; optional&#40;list&#40;object&#40;&#123;&#10;    resource_type &#61; string&#10;    min           &#61; number&#10;    max           &#61; number&#10;  &#125;&#41;&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>null</code> |
+| [default_nodepool](variables.tf#L123) | Enable default nodepool. | <code title="object&#40;&#123;&#10;  remove_pool        &#61; optional&#40;bool, true&#41;&#10;  initial_node_count &#61; optional&#40;number, 1&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>&#123;&#125;</code> |
+| [deletion_protection](variables.tf#L141) | Whether or not to allow Terraform to destroy the cluster. Unless this field is set to false in Terraform state, a terraform destroy or terraform apply that would delete the cluster will fail. | <code>bool</code> |  | <code>true</code> |
+| [description](variables.tf#L148) | Cluster description. | <code>string</code> |  | <code>null</code> |
+| [enable_addons](variables.tf#L154) | Addons enabled in the cluster (true means enabled). | <code title="object&#40;&#123;&#10;  cloudrun                       &#61; optional&#40;bool, false&#41;&#10;  config_connector               &#61; optional&#40;bool, false&#41;&#10;  dns_cache                      &#61; optional&#40;bool, false&#41;&#10;  gce_persistent_disk_csi_driver &#61; optional&#40;bool, false&#41;&#10;  gcp_filestore_csi_driver       &#61; optional&#40;bool, false&#41;&#10;  gcs_fuse_csi_driver            &#61; optional&#40;bool, false&#41;&#10;  horizontal_pod_autoscaling     &#61; optional&#40;bool, false&#41;&#10;  http_load_balancing            &#61; optional&#40;bool, false&#41;&#10;  istio &#61; optional&#40;object&#40;&#123;&#10;    enable_tls &#61; bool&#10;  &#125;&#41;&#41;&#10;  kalm           &#61; optional&#40;bool, false&#41;&#10;  network_policy &#61; optional&#40;bool, false&#41;&#10;  stateful_ha    &#61; optional&#40;bool, false&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code title="&#123;&#10;  horizontal_pod_autoscaling &#61; true&#10;  http_load_balancing        &#61; true&#10;&#125;">&#123;&#8230;&#125;</code> |
+| [enable_features](variables.tf#L179) | Enable cluster-level features. Certain features allow configuration. | <code title="object&#40;&#123;&#10;  beta_apis                         &#61; optional&#40;list&#40;string&#41;&#41;&#10;  binary_authorization              &#61; optional&#40;bool, false&#41;&#10;  cilium_clusterwide_network_policy &#61; optional&#40;bool, false&#41;&#10;  cost_management                   &#61; optional&#40;bool, false&#41;&#10;  dns &#61; optional&#40;object&#40;&#123;&#10;    provider &#61; optional&#40;string&#41;&#10;    scope    &#61; optional&#40;string&#41;&#10;    domain   &#61; optional&#40;string&#41;&#10;  &#125;&#41;&#41;&#10;  database_encryption &#61; optional&#40;object&#40;&#123;&#10;    state    &#61; string&#10;    key_name &#61; string&#10;  &#125;&#41;&#41;&#10;  dataplane_v2         &#61; optional&#40;bool, false&#41;&#10;  fqdn_network_policy  &#61; optional&#40;bool, false&#41;&#10;  gateway_api          &#61; optional&#40;bool, false&#41;&#10;  groups_for_rbac      &#61; optional&#40;string&#41;&#10;  image_streaming      &#61; optional&#40;bool, false&#41;&#10;  intranode_visibility &#61; optional&#40;bool, false&#41;&#10;  l4_ilb_subsetting    &#61; optional&#40;bool, false&#41;&#10;  mesh_certificates    &#61; optional&#40;bool&#41;&#10;  pod_security_policy  &#61; optional&#40;bool, false&#41;&#10;  resource_usage_export &#61; optional&#40;object&#40;&#123;&#10;    dataset                              &#61; string&#10;    enable_network_egress_metering       &#61; optional&#40;bool&#41;&#10;    enable_resource_consumption_metering &#61; optional&#40;bool&#41;&#10;  &#125;&#41;&#41;&#10;  service_external_ips &#61; optional&#40;bool, true&#41;&#10;  shielded_nodes       &#61; optional&#40;bool, false&#41;&#10;  tpu                  &#61; optional&#40;bool, false&#41;&#10;  upgrade_notifications &#61; optional&#40;object&#40;&#123;&#10;    topic_id &#61; optional&#40;string&#41;&#10;  &#125;&#41;&#41;&#10;  
vertical_pod_autoscaling &#61; optional&#40;bool, false&#41;&#10;  workload_identity        &#61; optional&#40;bool, true&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code title="&#123;&#10;  workload_identity &#61; true&#10;&#125;">&#123;&#8230;&#125;</code> |
+| [issue_client_certificate](variables.tf#L229) | Enable issuing client certificate. | <code>bool</code> |  | <code>false</code> |
+| [labels](variables.tf#L235) | Cluster resource labels. | <code>map&#40;string&#41;</code> |  | <code>&#123;&#125;</code> |
+| [logging_config](variables.tf#L247) | Logging configuration. | <code title="object&#40;&#123;&#10;  enable_system_logs             &#61; optional&#40;bool, true&#41;&#10;  enable_workloads_logs          &#61; optional&#40;bool, false&#41;&#10;  enable_api_server_logs         &#61; optional&#40;bool, false&#41;&#10;  enable_scheduler_logs          &#61; optional&#40;bool, false&#41;&#10;  enable_controller_manager_logs &#61; optional&#40;bool, false&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>&#123;&#125;</code> |
+| [maintenance_config](variables.tf#L268) | Maintenance window configuration. | <code title="object&#40;&#123;&#10;  daily_window_start_time &#61; optional&#40;string&#41;&#10;  recurring_window &#61; optional&#40;object&#40;&#123;&#10;    start_time &#61; string&#10;    end_time   &#61; string&#10;    recurrence &#61; string&#10;  &#125;&#41;&#41;&#10;  maintenance_exclusions &#61; optional&#40;list&#40;object&#40;&#123;&#10;    name       &#61; string&#10;    start_time &#61; string&#10;    end_time   &#61; string&#10;    scope      &#61; optional&#40;string&#41;&#10;  &#125;&#41;&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code title="&#123;&#10;  daily_window_start_time &#61; &#34;03:00&#34;&#10;  recurring_window        &#61; null&#10;  maintenance_exclusion   &#61; &#91;&#93;&#10;&#125;">&#123;&#8230;&#125;</code> |
+| [max_pods_per_node](variables.tf#L291) | Maximum number of pods per node in this cluster. | <code>number</code> |  | <code>110</code> |
+| [min_master_version](variables.tf#L297) | Minimum version of the master, defaults to the version of the most recent official release. | <code>string</code> |  | <code>null</code> |
+| [monitoring_config](variables.tf#L303) | Monitoring configuration. Google Cloud Managed Service for Prometheus is enabled by default. | <code title="object&#40;&#123;&#10;  enable_system_metrics &#61; optional&#40;bool, true&#41;&#10;  enable_api_server_metrics         &#61; optional&#40;bool, false&#41;&#10;  enable_controller_manager_metrics &#61; optional&#40;bool, false&#41;&#10;  enable_scheduler_metrics          &#61; optional&#40;bool, false&#41;&#10;  enable_daemonset_metrics   &#61; optional&#40;bool, false&#41;&#10;  enable_deployment_metrics  &#61; optional&#40;bool, false&#41;&#10;  enable_hpa_metrics         &#61; optional&#40;bool, false&#41;&#10;  enable_pod_metrics         &#61; optional&#40;bool, false&#41;&#10;  enable_statefulset_metrics &#61; optional&#40;bool, false&#41;&#10;  enable_storage_metrics     &#61; optional&#40;bool, false&#41;&#10;  enable_managed_prometheus &#61; optional&#40;bool, true&#41;&#10;  advanced_datapath_observability &#61; optional&#40;object&#40;&#123;&#10;    enable_metrics &#61; bool&#10;    enable_relay   &#61; optional&#40;bool&#41;&#10;  &#125;&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>&#123;&#125;</code> |
+| [node_config](variables.tf#L381) | Node-level configuration. | <code title="object&#40;&#123;&#10;  boot_disk_kms_key &#61; optional&#40;string&#41;&#10;  k8s_labels        &#61; optional&#40;map&#40;string&#41;&#41;&#10;  labels            &#61; optional&#40;map&#40;string&#41;&#41;&#10;  service_account   &#61; optional&#40;string&#41;&#10;  tags              &#61; optional&#40;list&#40;string&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>&#123;&#125;</code> |
+| [node_locations](variables.tf#L394) | Zones in which the cluster's nodes are located. | <code>list&#40;string&#41;</code> |  | <code>&#91;&#93;</code> |
+| [private_cluster_config](variables.tf#L401) | Private cluster configuration. | <code title="object&#40;&#123;&#10;  enable_private_endpoint &#61; optional&#40;bool&#41;&#10;  master_global_access    &#61; optional&#40;bool&#41;&#10;  peering_config &#61; optional&#40;object&#40;&#123;&#10;    export_routes &#61; optional&#40;bool&#41;&#10;    import_routes &#61; optional&#40;bool&#41;&#10;    project_id    &#61; optional&#40;string&#41;&#10;  &#125;&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>null</code> |
+| [release_channel](variables.tf#L420) | Release channel for GKE upgrades. | <code>string</code> |  | <code>null</code> |
 
 ## Outputs
 
diff --git a/modules/gke-cluster-standard/main.tf b/modules/gke-cluster-standard/main.tf
index 8cea6cebc7..bbd4a1d993 100644
--- a/modules/gke-cluster-standard/main.tf
+++ b/modules/gke-cluster-standard/main.tf
@@ -233,6 +233,19 @@ resource "google_container_cluster" "cluster" {
           maximum       = gpu_resources.value.max
         }
       }
+      dynamic "resource_limits" {
+        for_each = (
+          try(local.cas.tpu_resources, null) == null
+          ? []
+          : local.cas.tpu_resources
+        )
+        iterator = tpu_resources
+        content {
+          resource_type = tpu_resources.value.resource_type
+          minimum       = tpu_resources.value.min
+          maximum       = tpu_resources.value.max
+        }
+      }
     }
   }
   dynamic "database_encryption" {
diff --git a/modules/gke-cluster-standard/variables.tf b/modules/gke-cluster-standard/variables.tf
index 63e16df12f..aa85b04f55 100644
--- a/modules/gke-cluster-standard/variables.tf
+++ b/modules/gke-cluster-standard/variables.tf
@@ -85,6 +85,11 @@ variable "cluster_autoscaling" {
       min           = number
       max           = number
     })))
+    tpu_resources = optional(list(object({
+      resource_type = string
+      min           = number
+      max           = number
+    })))
   })
   default = null
   validation {

From 995c796307f82f61c26c22f3fa2d3ca9d05aa13a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Legrand?= <aurelien.legrand01@gmail.com>
Date: Tue, 9 Jul 2024 00:10:25 +0200
Subject: [PATCH 2/7] rework of the cluster autoscaling configuration

---
 modules/gke-cluster-standard/README.md    | 37 +++++++++++++++++++++++
 modules/gke-cluster-standard/main.tf      | 25 ++++-----------
 modules/gke-cluster-standard/variables.tf | 13 +++-----
 3 files changed, 47 insertions(+), 28 deletions(-)

diff --git a/modules/gke-cluster-standard/README.md b/modules/gke-cluster-standard/README.md
index 27cb36a41c..2210c3ecb6 100644
--- a/modules/gke-cluster-standard/README.md
+++ b/modules/gke-cluster-standard/README.md
@@ -305,6 +305,43 @@ module "cluster-1" {
 }
 # tftest modules=1 resources=1
 ```
+
+### Node auto-provisioning with GPUs and TPUs
+
+You can use the `cluster_autoscaling` variable to configure node auto-provisioning for the GKE cluster. The example below configures limits for CPU, memory, GPUs and TPUs.
+
+```hcl
+module "cluster-1" {
+  source     = "./fabric/modules/gke-cluster-standard"
+  project_id = var.project_id
+  name       = "cluster-1"
+  location   = "europe-west1-b"
+  vpc_config = {
+    network               = var.vpc.self_link
+    subnetwork            = var.subnet.self_link
+    secondary_range_names = {} # use the default "pods" / "services" range names
+  }
+  cluster_autoscaling = {
+    cpu_limits = {
+      max = 48
+    }
+    mem_limits = {
+      max = 182
+    }
+    # entries can be GPUs or TPUs; min defaults to 0 when omitted
+    accelerator_resources = [{
+      resource_type = "nvidia-l4"
+      max           = 2
+      },
+      {
+        resource_type = "tpu-v5-lite-podslice"
+        max           = 2
+      }
+    ]
+  }
+}
+# tftest modules=1 resources=1
+```
 <!-- BEGIN TFDOC -->
 ## Variables
 
diff --git a/modules/gke-cluster-standard/main.tf b/modules/gke-cluster-standard/main.tf
index bbd4a1d993..af836c542a 100644
--- a/modules/gke-cluster-standard/main.tf
+++ b/modules/gke-cluster-standard/main.tf
@@ -222,28 +222,15 @@ resource "google_container_cluster" "cluster" {
       }
       dynamic "resource_limits" {
         for_each = (
-          try(local.cas.gpu_resources, null) == null
+          try(local.cas.accelerator_resources, null) == null
           ? []
-          : local.cas.gpu_resources
+          : local.cas.accelerator_resources
         )
-        iterator = gpu_resources
+        iterator = accelerator_resources
         content {
-          resource_type = gpu_resources.value.resource_type
-          minimum       = gpu_resources.value.min
-          maximum       = gpu_resources.value.max
-        }
-      }
-      dynamic "resource_limits" {
-        for_each = (
-          try(local.cas.tpu_resources, null) == null
-          ? []
-          : local.cas.tpu_resources
-        )
-        iterator = tpu_resources
-        content {
-          resource_type = tpu_resources.value.resource_type
-          minimum       = tpu_resources.value.min
-          maximum       = tpu_resources.value.max
+          resource_type = accelerator_resources.value.resource_type
+          minimum       = accelerator_resources.value.min
+          maximum       = accelerator_resources.value.max
         }
       }
     }
diff --git a/modules/gke-cluster-standard/variables.tf b/modules/gke-cluster-standard/variables.tf
index aa85b04f55..8dbf810141 100644
--- a/modules/gke-cluster-standard/variables.tf
+++ b/modules/gke-cluster-standard/variables.tf
@@ -73,21 +73,16 @@ variable "cluster_autoscaling" {
       # add validation rule to ensure only one is present if upgrade settings is defined
     }))
     cpu_limits = optional(object({
-      min = number
+      min = optional(number, 0)
       max = number
     }))
     mem_limits = optional(object({
-      min = number
+      min = optional(number, 0)
       max = number
     }))
-    gpu_resources = optional(list(object({
+    accelerator_resources = optional(list(object({
       resource_type = string
-      min           = number
-      max           = number
-    })))
-    tpu_resources = optional(list(object({
-      resource_type = string
-      min           = number
+      min           = optional(number, 0)
       max           = number
     })))
   })

From 130181bae6d9b918b56d4cca0eb4ed03efbad13a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Legrand?= <aurelien.legrand01@gmail.com>
Date: Tue, 9 Jul 2024 00:12:09 +0200
Subject: [PATCH 3/7] updated README

---
 modules/gke-cluster-standard/README.md | 43 +++++++++++++-------------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/modules/gke-cluster-standard/README.md b/modules/gke-cluster-standard/README.md
index 2210c3ecb6..b4308bb696 100644
--- a/modules/gke-cluster-standard/README.md
+++ b/modules/gke-cluster-standard/README.md
@@ -15,6 +15,7 @@ This module offers a way to create and manage Google Kubernetes Engine (GKE) [St
   - [Cloud DNS](#cloud-dns)
   - [Backup for GKE](#backup-for-gke)
   - [Automatic creation of new secondary ranges](#automatic-creation-of-new-secondary-ranges)
+  - [Node auto-provisioning with GPUs and TPUs](#node-auto-provisioning-with-gpus-and-tpus)
 - [Variables](#variables)
 - [Outputs](#outputs)
 <!-- END TOC -->
@@ -347,28 +348,28 @@ module "cluster-1" {
 
 | name | description | type | required | default |
 |---|---|:---:|:---:|:---:|
-| [location](variables.tf#L242) | Cluster zone or region. | <code>string</code> | ✓ |  |
-| [name](variables.tf#L376) | Cluster name. | <code>string</code> | ✓ |  |
-| [project_id](variables.tf#L415) | Cluster project id. | <code>string</code> | ✓ |  |
-| [vpc_config](variables.tf#L426) | VPC-level configuration. | <code title="object&#40;&#123;&#10;  network                    &#61; string&#10;  subnetwork                 &#61; string&#10;  master_ipv4_cidr_block     &#61; optional&#40;string&#41;&#10;  master_endpoint_subnetwork &#61; optional&#40;string&#41;&#10;  secondary_range_blocks &#61; optional&#40;object&#40;&#123;&#10;    pods     &#61; string&#10;    services &#61; string&#10;  &#125;&#41;&#41;&#10;  secondary_range_names &#61; optional&#40;object&#40;&#123;&#10;    pods     &#61; optional&#40;string, &#34;pods&#34;&#41;&#10;    services &#61; optional&#40;string, &#34;services&#34;&#41;&#10;  &#125;&#41;&#41;&#10;  additional_ranges        &#61; optional&#40;list&#40;string&#41;&#41;&#10;  master_authorized_ranges &#61; optional&#40;map&#40;string&#41;&#41;&#10;  stack_type               &#61; optional&#40;string&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | ✓ |  |
+| [location](variables.tf#L237) | Cluster zone or region. | <code>string</code> | ✓ |  |
+| [name](variables.tf#L371) | Cluster name. | <code>string</code> | ✓ |  |
+| [project_id](variables.tf#L410) | Cluster project id. | <code>string</code> | ✓ |  |
+| [vpc_config](variables.tf#L421) | VPC-level configuration. | <code title="object&#40;&#123;&#10;  network                    &#61; string&#10;  subnetwork                 &#61; string&#10;  master_ipv4_cidr_block     &#61; optional&#40;string&#41;&#10;  master_endpoint_subnetwork &#61; optional&#40;string&#41;&#10;  secondary_range_blocks &#61; optional&#40;object&#40;&#123;&#10;    pods     &#61; string&#10;    services &#61; string&#10;  &#125;&#41;&#41;&#10;  secondary_range_names &#61; optional&#40;object&#40;&#123;&#10;    pods     &#61; optional&#40;string, &#34;pods&#34;&#41;&#10;    services &#61; optional&#40;string, &#34;services&#34;&#41;&#10;  &#125;&#41;&#41;&#10;  additional_ranges        &#61; optional&#40;list&#40;string&#41;&#41;&#10;  master_authorized_ranges &#61; optional&#40;map&#40;string&#41;&#41;&#10;  stack_type               &#61; optional&#40;string&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | ✓ |  |
 | [backup_configs](variables.tf#L17) | Configuration for Backup for GKE. | <code title="object&#40;&#123;&#10;  enable_backup_agent &#61; optional&#40;bool, false&#41;&#10;  backup_plans &#61; optional&#40;map&#40;object&#40;&#123;&#10;    region                            &#61; string&#10;    applications                      &#61; optional&#40;map&#40;list&#40;string&#41;&#41;&#41;&#10;    encryption_key                    &#61; optional&#40;string&#41;&#10;    include_secrets                   &#61; optional&#40;bool, true&#41;&#10;    include_volume_data               &#61; optional&#40;bool, true&#41;&#10;    labels                            &#61; optional&#40;map&#40;string&#41;&#41;&#10;    namespaces                        &#61; optional&#40;list&#40;string&#41;&#41;&#10;    schedule                          &#61; optional&#40;string&#41;&#10;    retention_policy_days             &#61; optional&#40;number&#41;&#10;    retention_policy_lock             &#61; optional&#40;bool, false&#41;&#10;    retention_policy_delete_lock_days &#61; optional&#40;number&#41;&#10;  &#125;&#41;&#41;, &#123;&#125;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>&#123;&#125;</code> |
-| [cluster_autoscaling](variables.tf#L39) | Enable and configure limits for Node Auto-Provisioning with Cluster Autoscaler. | <code title="object&#40;&#123;&#10;  enabled             &#61; optional&#40;bool, true&#41;&#10;  autoscaling_profile &#61; optional&#40;string, &#34;BALANCED&#34;&#41;&#10;  auto_provisioning_defaults &#61; optional&#40;object&#40;&#123;&#10;    boot_disk_kms_key &#61; optional&#40;string&#41;&#10;    disk_size         &#61; optional&#40;number&#41;&#10;    disk_type         &#61; optional&#40;string, &#34;pd-standard&#34;&#41;&#10;    image_type        &#61; optional&#40;string&#41;&#10;    oauth_scopes      &#61; optional&#40;list&#40;string&#41;&#41;&#10;    service_account   &#61; optional&#40;string&#41;&#10;    management &#61; optional&#40;object&#40;&#123;&#10;      auto_repair  &#61; optional&#40;bool, true&#41;&#10;      auto_upgrade &#61; optional&#40;bool, true&#41;&#10;    &#125;&#41;&#41;&#10;    shielded_instance_config &#61; optional&#40;object&#40;&#123;&#10;      integrity_monitoring &#61; optional&#40;bool, true&#41;&#10;      secure_boot          &#61; optional&#40;bool, false&#41;&#10;    &#125;&#41;&#41;&#10;    upgrade_settings &#61; optional&#40;object&#40;&#123;&#10;      blue_green &#61; optional&#40;object&#40;&#123;&#10;        node_pool_soak_duration &#61; optional&#40;string&#41;&#10;        standard_rollout_policy &#61; optional&#40;object&#40;&#123;&#10;          batch_percentage    &#61; optional&#40;number&#41;&#10;          batch_node_count    &#61; optional&#40;number&#41;&#10;          batch_soak_duration &#61; optional&#40;string&#41;&#10;        &#125;&#41;&#41;&#10;      &#125;&#41;&#41;&#10;      surge &#61; optional&#40;object&#40;&#123;&#10;        max         &#61; optional&#40;number&#41;&#10;        unavailable &#61; optional&#40;number&#41;&#10;      &#125;&#41;&#41;&#10;    &#125;&#41;&#41;&#10;  &#125;&#41;&#41;&#10;  cpu_limits &#61; optional&#40;object&#40;&#123;&#10;    min &#61; 
number&#10;    max &#61; number&#10;  &#125;&#41;&#41;&#10;  mem_limits &#61; optional&#40;object&#40;&#123;&#10;    min &#61; number&#10;    max &#61; number&#10;  &#125;&#41;&#41;&#10;  gpu_resources &#61; optional&#40;list&#40;object&#40;&#123;&#10;    resource_type &#61; string&#10;    min           &#61; number&#10;    max           &#61; number&#10;  &#125;&#41;&#41;&#41;&#10;  tpu_resources &#61; optional&#40;list&#40;object&#40;&#123;&#10;    resource_type &#61; string&#10;    min           &#61; number&#10;    max           &#61; number&#10;  &#125;&#41;&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>null</code> |
-| [default_nodepool](variables.tf#L123) | Enable default nodepool. | <code title="object&#40;&#123;&#10;  remove_pool        &#61; optional&#40;bool, true&#41;&#10;  initial_node_count &#61; optional&#40;number, 1&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>&#123;&#125;</code> |
-| [deletion_protection](variables.tf#L141) | Whether or not to allow Terraform to destroy the cluster. Unless this field is set to false in Terraform state, a terraform destroy or terraform apply that would delete the cluster will fail. | <code>bool</code> |  | <code>true</code> |
-| [description](variables.tf#L148) | Cluster description. | <code>string</code> |  | <code>null</code> |
-| [enable_addons](variables.tf#L154) | Addons enabled in the cluster (true means enabled). | <code title="object&#40;&#123;&#10;  cloudrun                       &#61; optional&#40;bool, false&#41;&#10;  config_connector               &#61; optional&#40;bool, false&#41;&#10;  dns_cache                      &#61; optional&#40;bool, false&#41;&#10;  gce_persistent_disk_csi_driver &#61; optional&#40;bool, false&#41;&#10;  gcp_filestore_csi_driver       &#61; optional&#40;bool, false&#41;&#10;  gcs_fuse_csi_driver            &#61; optional&#40;bool, false&#41;&#10;  horizontal_pod_autoscaling     &#61; optional&#40;bool, false&#41;&#10;  http_load_balancing            &#61; optional&#40;bool, false&#41;&#10;  istio &#61; optional&#40;object&#40;&#123;&#10;    enable_tls &#61; bool&#10;  &#125;&#41;&#41;&#10;  kalm           &#61; optional&#40;bool, false&#41;&#10;  network_policy &#61; optional&#40;bool, false&#41;&#10;  stateful_ha    &#61; optional&#40;bool, false&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code title="&#123;&#10;  horizontal_pod_autoscaling &#61; true&#10;  http_load_balancing        &#61; true&#10;&#125;">&#123;&#8230;&#125;</code> |
-| [enable_features](variables.tf#L179) | Enable cluster-level features. Certain features allow configuration. | <code title="object&#40;&#123;&#10;  beta_apis                         &#61; optional&#40;list&#40;string&#41;&#41;&#10;  binary_authorization              &#61; optional&#40;bool, false&#41;&#10;  cilium_clusterwide_network_policy &#61; optional&#40;bool, false&#41;&#10;  cost_management                   &#61; optional&#40;bool, false&#41;&#10;  dns &#61; optional&#40;object&#40;&#123;&#10;    provider &#61; optional&#40;string&#41;&#10;    scope    &#61; optional&#40;string&#41;&#10;    domain   &#61; optional&#40;string&#41;&#10;  &#125;&#41;&#41;&#10;  database_encryption &#61; optional&#40;object&#40;&#123;&#10;    state    &#61; string&#10;    key_name &#61; string&#10;  &#125;&#41;&#41;&#10;  dataplane_v2         &#61; optional&#40;bool, false&#41;&#10;  fqdn_network_policy  &#61; optional&#40;bool, false&#41;&#10;  gateway_api          &#61; optional&#40;bool, false&#41;&#10;  groups_for_rbac      &#61; optional&#40;string&#41;&#10;  image_streaming      &#61; optional&#40;bool, false&#41;&#10;  intranode_visibility &#61; optional&#40;bool, false&#41;&#10;  l4_ilb_subsetting    &#61; optional&#40;bool, false&#41;&#10;  mesh_certificates    &#61; optional&#40;bool&#41;&#10;  pod_security_policy  &#61; optional&#40;bool, false&#41;&#10;  resource_usage_export &#61; optional&#40;object&#40;&#123;&#10;    dataset                              &#61; string&#10;    enable_network_egress_metering       &#61; optional&#40;bool&#41;&#10;    enable_resource_consumption_metering &#61; optional&#40;bool&#41;&#10;  &#125;&#41;&#41;&#10;  service_external_ips &#61; optional&#40;bool, true&#41;&#10;  shielded_nodes       &#61; optional&#40;bool, false&#41;&#10;  tpu                  &#61; optional&#40;bool, false&#41;&#10;  upgrade_notifications &#61; optional&#40;object&#40;&#123;&#10;    topic_id &#61; optional&#40;string&#41;&#10;  &#125;&#41;&#41;&#10;  
vertical_pod_autoscaling &#61; optional&#40;bool, false&#41;&#10;  workload_identity        &#61; optional&#40;bool, true&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code title="&#123;&#10;  workload_identity &#61; true&#10;&#125;">&#123;&#8230;&#125;</code> |
-| [issue_client_certificate](variables.tf#L229) | Enable issuing client certificate. | <code>bool</code> |  | <code>false</code> |
-| [labels](variables.tf#L235) | Cluster resource labels. | <code>map&#40;string&#41;</code> |  | <code>&#123;&#125;</code> |
-| [logging_config](variables.tf#L247) | Logging configuration. | <code title="object&#40;&#123;&#10;  enable_system_logs             &#61; optional&#40;bool, true&#41;&#10;  enable_workloads_logs          &#61; optional&#40;bool, false&#41;&#10;  enable_api_server_logs         &#61; optional&#40;bool, false&#41;&#10;  enable_scheduler_logs          &#61; optional&#40;bool, false&#41;&#10;  enable_controller_manager_logs &#61; optional&#40;bool, false&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>&#123;&#125;</code> |
-| [maintenance_config](variables.tf#L268) | Maintenance window configuration. | <code title="object&#40;&#123;&#10;  daily_window_start_time &#61; optional&#40;string&#41;&#10;  recurring_window &#61; optional&#40;object&#40;&#123;&#10;    start_time &#61; string&#10;    end_time   &#61; string&#10;    recurrence &#61; string&#10;  &#125;&#41;&#41;&#10;  maintenance_exclusions &#61; optional&#40;list&#40;object&#40;&#123;&#10;    name       &#61; string&#10;    start_time &#61; string&#10;    end_time   &#61; string&#10;    scope      &#61; optional&#40;string&#41;&#10;  &#125;&#41;&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code title="&#123;&#10;  daily_window_start_time &#61; &#34;03:00&#34;&#10;  recurring_window        &#61; null&#10;  maintenance_exclusion   &#61; &#91;&#93;&#10;&#125;">&#123;&#8230;&#125;</code> |
-| [max_pods_per_node](variables.tf#L291) | Maximum number of pods per node in this cluster. | <code>number</code> |  | <code>110</code> |
-| [min_master_version](variables.tf#L297) | Minimum version of the master, defaults to the version of the most recent official release. | <code>string</code> |  | <code>null</code> |
-| [monitoring_config](variables.tf#L303) | Monitoring configuration. Google Cloud Managed Service for Prometheus is enabled by default. | <code title="object&#40;&#123;&#10;  enable_system_metrics &#61; optional&#40;bool, true&#41;&#10;  enable_api_server_metrics         &#61; optional&#40;bool, false&#41;&#10;  enable_controller_manager_metrics &#61; optional&#40;bool, false&#41;&#10;  enable_scheduler_metrics          &#61; optional&#40;bool, false&#41;&#10;  enable_daemonset_metrics   &#61; optional&#40;bool, false&#41;&#10;  enable_deployment_metrics  &#61; optional&#40;bool, false&#41;&#10;  enable_hpa_metrics         &#61; optional&#40;bool, false&#41;&#10;  enable_pod_metrics         &#61; optional&#40;bool, false&#41;&#10;  enable_statefulset_metrics &#61; optional&#40;bool, false&#41;&#10;  enable_storage_metrics     &#61; optional&#40;bool, false&#41;&#10;  enable_managed_prometheus &#61; optional&#40;bool, true&#41;&#10;  advanced_datapath_observability &#61; optional&#40;object&#40;&#123;&#10;    enable_metrics &#61; bool&#10;    enable_relay   &#61; optional&#40;bool&#41;&#10;  &#125;&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>&#123;&#125;</code> |
-| [node_config](variables.tf#L381) | Node-level configuration. | <code title="object&#40;&#123;&#10;  boot_disk_kms_key &#61; optional&#40;string&#41;&#10;  k8s_labels        &#61; optional&#40;map&#40;string&#41;&#41;&#10;  labels            &#61; optional&#40;map&#40;string&#41;&#41;&#10;  service_account   &#61; optional&#40;string&#41;&#10;  tags              &#61; optional&#40;list&#40;string&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>&#123;&#125;</code> |
-| [node_locations](variables.tf#L394) | Zones in which the cluster's nodes are located. | <code>list&#40;string&#41;</code> |  | <code>&#91;&#93;</code> |
-| [private_cluster_config](variables.tf#L401) | Private cluster configuration. | <code title="object&#40;&#123;&#10;  enable_private_endpoint &#61; optional&#40;bool&#41;&#10;  master_global_access    &#61; optional&#40;bool&#41;&#10;  peering_config &#61; optional&#40;object&#40;&#123;&#10;    export_routes &#61; optional&#40;bool&#41;&#10;    import_routes &#61; optional&#40;bool&#41;&#10;    project_id    &#61; optional&#40;string&#41;&#10;  &#125;&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>null</code> |
-| [release_channel](variables.tf#L420) | Release channel for GKE upgrades. | <code>string</code> |  | <code>null</code> |
+| [cluster_autoscaling](variables.tf#L39) | Enable and configure limits for Node Auto-Provisioning with Cluster Autoscaler. | <code title="object&#40;&#123;&#10;  enabled             &#61; optional&#40;bool, true&#41;&#10;  autoscaling_profile &#61; optional&#40;string, &#34;BALANCED&#34;&#41;&#10;  auto_provisioning_defaults &#61; optional&#40;object&#40;&#123;&#10;    boot_disk_kms_key &#61; optional&#40;string&#41;&#10;    disk_size         &#61; optional&#40;number&#41;&#10;    disk_type         &#61; optional&#40;string, &#34;pd-standard&#34;&#41;&#10;    image_type        &#61; optional&#40;string&#41;&#10;    oauth_scopes      &#61; optional&#40;list&#40;string&#41;&#41;&#10;    service_account   &#61; optional&#40;string&#41;&#10;    management &#61; optional&#40;object&#40;&#123;&#10;      auto_repair  &#61; optional&#40;bool, true&#41;&#10;      auto_upgrade &#61; optional&#40;bool, true&#41;&#10;    &#125;&#41;&#41;&#10;    shielded_instance_config &#61; optional&#40;object&#40;&#123;&#10;      integrity_monitoring &#61; optional&#40;bool, true&#41;&#10;      secure_boot          &#61; optional&#40;bool, false&#41;&#10;    &#125;&#41;&#41;&#10;    upgrade_settings &#61; optional&#40;object&#40;&#123;&#10;      blue_green &#61; optional&#40;object&#40;&#123;&#10;        node_pool_soak_duration &#61; optional&#40;string&#41;&#10;        standard_rollout_policy &#61; optional&#40;object&#40;&#123;&#10;          batch_percentage    &#61; optional&#40;number&#41;&#10;          batch_node_count    &#61; optional&#40;number&#41;&#10;          batch_soak_duration &#61; optional&#40;string&#41;&#10;        &#125;&#41;&#41;&#10;      &#125;&#41;&#41;&#10;      surge &#61; optional&#40;object&#40;&#123;&#10;        max         &#61; optional&#40;number&#41;&#10;        unavailable &#61; optional&#40;number&#41;&#10;      &#125;&#41;&#41;&#10;    &#125;&#41;&#41;&#10;  &#125;&#41;&#41;&#10;  cpu_limits &#61; optional&#40;object&#40;&#123;&#10;    min &#61; 
optional&#40;number, 0&#41;&#10;    max &#61; number&#10;  &#125;&#41;&#41;&#10;  mem_limits &#61; optional&#40;object&#40;&#123;&#10;    min &#61; optional&#40;number, 0&#41;&#10;    max &#61; number&#10;  &#125;&#41;&#41;&#10;  accelerator_resources &#61; optional&#40;list&#40;object&#40;&#123;&#10;    resource_type &#61; string&#10;    min           &#61; optional&#40;number, 0&#41;&#10;    max           &#61; number&#10;  &#125;&#41;&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>null</code> |
+| [default_nodepool](variables.tf#L118) | Enable default nodepool. | <code title="object&#40;&#123;&#10;  remove_pool        &#61; optional&#40;bool, true&#41;&#10;  initial_node_count &#61; optional&#40;number, 1&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>&#123;&#125;</code> |
+| [deletion_protection](variables.tf#L136) | Whether or not to allow Terraform to destroy the cluster. Unless this field is set to false in Terraform state, a terraform destroy or terraform apply that would delete the cluster will fail. | <code>bool</code> |  | <code>true</code> |
+| [description](variables.tf#L143) | Cluster description. | <code>string</code> |  | <code>null</code> |
+| [enable_addons](variables.tf#L149) | Addons enabled in the cluster (true means enabled). | <code title="object&#40;&#123;&#10;  cloudrun                       &#61; optional&#40;bool, false&#41;&#10;  config_connector               &#61; optional&#40;bool, false&#41;&#10;  dns_cache                      &#61; optional&#40;bool, false&#41;&#10;  gce_persistent_disk_csi_driver &#61; optional&#40;bool, false&#41;&#10;  gcp_filestore_csi_driver       &#61; optional&#40;bool, false&#41;&#10;  gcs_fuse_csi_driver            &#61; optional&#40;bool, false&#41;&#10;  horizontal_pod_autoscaling     &#61; optional&#40;bool, false&#41;&#10;  http_load_balancing            &#61; optional&#40;bool, false&#41;&#10;  istio &#61; optional&#40;object&#40;&#123;&#10;    enable_tls &#61; bool&#10;  &#125;&#41;&#41;&#10;  kalm           &#61; optional&#40;bool, false&#41;&#10;  network_policy &#61; optional&#40;bool, false&#41;&#10;  stateful_ha    &#61; optional&#40;bool, false&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code title="&#123;&#10;  horizontal_pod_autoscaling &#61; true&#10;  http_load_balancing        &#61; true&#10;&#125;">&#123;&#8230;&#125;</code> |
+| [enable_features](variables.tf#L174) | Enable cluster-level features. Certain features allow configuration. | <code title="object&#40;&#123;&#10;  beta_apis                         &#61; optional&#40;list&#40;string&#41;&#41;&#10;  binary_authorization              &#61; optional&#40;bool, false&#41;&#10;  cilium_clusterwide_network_policy &#61; optional&#40;bool, false&#41;&#10;  cost_management                   &#61; optional&#40;bool, false&#41;&#10;  dns &#61; optional&#40;object&#40;&#123;&#10;    provider &#61; optional&#40;string&#41;&#10;    scope    &#61; optional&#40;string&#41;&#10;    domain   &#61; optional&#40;string&#41;&#10;  &#125;&#41;&#41;&#10;  database_encryption &#61; optional&#40;object&#40;&#123;&#10;    state    &#61; string&#10;    key_name &#61; string&#10;  &#125;&#41;&#41;&#10;  dataplane_v2         &#61; optional&#40;bool, false&#41;&#10;  fqdn_network_policy  &#61; optional&#40;bool, false&#41;&#10;  gateway_api          &#61; optional&#40;bool, false&#41;&#10;  groups_for_rbac      &#61; optional&#40;string&#41;&#10;  image_streaming      &#61; optional&#40;bool, false&#41;&#10;  intranode_visibility &#61; optional&#40;bool, false&#41;&#10;  l4_ilb_subsetting    &#61; optional&#40;bool, false&#41;&#10;  mesh_certificates    &#61; optional&#40;bool&#41;&#10;  pod_security_policy  &#61; optional&#40;bool, false&#41;&#10;  resource_usage_export &#61; optional&#40;object&#40;&#123;&#10;    dataset                              &#61; string&#10;    enable_network_egress_metering       &#61; optional&#40;bool&#41;&#10;    enable_resource_consumption_metering &#61; optional&#40;bool&#41;&#10;  &#125;&#41;&#41;&#10;  service_external_ips &#61; optional&#40;bool, true&#41;&#10;  shielded_nodes       &#61; optional&#40;bool, false&#41;&#10;  tpu                  &#61; optional&#40;bool, false&#41;&#10;  upgrade_notifications &#61; optional&#40;object&#40;&#123;&#10;    topic_id &#61; optional&#40;string&#41;&#10;  &#125;&#41;&#41;&#10;  
vertical_pod_autoscaling &#61; optional&#40;bool, false&#41;&#10;  workload_identity        &#61; optional&#40;bool, true&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code title="&#123;&#10;  workload_identity &#61; true&#10;&#125;">&#123;&#8230;&#125;</code> |
+| [issue_client_certificate](variables.tf#L224) | Enable issuing client certificate. | <code>bool</code> |  | <code>false</code> |
+| [labels](variables.tf#L230) | Cluster resource labels. | <code>map&#40;string&#41;</code> |  | <code>&#123;&#125;</code> |
+| [logging_config](variables.tf#L242) | Logging configuration. | <code title="object&#40;&#123;&#10;  enable_system_logs             &#61; optional&#40;bool, true&#41;&#10;  enable_workloads_logs          &#61; optional&#40;bool, false&#41;&#10;  enable_api_server_logs         &#61; optional&#40;bool, false&#41;&#10;  enable_scheduler_logs          &#61; optional&#40;bool, false&#41;&#10;  enable_controller_manager_logs &#61; optional&#40;bool, false&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>&#123;&#125;</code> |
+| [maintenance_config](variables.tf#L263) | Maintenance window configuration. | <code title="object&#40;&#123;&#10;  daily_window_start_time &#61; optional&#40;string&#41;&#10;  recurring_window &#61; optional&#40;object&#40;&#123;&#10;    start_time &#61; string&#10;    end_time   &#61; string&#10;    recurrence &#61; string&#10;  &#125;&#41;&#41;&#10;  maintenance_exclusions &#61; optional&#40;list&#40;object&#40;&#123;&#10;    name       &#61; string&#10;    start_time &#61; string&#10;    end_time   &#61; string&#10;    scope      &#61; optional&#40;string&#41;&#10;  &#125;&#41;&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code title="&#123;&#10;  daily_window_start_time &#61; &#34;03:00&#34;&#10;  recurring_window        &#61; null&#10;  maintenance_exclusion   &#61; &#91;&#93;&#10;&#125;">&#123;&#8230;&#125;</code> |
+| [max_pods_per_node](variables.tf#L286) | Maximum number of pods per node in this cluster. | <code>number</code> |  | <code>110</code> |
+| [min_master_version](variables.tf#L292) | Minimum version of the master, defaults to the version of the most recent official release. | <code>string</code> |  | <code>null</code> |
+| [monitoring_config](variables.tf#L298) | Monitoring configuration. Google Cloud Managed Service for Prometheus is enabled by default. | <code title="object&#40;&#123;&#10;  enable_system_metrics &#61; optional&#40;bool, true&#41;&#10;  enable_api_server_metrics         &#61; optional&#40;bool, false&#41;&#10;  enable_controller_manager_metrics &#61; optional&#40;bool, false&#41;&#10;  enable_scheduler_metrics          &#61; optional&#40;bool, false&#41;&#10;  enable_daemonset_metrics   &#61; optional&#40;bool, false&#41;&#10;  enable_deployment_metrics  &#61; optional&#40;bool, false&#41;&#10;  enable_hpa_metrics         &#61; optional&#40;bool, false&#41;&#10;  enable_pod_metrics         &#61; optional&#40;bool, false&#41;&#10;  enable_statefulset_metrics &#61; optional&#40;bool, false&#41;&#10;  enable_storage_metrics     &#61; optional&#40;bool, false&#41;&#10;  enable_managed_prometheus &#61; optional&#40;bool, true&#41;&#10;  advanced_datapath_observability &#61; optional&#40;object&#40;&#123;&#10;    enable_metrics &#61; bool&#10;    enable_relay   &#61; optional&#40;bool&#41;&#10;  &#125;&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>&#123;&#125;</code> |
+| [node_config](variables.tf#L376) | Node-level configuration. | <code title="object&#40;&#123;&#10;  boot_disk_kms_key &#61; optional&#40;string&#41;&#10;  k8s_labels        &#61; optional&#40;map&#40;string&#41;&#41;&#10;  labels            &#61; optional&#40;map&#40;string&#41;&#41;&#10;  service_account   &#61; optional&#40;string&#41;&#10;  tags              &#61; optional&#40;list&#40;string&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>&#123;&#125;</code> |
+| [node_locations](variables.tf#L389) | Zones in which the cluster's nodes are located. | <code>list&#40;string&#41;</code> |  | <code>&#91;&#93;</code> |
+| [private_cluster_config](variables.tf#L396) | Private cluster configuration. | <code title="object&#40;&#123;&#10;  enable_private_endpoint &#61; optional&#40;bool&#41;&#10;  master_global_access    &#61; optional&#40;bool&#41;&#10;  peering_config &#61; optional&#40;object&#40;&#123;&#10;    export_routes &#61; optional&#40;bool&#41;&#10;    import_routes &#61; optional&#40;bool&#41;&#10;    project_id    &#61; optional&#40;string&#41;&#10;  &#125;&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>null</code> |
+| [release_channel](variables.tf#L415) | Release channel for GKE upgrades. | <code>string</code> |  | <code>null</code> |
 
 ## Outputs
 

From 1f9fb15ab6f37fa385a9e6127f4a68c38de8172b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Legrand?= <aurelien.legrand01@gmail.com>
Date: Tue, 9 Jul 2024 00:23:02 +0200
Subject: [PATCH 4/7] adding queued_provisioning (DWS) attribute

---
 modules/gke-nodepool/README.md    | 16 ++++++++--------
 modules/gke-nodepool/main.tf      |  7 +++++++
 modules/gke-nodepool/variables.tf |  1 +
 3 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/modules/gke-nodepool/README.md b/modules/gke-nodepool/README.md
index 453fff8d62..983c8a5041 100644
--- a/modules/gke-nodepool/README.md
+++ b/modules/gke-nodepool/README.md
@@ -143,7 +143,7 @@ module "cluster-1-nodepool-gpu-1" {
 |---|---|:---:|:---:|:---:|
 | [cluster_name](variables.tf#L23) | Cluster name. | <code>string</code> | ✓ |  |
 | [location](variables.tf#L48) | Cluster location. | <code>string</code> | ✓ |  |
-| [project_id](variables.tf#L181) | Cluster project id. | <code>string</code> | ✓ |  |
+| [project_id](variables.tf#L182) | Cluster project id. | <code>string</code> | ✓ |  |
 | [cluster_id](variables.tf#L17) | Cluster id. Optional, but providing cluster_id is recommended to prevent cluster misconfiguration in some of the edge cases. | <code>string</code> |  | <code>null</code> |
 | [gke_version](variables.tf#L28) | Kubernetes nodes version. Ignored if auto_upgrade is set in management_config. | <code>string</code> |  | <code>null</code> |
 | [k8s_labels](variables.tf#L34) | Kubernetes labels applied to each node. | <code>map&#40;string&#41;</code> |  | <code>&#123;&#125;</code> |
@@ -153,13 +153,13 @@ module "cluster-1-nodepool-gpu-1" {
 | [node_config](variables.tf#L65) | Node-level configuration. | <code title="object&#40;&#123;&#10;  boot_disk_kms_key   &#61; optional&#40;string&#41;&#10;  disk_size_gb        &#61; optional&#40;number&#41;&#10;  disk_type           &#61; optional&#40;string&#41;&#10;  ephemeral_ssd_count &#61; optional&#40;number&#41;&#10;  gcfs                &#61; optional&#40;bool, false&#41;&#10;  guest_accelerator &#61; optional&#40;object&#40;&#123;&#10;    count &#61; number&#10;    type  &#61; string&#10;    gpu_driver &#61; optional&#40;object&#40;&#123;&#10;      version                    &#61; string&#10;      partition_size             &#61; optional&#40;string&#41;&#10;      max_shared_clients_per_gpu &#61; optional&#40;number&#41;&#10;    &#125;&#41;&#41;&#10;  &#125;&#41;&#41;&#10;  local_nvme_ssd_count &#61; optional&#40;number&#41;&#10;  gvnic                &#61; optional&#40;bool, false&#41;&#10;  image_type           &#61; optional&#40;string&#41;&#10;  kubelet_config &#61; optional&#40;object&#40;&#123;&#10;    cpu_manager_policy   &#61; string&#10;    cpu_cfs_quota        &#61; optional&#40;bool&#41;&#10;    cpu_cfs_quota_period &#61; optional&#40;string&#41;&#10;    pod_pids_limit       &#61; optional&#40;number&#41;&#10;  &#125;&#41;&#41;&#10;  linux_node_config &#61; optional&#40;object&#40;&#123;&#10;    sysctls     &#61; optional&#40;map&#40;string&#41;&#41;&#10;    cgroup_mode &#61; optional&#40;string&#41;&#10;  &#125;&#41;&#41;&#10;  local_ssd_count       &#61; optional&#40;number&#41;&#10;  machine_type          &#61; optional&#40;string&#41;&#10;  metadata              &#61; optional&#40;map&#40;string&#41;&#41;&#10;  min_cpu_platform      &#61; optional&#40;string&#41;&#10;  preemptible           &#61; optional&#40;bool&#41;&#10;  sandbox_config_gvisor &#61; optional&#40;bool&#41;&#10;  shielded_instance_config &#61; optional&#40;object&#40;&#123;&#10;    enable_integrity_monitoring &#61; optional&#40;bool&#41;&#10;    enable_secure_boot        
  &#61; optional&#40;bool&#41;&#10;  &#125;&#41;&#41;&#10;  spot                          &#61; optional&#40;bool&#41;&#10;  workload_metadata_config_mode &#61; optional&#40;string&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code title="&#123;&#10;  disk_type &#61; &#34;pd-balanced&#34;&#10;&#125;">&#123;&#8230;&#125;</code> |
 | [node_count](variables.tf#L124) | Number of nodes per instance group. Initial value can only be changed by recreation, current is ignored when autoscaling is used. | <code title="object&#40;&#123;&#10;  current &#61; optional&#40;number&#41;&#10;  initial &#61; number&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code title="&#123;&#10;  initial &#61; 1&#10;&#125;">&#123;&#8230;&#125;</code> |
 | [node_locations](variables.tf#L136) | Node locations. | <code>list&#40;string&#41;</code> |  | <code>null</code> |
-| [nodepool_config](variables.tf#L142) | Nodepool-level configuration. | <code title="object&#40;&#123;&#10;  autoscaling &#61; optional&#40;object&#40;&#123;&#10;    location_policy &#61; optional&#40;string&#41;&#10;    max_node_count  &#61; optional&#40;number&#41;&#10;    min_node_count  &#61; optional&#40;number&#41;&#10;    use_total_nodes &#61; optional&#40;bool, false&#41;&#10;  &#125;&#41;&#41;&#10;  management &#61; optional&#40;object&#40;&#123;&#10;    auto_repair  &#61; optional&#40;bool&#41;&#10;    auto_upgrade &#61; optional&#40;bool&#41;&#10;  &#125;&#41;&#41;&#10;  placement_policy &#61; optional&#40;object&#40;&#123;&#10;    type         &#61; string&#10;    policy_name  &#61; optional&#40;string&#41;&#10;    tpu_topology &#61; optional&#40;string&#41;&#10;  &#125;&#41;&#41;&#10;  upgrade_settings &#61; optional&#40;object&#40;&#123;&#10;    max_surge       &#61; number&#10;    max_unavailable &#61; number&#10;  &#125;&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>null</code> |
-| [pod_range](variables.tf#L168) | Pod secondary range configuration. | <code title="object&#40;&#123;&#10;  secondary_pod_range &#61; object&#40;&#123;&#10;    name                 &#61; string&#10;    cidr                 &#61; optional&#40;string&#41;&#10;    create               &#61; optional&#40;bool&#41;&#10;    enable_private_nodes &#61; optional&#40;bool&#41;&#10;  &#125;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>null</code> |
-| [reservation_affinity](variables.tf#L186) | Configuration of the desired reservation which instances could take capacity from. | <code title="object&#40;&#123;&#10;  consume_reservation_type &#61; string&#10;  key                      &#61; optional&#40;string&#41;&#10;  values                   &#61; optional&#40;list&#40;string&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>null</code> |
-| [service_account](variables.tf#L196) | Nodepool service account. If this variable is set to null, the default GCE service account will be used. If set and email is null, a service account will be created. If scopes are null a default will be used. | <code title="object&#40;&#123;&#10;  create       &#61; optional&#40;bool, false&#41;&#10;  email        &#61; optional&#40;string&#41;&#10;  oauth_scopes &#61; optional&#40;list&#40;string&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>&#123;&#125;</code> |
-| [sole_tenant_nodegroup](variables.tf#L207) | Sole tenant node group. | <code>string</code> |  | <code>null</code> |
-| [tags](variables.tf#L213) | Network tags applied to nodes. | <code>list&#40;string&#41;</code> |  | <code>null</code> |
-| [taints](variables.tf#L219) | Kubernetes taints applied to all nodes. | <code title="map&#40;object&#40;&#123;&#10;  value  &#61; string&#10;  effect &#61; string&#10;&#125;&#41;&#41;">map&#40;object&#40;&#123;&#8230;&#125;&#41;&#41;</code> |  | <code>&#123;&#125;</code> |
+| [nodepool_config](variables.tf#L142) | Nodepool-level configuration. | <code title="object&#40;&#123;&#10;  autoscaling &#61; optional&#40;object&#40;&#123;&#10;    location_policy &#61; optional&#40;string&#41;&#10;    max_node_count  &#61; optional&#40;number&#41;&#10;    min_node_count  &#61; optional&#40;number&#41;&#10;    use_total_nodes &#61; optional&#40;bool, false&#41;&#10;  &#125;&#41;&#41;&#10;  management &#61; optional&#40;object&#40;&#123;&#10;    auto_repair  &#61; optional&#40;bool&#41;&#10;    auto_upgrade &#61; optional&#40;bool&#41;&#10;  &#125;&#41;&#41;&#10;  placement_policy &#61; optional&#40;object&#40;&#123;&#10;    type         &#61; string&#10;    policy_name  &#61; optional&#40;string&#41;&#10;    tpu_topology &#61; optional&#40;string&#41;&#10;  &#125;&#41;&#41;&#10;  queued_provisioning &#61; optional&#40;bool&#41;&#10;  upgrade_settings &#61; optional&#40;object&#40;&#123;&#10;    max_surge       &#61; number&#10;    max_unavailable &#61; number&#10;  &#125;&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>null</code> |
+| [pod_range](variables.tf#L169) | Pod secondary range configuration. | <code title="object&#40;&#123;&#10;  secondary_pod_range &#61; object&#40;&#123;&#10;    name                 &#61; string&#10;    cidr                 &#61; optional&#40;string&#41;&#10;    create               &#61; optional&#40;bool&#41;&#10;    enable_private_nodes &#61; optional&#40;bool&#41;&#10;  &#125;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>null</code> |
+| [reservation_affinity](variables.tf#L187) | Configuration of the desired reservation which instances could take capacity from. | <code title="object&#40;&#123;&#10;  consume_reservation_type &#61; string&#10;  key                      &#61; optional&#40;string&#41;&#10;  values                   &#61; optional&#40;list&#40;string&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>null</code> |
+| [service_account](variables.tf#L197) | Nodepool service account. If this variable is set to null, the default GCE service account will be used. If set and email is null, a service account will be created. If scopes are null a default will be used. | <code title="object&#40;&#123;&#10;  create       &#61; optional&#40;bool, false&#41;&#10;  email        &#61; optional&#40;string&#41;&#10;  oauth_scopes &#61; optional&#40;list&#40;string&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>&#123;&#125;</code> |
+| [sole_tenant_nodegroup](variables.tf#L208) | Sole tenant node group. | <code>string</code> |  | <code>null</code> |
+| [tags](variables.tf#L214) | Network tags applied to nodes. | <code>list&#40;string&#41;</code> |  | <code>null</code> |
+| [taints](variables.tf#L220) | Kubernetes taints applied to all nodes. | <code title="map&#40;object&#40;&#123;&#10;  value  &#61; string&#10;  effect &#61; string&#10;&#125;&#41;&#41;">map&#40;object&#40;&#123;&#8230;&#125;&#41;&#41;</code> |  | <code>&#123;&#125;</code> |
 
 ## Outputs
 
diff --git a/modules/gke-nodepool/main.tf b/modules/gke-nodepool/main.tf
index b94ef697c8..61a11ad283 100644
--- a/modules/gke-nodepool/main.tf
+++ b/modules/gke-nodepool/main.tf
@@ -137,6 +137,13 @@ resource "google_container_node_pool" "nodepool" {
     }
   }
 
+  dynamic "queued_provisioning" {
+    for_each = try(var.nodepool_config.queued_provisioning, null) != null ? [""] : []
+    content {
+      enabled = var.nodepool_config.queued_provisioning
+    }
+  }
+
   node_config {
     boot_disk_kms_key = var.node_config.boot_disk_kms_key
     disk_size_gb      = var.node_config.disk_size_gb
diff --git a/modules/gke-nodepool/variables.tf b/modules/gke-nodepool/variables.tf
index c970c5b1bd..bfbddfe4d9 100644
--- a/modules/gke-nodepool/variables.tf
+++ b/modules/gke-nodepool/variables.tf
@@ -157,6 +157,7 @@ variable "nodepool_config" {
       policy_name  = optional(string)
       tpu_topology = optional(string)
     }))
+    queued_provisioning = optional(bool)
     upgrade_settings = optional(object({
       max_surge       = number
       max_unavailable = number

From 5158683e411f15ab28faff8403b372603e970bbe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Legrand?= <aurelien.legrand01@gmail.com>
Date: Tue, 9 Jul 2024 19:15:09 +0200
Subject: [PATCH 5/7] Adding support for DWS for GKE nodepools

---
 modules/gke-nodepool/README.md    | 50 ++++++++++++++++++++++++++++++-
 modules/gke-nodepool/main.tf      |  2 +-
 modules/gke-nodepool/variables.tf |  2 +-
 3 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/modules/gke-nodepool/README.md b/modules/gke-nodepool/README.md
index 983c8a5041..6290c241eb 100644
--- a/modules/gke-nodepool/README.md
+++ b/modules/gke-nodepool/README.md
@@ -136,6 +136,54 @@ module "cluster-1-nodepool-gpu-1" {
 }
 # tftest modules=1 resources=2 inventory=guest-accelerator.yaml
 ```
+
+### Dynamic Workload Scheduler (DWS) & node pool configuration
+This example uses Dynamic Workload Scheduler (DWS) to configure a GPU nodepool.
+
+```hcl
+module "cluster-1-nodepool-gpu-1" {
+  source       = "./fabric/modules/gke-nodepool"
+  project_id   = "myproject"
+  cluster_name = "cluster-1"
+  location     = "europe-west4-a"
+  name         = "nodepool-gpu-1"
+  k8s_labels   = { environment = "dev" }
+  service_account = {
+    create       = true
+    email        = "nodepool-gpu-1" # optional
+    oauth_scopes = ["https://www.googleapis.com/auth/cloud-platform"]
+  }
+  nnode_config = {
+    machine_type        = "g2-standard-4"
+    disk_size_gb        = 50
+    disk_type           = "pd-ssd"
+    ephemeral_ssd_count = 1
+    gvnic               = true
+    spot                = true
+    guest_accelerator = {
+      type  = "nvidia-l4"
+      count = 1
+      gpu_driver = {
+        version = "LATEST"
+      }
+    }
+  }
+  nodepool_config = {
+    autoscaling = {
+      max_node_count = 10
+      min_node_count = 0
+    }
+    queued_provisioning = true
+  }
+  node_count = {
+    initial = 0
+  }
+  reservation_affinity = {
+    consume_reservation_type = "NO_RESERVATION"
+  }
+}
+# tftest modules=1 resources=2 inventory=guest-accelerator.yaml
+```
 <!-- BEGIN TFDOC -->
 ## Variables
 
@@ -153,7 +201,7 @@ module "cluster-1-nodepool-gpu-1" {
 | [node_config](variables.tf#L65) | Node-level configuration. | <code title="object&#40;&#123;&#10;  boot_disk_kms_key   &#61; optional&#40;string&#41;&#10;  disk_size_gb        &#61; optional&#40;number&#41;&#10;  disk_type           &#61; optional&#40;string&#41;&#10;  ephemeral_ssd_count &#61; optional&#40;number&#41;&#10;  gcfs                &#61; optional&#40;bool, false&#41;&#10;  guest_accelerator &#61; optional&#40;object&#40;&#123;&#10;    count &#61; number&#10;    type  &#61; string&#10;    gpu_driver &#61; optional&#40;object&#40;&#123;&#10;      version                    &#61; string&#10;      partition_size             &#61; optional&#40;string&#41;&#10;      max_shared_clients_per_gpu &#61; optional&#40;number&#41;&#10;    &#125;&#41;&#41;&#10;  &#125;&#41;&#41;&#10;  local_nvme_ssd_count &#61; optional&#40;number&#41;&#10;  gvnic                &#61; optional&#40;bool, false&#41;&#10;  image_type           &#61; optional&#40;string&#41;&#10;  kubelet_config &#61; optional&#40;object&#40;&#123;&#10;    cpu_manager_policy   &#61; string&#10;    cpu_cfs_quota        &#61; optional&#40;bool&#41;&#10;    cpu_cfs_quota_period &#61; optional&#40;string&#41;&#10;    pod_pids_limit       &#61; optional&#40;number&#41;&#10;  &#125;&#41;&#41;&#10;  linux_node_config &#61; optional&#40;object&#40;&#123;&#10;    sysctls     &#61; optional&#40;map&#40;string&#41;&#41;&#10;    cgroup_mode &#61; optional&#40;string&#41;&#10;  &#125;&#41;&#41;&#10;  local_ssd_count       &#61; optional&#40;number&#41;&#10;  machine_type          &#61; optional&#40;string&#41;&#10;  metadata              &#61; optional&#40;map&#40;string&#41;&#41;&#10;  min_cpu_platform      &#61; optional&#40;string&#41;&#10;  preemptible           &#61; optional&#40;bool&#41;&#10;  sandbox_config_gvisor &#61; optional&#40;bool&#41;&#10;  shielded_instance_config &#61; optional&#40;object&#40;&#123;&#10;    enable_integrity_monitoring &#61; optional&#40;bool&#41;&#10;    enable_secure_boot        
  &#61; optional&#40;bool&#41;&#10;  &#125;&#41;&#41;&#10;  spot                          &#61; optional&#40;bool&#41;&#10;  workload_metadata_config_mode &#61; optional&#40;string&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code title="&#123;&#10;  disk_type &#61; &#34;pd-balanced&#34;&#10;&#125;">&#123;&#8230;&#125;</code> |
 | [node_count](variables.tf#L124) | Number of nodes per instance group. Initial value can only be changed by recreation, current is ignored when autoscaling is used. | <code title="object&#40;&#123;&#10;  current &#61; optional&#40;number&#41;&#10;  initial &#61; number&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code title="&#123;&#10;  initial &#61; 1&#10;&#125;">&#123;&#8230;&#125;</code> |
 | [node_locations](variables.tf#L136) | Node locations. | <code>list&#40;string&#41;</code> |  | <code>null</code> |
-| [nodepool_config](variables.tf#L142) | Nodepool-level configuration. | <code title="object&#40;&#123;&#10;  autoscaling &#61; optional&#40;object&#40;&#123;&#10;    location_policy &#61; optional&#40;string&#41;&#10;    max_node_count  &#61; optional&#40;number&#41;&#10;    min_node_count  &#61; optional&#40;number&#41;&#10;    use_total_nodes &#61; optional&#40;bool, false&#41;&#10;  &#125;&#41;&#41;&#10;  management &#61; optional&#40;object&#40;&#123;&#10;    auto_repair  &#61; optional&#40;bool&#41;&#10;    auto_upgrade &#61; optional&#40;bool&#41;&#10;  &#125;&#41;&#41;&#10;  placement_policy &#61; optional&#40;object&#40;&#123;&#10;    type         &#61; string&#10;    policy_name  &#61; optional&#40;string&#41;&#10;    tpu_topology &#61; optional&#40;string&#41;&#10;  &#125;&#41;&#41;&#10;  queued_provisioning &#61; optional&#40;bool&#41;&#10;  upgrade_settings &#61; optional&#40;object&#40;&#123;&#10;    max_surge       &#61; number&#10;    max_unavailable &#61; number&#10;  &#125;&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>null</code> |
+| [nodepool_config](variables.tf#L142) | Nodepool-level configuration. | <code title="object&#40;&#123;&#10;  autoscaling &#61; optional&#40;object&#40;&#123;&#10;    location_policy &#61; optional&#40;string&#41;&#10;    max_node_count  &#61; optional&#40;number&#41;&#10;    min_node_count  &#61; optional&#40;number&#41;&#10;    use_total_nodes &#61; optional&#40;bool, false&#41;&#10;  &#125;&#41;&#41;&#10;  management &#61; optional&#40;object&#40;&#123;&#10;    auto_repair  &#61; optional&#40;bool&#41;&#10;    auto_upgrade &#61; optional&#40;bool&#41;&#10;  &#125;&#41;&#41;&#10;  placement_policy &#61; optional&#40;object&#40;&#123;&#10;    type         &#61; string&#10;    policy_name  &#61; optional&#40;string&#41;&#10;    tpu_topology &#61; optional&#40;string&#41;&#10;  &#125;&#41;&#41;&#10;  queued_provisioning &#61; optional&#40;bool, false&#41;&#10;  upgrade_settings &#61; optional&#40;object&#40;&#123;&#10;    max_surge       &#61; number&#10;    max_unavailable &#61; number&#10;  &#125;&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>null</code> |
 | [pod_range](variables.tf#L169) | Pod secondary range configuration. | <code title="object&#40;&#123;&#10;  secondary_pod_range &#61; object&#40;&#123;&#10;    name                 &#61; string&#10;    cidr                 &#61; optional&#40;string&#41;&#10;    create               &#61; optional&#40;bool&#41;&#10;    enable_private_nodes &#61; optional&#40;bool&#41;&#10;  &#125;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>null</code> |
 | [reservation_affinity](variables.tf#L187) | Configuration of the desired reservation which instances could take capacity from. | <code title="object&#40;&#123;&#10;  consume_reservation_type &#61; string&#10;  key                      &#61; optional&#40;string&#41;&#10;  values                   &#61; optional&#40;list&#40;string&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>null</code> |
 | [service_account](variables.tf#L197) | Nodepool service account. If this variable is set to null, the default GCE service account will be used. If set and email is null, a service account will be created. If scopes are null a default will be used. | <code title="object&#40;&#123;&#10;  create       &#61; optional&#40;bool, false&#41;&#10;  email        &#61; optional&#40;string&#41;&#10;  oauth_scopes &#61; optional&#40;list&#40;string&#41;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> |  | <code>&#123;&#125;</code> |
diff --git a/modules/gke-nodepool/main.tf b/modules/gke-nodepool/main.tf
index 61a11ad283..5544a60066 100644
--- a/modules/gke-nodepool/main.tf
+++ b/modules/gke-nodepool/main.tf
@@ -138,7 +138,7 @@ resource "google_container_node_pool" "nodepool" {
   }
 
   dynamic "queued_provisioning" {
-    for_each = try(var.nodepool_config.queued_provisioning, null) != null ? [""] : []
+    for_each = try(var.nodepool_config.queued_provisioning, false) ? [""] : []
     content {
       enabled = var.nodepool_config.queued_provisioning
     }
diff --git a/modules/gke-nodepool/variables.tf b/modules/gke-nodepool/variables.tf
index bfbddfe4d9..1796674593 100644
--- a/modules/gke-nodepool/variables.tf
+++ b/modules/gke-nodepool/variables.tf
@@ -157,7 +157,7 @@ variable "nodepool_config" {
       policy_name  = optional(string)
       tpu_topology = optional(string)
     }))
-    queued_provisioning = optional(bool)
+    queued_provisioning = optional(bool, false)
     upgrade_settings = optional(object({
       max_surge       = number
       max_unavailable = number

From 3b6c5237e9c15eb1dd5349ddf20c347a1add5104 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Legrand?= <aurelien.legrand01@gmail.com>
Date: Wed, 10 Jul 2024 14:55:00 +0200
Subject: [PATCH 6/7] Fix nnode_config typo in DWS README example

---
 modules/gke-nodepool/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/gke-nodepool/README.md b/modules/gke-nodepool/README.md
index 6290c241eb..2d1392a713 100644
--- a/modules/gke-nodepool/README.md
+++ b/modules/gke-nodepool/README.md
@@ -153,7 +153,7 @@ module "cluster-1-nodepool-gpu-1" {
     email        = "nodepool-gpu-1" # optional
     oauth_scopes = ["https://www.googleapis.com/auth/cloud-platform"]
   }
-  nnode_config = {
+  node_config = {
     machine_type        = "g2-standard-4"
     disk_size_gb        = 50
     disk_type           = "pd-ssd"

From b83e7fdc0a9cb92f77878c35d9a9005ed4128f6f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aur=C3=A9lien=20Legrand?= <aurelien.legrand01@gmail.com>
Date: Wed, 10 Jul 2024 14:58:45 +0200
Subject: [PATCH 7/7] adding test for DWS

---
 modules/gke-nodepool/README.md               |  6 +--
 tests/modules/gke_nodepool/examples/dws.yaml | 43 ++++++++++++++++++++
 2 files changed, 46 insertions(+), 3 deletions(-)
 create mode 100644 tests/modules/gke_nodepool/examples/dws.yaml

diff --git a/modules/gke-nodepool/README.md b/modules/gke-nodepool/README.md
index 2d1392a713..002bd832e7 100644
--- a/modules/gke-nodepool/README.md
+++ b/modules/gke-nodepool/README.md
@@ -141,12 +141,12 @@ module "cluster-1-nodepool-gpu-1" {
 This example uses Dynamic Workload Scheduler (DWS) to configure a GPU nodepool.
 
 ```hcl
-module "cluster-1-nodepool-gpu-1" {
+module "cluster-1-nodepool-dws" {
   source       = "./fabric/modules/gke-nodepool"
   project_id   = "myproject"
   cluster_name = "cluster-1"
   location     = "europe-west4-a"
-  name         = "nodepool-gpu-1"
+  name         = "nodepool-dws"
   k8s_labels   = { environment = "dev" }
   service_account = {
     create       = true
@@ -182,7 +182,7 @@ module "cluster-1-nodepool-gpu-1" {
     consume_reservation_type = "NO_RESERVATION"
   }
 }
-# tftest modules=1 resources=2 inventory=guest-accelerator.yaml
+# tftest modules=1 resources=2 inventory=dws.yaml
 ```
 <!-- BEGIN TFDOC -->
 ## Variables
diff --git a/tests/modules/gke_nodepool/examples/dws.yaml b/tests/modules/gke_nodepool/examples/dws.yaml
new file mode 100644
index 0000000000..59f0ca4870
--- /dev/null
+++ b/tests/modules/gke_nodepool/examples/dws.yaml
@@ -0,0 +1,43 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+values:
+  module.cluster-1-nodepool-dws.google_container_node_pool.nodepool:
+    cluster: cluster-1
+    location: europe-west4-a
+    name: nodepool-dws
+    node_config:
+    - boot_disk_kms_key: null
+      disk_size_gb: 50
+      disk_type: pd-ssd
+      ephemeral_storage_config:
+      - local_ssd_count: 1
+      ephemeral_storage_local_ssd_config: []
+      guest_accelerator:
+      - count: 1
+        gpu_driver_installation_config:
+        - gpu_driver_version: LATEST
+        gpu_partition_size: null
+        gpu_sharing_config: null
+        type: nvidia-l4
+      gvnic: []
+      machine_type: g2-standard-4
+    project: myproject
+    # DWS: main.tf emits this block only when
+    # nodepool_config.queued_provisioning is true, as in the README example.
+    queued_provisioning:
+    - enabled: true
+
+counts:
+  google_container_node_pool: 1