Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: promote tpu to ga #1856

Merged
merged 1 commit into from
Jan 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ Then perform the following commands on the root folder:
| enable\_network\_egress\_export | Whether to enable network egress metering for this cluster. If enabled, a daemonset will be created in the cluster to meter network egress traffic. | `bool` | `false` | no |
| enable\_resource\_consumption\_export | Whether to enable resource consumption metering on this cluster. When enabled, a table will be created in the resource export BigQuery dataset to store resource consumption data. The resulting table can be joined with the resource usage table or with BigQuery billing export. | `bool` | `true` | no |
| enable\_shielded\_nodes | Enable Shielded Nodes features on all nodes in this cluster | `bool` | `true` | no |
| enable\_tpu | Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive! | `bool` | `false` | no |
| enable\_vertical\_pod\_autoscaling | Vertical Pod Autoscaling automatically adjusts the resources of pods controlled by it | `bool` | `false` | no |
| filestore\_csi\_driver | The status of the Filestore CSI driver addon, which allows the usage of filestore instance as volumes | `bool` | `false` | no |
| firewall\_inbound\_ports | List of TCP ports for admission/webhook controllers. Either flag `add_master_webhook_firewall_rules` or `add_cluster_firewall_rules` (also adds egress rules) must be set to `true` for inbound-ports firewall rules to be applied. | `list(string)` | <pre>[<br> "8443",<br> "9443",<br> "15017"<br>]</pre> | no |
Expand Down Expand Up @@ -257,6 +258,7 @@ Then perform the following commands on the root folder:
| region | Cluster region |
| release\_channel | The release channel of this cluster |
| service\_account | The service account to default running nodes as if not overridden in `node_pools`. |
| tpu\_ipv4\_cidr\_block | The IP range in CIDR notation used for the TPUs |
| type | Cluster type (regional / zonal) |
| vertical\_pod\_autoscaling\_enabled | Whether vertical pod autoscaling is enabled |
| zones | List of zones in which the cluster resides |
Expand Down
3 changes: 1 addition & 2 deletions autogen/main/cluster.tf.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -189,10 +189,9 @@ resource "google_container_cluster" "primary" {
}

enable_kubernetes_alpha = var.enable_kubernetes_alpha

enable_tpu = var.enable_tpu
{% if beta_cluster %}
enable_intranode_visibility = var.enable_intranode_visibility
enable_tpu = var.enable_tpu

dynamic "pod_security_policy_config" {
for_each = var.enable_pod_security_policy ? [var.enable_pod_security_policy] : []
Expand Down
3 changes: 0 additions & 3 deletions autogen/main/firewall.tf.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ resource "google_compute_firewall" "intra_egress" {
}


{% if beta_cluster %}
/******************************************
Allow egress to the TPU IPv4 CIDR block

Expand Down Expand Up @@ -95,8 +94,6 @@ resource "google_compute_firewall" "tpu_egress" {
{% endif %}
}


{% endif %}
/******************************************
Allow GKE master to hit non 443 ports for
Webhooks/Admission Controllers
Expand Down
10 changes: 5 additions & 5 deletions autogen/main/outputs.tf.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,11 @@ output "identity_namespace" {
]
}

# Exposes the tpu_ipv4_cidr_block attribute of the primary cluster.
# Evaluates to null when var.enable_tpu is false.
output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}

{% if autopilot_cluster != true %}
output "mesh_certificates_config" {
description = "Mesh certificates configuration"
Expand Down Expand Up @@ -228,9 +233,4 @@ output "identity_service_enabled" {
description = "Whether Identity Service is enabled"
value = local.cluster_pod_security_policy_enabled
}

# Exposes the tpu_ipv4_cidr_block attribute of the primary cluster.
# Evaluates to null when var.enable_tpu is false.
output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}
{% endif %}
3 changes: 1 addition & 2 deletions autogen/main/variables.tf.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -600,13 +600,12 @@ variable "deletion_protection" {
default = true
}

{% if beta_cluster %}
# Boolean switch for Cloud TPU support; off (false) unless the caller sets it.
# Per the description below, flipping it on an existing cluster is destructive.
variable "enable_tpu" {
type = bool
description = "Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive!"
default = false
}
{% endif %}

{% if autopilot_cluster != true %}
variable "network_policy" {
type = bool
Expand Down
2 changes: 1 addition & 1 deletion cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ resource "google_container_cluster" "primary" {
}

enable_kubernetes_alpha = var.enable_kubernetes_alpha

enable_tpu = var.enable_tpu
dynamic "master_authorized_networks_config" {
for_each = local.master_authorized_networks_config
content {
Expand Down
35 changes: 35 additions & 0 deletions firewall.tf
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,41 @@ resource "google_compute_firewall" "intra_egress" {
}


/******************************************
Allow egress to the TPU IPv4 CIDR block

This rule is defined separately from the
intra_egress rule above since it requires
an output from the google_container_cluster
resource.

https://github.com/terraform-google-modules/terraform-google-kubernetes-engine/issues/1124
*****************************************/
resource "google_compute_firewall" "tpu_egress" {
# Created only when cluster firewall rules are requested AND TPU support is enabled.
count = var.add_cluster_firewall_rules && var.enable_tpu ? 1 : 0
# The cluster name is truncated to at most 36 characters before being embedded
# in the rule name (presumably to stay within the firewall name length limit — TODO confirm).
name = "gke-${substr(var.name, 0, min(36, length(var.name)))}-tpu-egress"
description = "Managed by terraform gke module: Allow pods to communicate with TPUs"
project = local.network_project_id
network = var.network
priority = var.firewall_priority
direction = "EGRESS"

# Applies to instances carrying the cluster network tag; the destination is the
# TPU CIDR range reported by the cluster resource itself.
target_tags = [local.cluster_network_tag]
destination_ranges = [google_container_cluster.primary.tpu_ipv4_cidr_block]

# Allow all possible protocols
allow { protocol = "tcp" }
allow { protocol = "udp" }
allow { protocol = "icmp" }
allow { protocol = "sctp" }
allow { protocol = "esp" }
allow { protocol = "ah" }

# Explicit ordering on the cluster, in addition to the implicit dependency
# already created by the destination_ranges reference above.
depends_on = [
google_container_cluster.primary,
]
}

/******************************************
Allow GKE master to hit non 443 ports for
Webhooks/Admission Controllers
Expand Down
1 change: 0 additions & 1 deletion modules/beta-autopilot-private-cluster/firewall.tf
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@ resource "google_compute_firewall" "tpu_egress" {

}


/******************************************
Allow GKE master to hit non 443 ports for
Webhooks/Admission Controllers
Expand Down
10 changes: 5 additions & 5 deletions modules/beta-autopilot-private-cluster/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,11 @@ output "identity_namespace" {
]
}

# Exposes the tpu_ipv4_cidr_block attribute of the primary cluster.
# Evaluates to null when var.enable_tpu is false.
output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}



output "master_ipv4_cidr_block" {
Expand Down Expand Up @@ -183,8 +188,3 @@ output "identity_service_enabled" {
description = "Whether Identity Service is enabled"
value = local.cluster_pod_security_policy_enabled
}

# Exposes the tpu_ipv4_cidr_block attribute of the primary cluster.
# Evaluates to null when var.enable_tpu is false.
output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}
1 change: 1 addition & 0 deletions modules/beta-autopilot-private-cluster/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,7 @@ variable "enable_tpu" {
description = "Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive!"
default = false
}

variable "database_encryption" {
description = "Application-layer Secrets Encryption settings. The object format is {state = string, key_name = string}. Valid values of state are: \"ENCRYPTED\"; \"DECRYPTED\". key_name is the name of a CloudKMS key."
type = list(object({ state = string, key_name = string }))
Expand Down
1 change: 0 additions & 1 deletion modules/beta-autopilot-public-cluster/firewall.tf
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ resource "google_compute_firewall" "tpu_egress" {
]
}


/******************************************
Allow GKE master to hit non 443 ports for
Webhooks/Admission Controllers
Expand Down
10 changes: 5 additions & 5 deletions modules/beta-autopilot-public-cluster/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,11 @@ output "identity_namespace" {
]
}

# Exposes the tpu_ipv4_cidr_block attribute of the primary cluster.
# Evaluates to null when var.enable_tpu is false.
output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}



output "cloudrun_enabled" {
Expand Down Expand Up @@ -173,8 +178,3 @@ output "identity_service_enabled" {
description = "Whether Identity Service is enabled"
value = local.cluster_pod_security_policy_enabled
}

# Exposes the tpu_ipv4_cidr_block attribute of the primary cluster.
# Evaluates to null when var.enable_tpu is false.
output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}
1 change: 1 addition & 0 deletions modules/beta-autopilot-public-cluster/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,7 @@ variable "enable_tpu" {
description = "Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive!"
default = false
}

variable "database_encryption" {
description = "Application-layer Secrets Encryption settings. The object format is {state = string, key_name = string}. Valid values of state are: \"ENCRYPTED\"; \"DECRYPTED\". key_name is the name of a CloudKMS key."
type = list(object({ state = string, key_name = string }))
Expand Down
5 changes: 2 additions & 3 deletions modules/beta-private-cluster-update-variant/cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,9 @@ resource "google_container_cluster" "primary" {
}
}

enable_kubernetes_alpha = var.enable_kubernetes_alpha

enable_intranode_visibility = var.enable_intranode_visibility
enable_kubernetes_alpha = var.enable_kubernetes_alpha
enable_tpu = var.enable_tpu
enable_intranode_visibility = var.enable_intranode_visibility

dynamic "pod_security_policy_config" {
for_each = var.enable_pod_security_policy ? [var.enable_pod_security_policy] : []
Expand Down
1 change: 0 additions & 1 deletion modules/beta-private-cluster-update-variant/firewall.tf
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@ resource "google_compute_firewall" "tpu_egress" {

}


/******************************************
Allow GKE master to hit non 443 ports for
Webhooks/Admission Controllers
Expand Down
10 changes: 5 additions & 5 deletions modules/beta-private-cluster-update-variant/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,11 @@ output "identity_namespace" {
]
}

# Exposes the tpu_ipv4_cidr_block attribute of the primary cluster.
# Evaluates to null when var.enable_tpu is false.
output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}

output "mesh_certificates_config" {
description = "Mesh certificates configuration"
value = local.cluster_mesh_certificates_config
Expand Down Expand Up @@ -209,8 +214,3 @@ output "identity_service_enabled" {
description = "Whether Identity Service is enabled"
value = local.cluster_pod_security_policy_enabled
}

# Exposes the tpu_ipv4_cidr_block attribute of the primary cluster.
# Evaluates to null when var.enable_tpu is false.
output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}
1 change: 1 addition & 0 deletions modules/beta-private-cluster-update-variant/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,7 @@ variable "enable_tpu" {
description = "Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive!"
default = false
}

variable "network_policy" {
type = bool
description = "Enable network policy addon"
Expand Down
5 changes: 2 additions & 3 deletions modules/beta-private-cluster/cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,9 @@ resource "google_container_cluster" "primary" {
}
}

enable_kubernetes_alpha = var.enable_kubernetes_alpha

enable_intranode_visibility = var.enable_intranode_visibility
enable_kubernetes_alpha = var.enable_kubernetes_alpha
enable_tpu = var.enable_tpu
enable_intranode_visibility = var.enable_intranode_visibility

dynamic "pod_security_policy_config" {
for_each = var.enable_pod_security_policy ? [var.enable_pod_security_policy] : []
Expand Down
1 change: 0 additions & 1 deletion modules/beta-private-cluster/firewall.tf
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@ resource "google_compute_firewall" "tpu_egress" {

}


/******************************************
Allow GKE master to hit non 443 ports for
Webhooks/Admission Controllers
Expand Down
10 changes: 5 additions & 5 deletions modules/beta-private-cluster/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,11 @@ output "identity_namespace" {
]
}

# Exposes the tpu_ipv4_cidr_block attribute of the primary cluster.
# Evaluates to null when var.enable_tpu is false.
output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}

output "mesh_certificates_config" {
description = "Mesh certificates configuration"
value = local.cluster_mesh_certificates_config
Expand Down Expand Up @@ -209,8 +214,3 @@ output "identity_service_enabled" {
description = "Whether Identity Service is enabled"
value = local.cluster_pod_security_policy_enabled
}

# Exposes the tpu_ipv4_cidr_block attribute of the primary cluster.
# Evaluates to null when var.enable_tpu is false.
output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}
1 change: 1 addition & 0 deletions modules/beta-private-cluster/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,7 @@ variable "enable_tpu" {
description = "Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive!"
default = false
}

variable "network_policy" {
type = bool
description = "Enable network policy addon"
Expand Down
5 changes: 2 additions & 3 deletions modules/beta-public-cluster-update-variant/cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,9 @@ resource "google_container_cluster" "primary" {
}
}

enable_kubernetes_alpha = var.enable_kubernetes_alpha

enable_intranode_visibility = var.enable_intranode_visibility
enable_kubernetes_alpha = var.enable_kubernetes_alpha
enable_tpu = var.enable_tpu
enable_intranode_visibility = var.enable_intranode_visibility

dynamic "pod_security_policy_config" {
for_each = var.enable_pod_security_policy ? [var.enable_pod_security_policy] : []
Expand Down
1 change: 0 additions & 1 deletion modules/beta-public-cluster-update-variant/firewall.tf
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ resource "google_compute_firewall" "tpu_egress" {
]
}


/******************************************
Allow GKE master to hit non 443 ports for
Webhooks/Admission Controllers
Expand Down
10 changes: 5 additions & 5 deletions modules/beta-public-cluster-update-variant/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,11 @@ output "identity_namespace" {
]
}

# Exposes the tpu_ipv4_cidr_block attribute of the primary cluster.
# Evaluates to null when var.enable_tpu is false.
output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}

output "mesh_certificates_config" {
description = "Mesh certificates configuration"
value = local.cluster_mesh_certificates_config
Expand Down Expand Up @@ -199,8 +204,3 @@ output "identity_service_enabled" {
description = "Whether Identity Service is enabled"
value = local.cluster_pod_security_policy_enabled
}

# Exposes the tpu_ipv4_cidr_block attribute of the primary cluster.
# Evaluates to null when var.enable_tpu is false.
output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}
1 change: 1 addition & 0 deletions modules/beta-public-cluster-update-variant/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,7 @@ variable "enable_tpu" {
description = "Enable Cloud TPU resources in the cluster. WARNING: changing this after cluster creation is destructive!"
default = false
}

variable "network_policy" {
type = bool
description = "Enable network policy addon"
Expand Down
5 changes: 2 additions & 3 deletions modules/beta-public-cluster/cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,9 @@ resource "google_container_cluster" "primary" {
}
}

enable_kubernetes_alpha = var.enable_kubernetes_alpha

enable_intranode_visibility = var.enable_intranode_visibility
enable_kubernetes_alpha = var.enable_kubernetes_alpha
enable_tpu = var.enable_tpu
enable_intranode_visibility = var.enable_intranode_visibility

dynamic "pod_security_policy_config" {
for_each = var.enable_pod_security_policy ? [var.enable_pod_security_policy] : []
Expand Down
1 change: 0 additions & 1 deletion modules/beta-public-cluster/firewall.tf
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ resource "google_compute_firewall" "tpu_egress" {
]
}


/******************************************
Allow GKE master to hit non 443 ports for
Webhooks/Admission Controllers
Expand Down
10 changes: 5 additions & 5 deletions modules/beta-public-cluster/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,11 @@ output "identity_namespace" {
]
}

# Exposes the tpu_ipv4_cidr_block attribute of the primary cluster.
# Evaluates to null when var.enable_tpu is false.
output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}

output "mesh_certificates_config" {
description = "Mesh certificates configuration"
value = local.cluster_mesh_certificates_config
Expand Down Expand Up @@ -199,8 +204,3 @@ output "identity_service_enabled" {
description = "Whether Identity Service is enabled"
value = local.cluster_pod_security_policy_enabled
}

# Exposes the tpu_ipv4_cidr_block attribute of the primary cluster.
# Evaluates to null when var.enable_tpu is false.
output "tpu_ipv4_cidr_block" {
description = "The IP range in CIDR notation used for the TPUs"
value = var.enable_tpu ? google_container_cluster.primary.tpu_ipv4_cidr_block : null
}
Loading