Skip to content

Commit

Permalink
Merge pull request GoogleCloudPlatform#3334 from GoogleCloudPlatform/…
Browse files Browse the repository at this point in the history
…release-candidate

Release 1.43.0
  • Loading branch information
ighosh98 authored Dec 5, 2024
2 parents 1a1e22a + 7ca11fc commit 995bd89
Show file tree
Hide file tree
Showing 119 changed files with 13,874 additions and 143 deletions.
2 changes: 1 addition & 1 deletion cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ HPC deployments on the Google Cloud Platform.`,
logging.Fatal("cmd.Help function failed: %s", err)
}
},
Version: "v1.42.0",
Version: "v1.43.0",
Annotations: annotation,
}
)
Expand Down
4 changes: 4 additions & 0 deletions community/examples/fsi-montecarlo-on-batch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
---
blueprint_name: fsi-montecarlo-on-batch

validators:
- validator: test_apis_enabled
skip: true # skipping this validator, since "service-enablement" will take care of it.

vars:
project_id: ## Set GCP Project ID Here ##
deployment_name: fsimontecarlo
Expand Down
2 changes: 1 addition & 1 deletion community/examples/hpc-build-slurm-image.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ vars:
image_build_machine_type: n2d-standard-16
build_from_image_family: hpc-rocky-linux-8
build_from_image_project: cloud-hpc-image-public
build_from_git_ref: 6.8.5
build_from_git_ref: 6.8.6
built_image_family: my-custom-slurm
built_instance_image:
family: $(vars.built_image_family)
Expand Down
27 changes: 22 additions & 5 deletions community/examples/xpk-gke-a3-megagpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ deployment_groups:
source: modules/network/vpc
settings:
subnetwork_name: xpk-gke-a3-megagpu-subnet
mtu: 8244
secondary_ranges:
xpk-gke-a3-megagpu-subnet:
- range_name: pods
Expand All @@ -47,6 +48,7 @@ deployment_groups:
global_ip_address_range: 192.169.0.0/16
network_count: 8
subnetwork_cidr_suffix: 24
mtu: 8244

- id: gke_cluster
source: modules/scheduler/gke-cluster
Expand All @@ -70,25 +72,40 @@ deployment_groups:
name: $(vars.deployment_name)-gp-np-1
group_placement_max_distance: 2

- id: a3_megagpu_pool_0
- id: system_pool
source: modules/compute/gke-node-pool
use: [gke_cluster]
settings:
name: system-pool
machine_type: "e2-standard-16"
autoscaling_total_min_nodes: 2
autoscaling_total_max_nodes: 8
initial_node_count: 2
zones: [$(vars.zone)]
host_maintenance_interval: PERIODIC
outputs: [instructions]

- id: a3_megagpu_pool_np_0
source: modules/compute/gke-node-pool
use: [gke_cluster, gpunets, group_placement_0]
settings:
name: a3-megagpu-pool-0
name: a3-megagpu-pool-np-0 # xpk naming scheme
machine_type: a3-megagpu-8g
autoscaling_total_min_nodes: 2
autoscaling_total_max_nodes: 150
initial_node_count: 2
zones: [$(vars.zone)]
host_maintenance_interval: PERIODIC
outputs: [instructions]

- id: a3_megagpu_pool_1
- id: a3_megagpu_pool_np_1
source: modules/compute/gke-node-pool
use: [gke_cluster, gpunets, group_placement_1]
settings:
name: a3-megagpu-pool-1
name: a3-megagpu-pool-np-1 # xpk naming scheme
machine_type: a3-megagpu-8g
autoscaling_total_min_nodes: 2
autoscaling_total_max_nodes: 150
initial_node_count: 2
zones: [$(vars.zone)]
host_maintenance_interval: PERIODIC
Expand All @@ -115,4 +132,4 @@ deployment_groups:
settings:
apply_manifests:
- source: $(ghpc_stage("xpk-gke-a3-megagpu-files"))/config-map.yaml.tftpl
template_vars: {name: "xpk-gke-a3-megagpu-resources-configmap", num_nodes: "4"}
template_vars: {name: "$(vars.deployment_name)-resources-configmap", num_nodes: "4"}
Original file line number Diff line number Diff line change
Expand Up @@ -83,13 +83,11 @@ data:
node_labels = node['node_labels']
if (
'cloud.google.com/gke-placement-group' in node_labels
and 'topology.gke.io/cluster' in node_labels
'topology.gke.io/cluster' in node_labels
and 'topology.gke.io/rack' in node_labels
and 'topology.gke.io/host' in node_labels
):
return (
node_labels['cloud.google.com/gke-placement-group'],
node_labels['topology.gke.io/cluster'],
node_labels['topology.gke.io/rack'],
node_labels['topology.gke.io/host'],
Expand Down Expand Up @@ -144,13 +142,6 @@ data:
node_name = node.metadata.name
node_labels = node.metadata.labels
if 'cloud.google.com/gke-placement-group' not in node_labels:
print(
f'Skipping node {node_name} because it does not have topology'
' metadata'
)
continue
skip_node = False
if node.spec.taints is not None:
for t in node.spec.taints:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ locals {
"a2-ultragpu-8g" = { type = "nvidia-a100-80gb", count = 8 },
"a3-highgpu-8g" = { type = "nvidia-h100-80gb", count = 8 },
"a3-megagpu-8g" = { type = "nvidia-h100-mega-80gb", count = 8 },
"a3-ultragpu-8g" = { type = "nvidia-h200-141gb", count = 8 },
"g2-standard-4" = { type = "nvidia-l4", count = 1 },
"g2-standard-8" = { type = "nvidia-l4", count = 1 },
"g2-standard-12" = { type = "nvidia-l4", count = 1 },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,6 @@ terraform {
}

provider_meta "google" {
module_name = "blueprints/terraform/hpc-toolkit:htcondor-execute-point/v1.42.0"
module_name = "blueprints/terraform/hpc-toolkit:htcondor-execute-point/v1.43.0"
}
}
2 changes: 1 addition & 1 deletion community/modules/compute/mig/versions.tf
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,6 @@ terraform {
}
}
provider_meta "google" {
module_name = "blueprints/terraform/hpc-toolkit:mig/v1.42.0"
module_name = "blueprints/terraform/hpc-toolkit:mig/v1.43.0"
}
}
2 changes: 1 addition & 1 deletion community/modules/compute/pbspro-execution/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ No resources.
| <a name="input_machine_type"></a> [machine\_type](#input\_machine\_type) | Machine type to use for the instance creation | `string` | `"c2-standard-60"` | no |
| <a name="input_metadata"></a> [metadata](#input\_metadata) | Metadata, provided as a map | `map(string)` | `{}` | no |
| <a name="input_name_prefix"></a> [name\_prefix](#input\_name\_prefix) | Name prefix for PBS execution hostnames | `string` | `null` | no |
| <a name="input_network_interfaces"></a> [network\_interfaces](#input\_network\_interfaces) | A list of network interfaces. The options match that of the terraform<br/>network\_interface block of google\_compute\_instance. For descriptions of the<br/>subfields or more information see the documentation:<br/>https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_instance#nested_network_interface<br/><br/>**\_NOTE:\_** If `network_interfaces` are set, `network_self_link` and<br/>`subnetwork_self_link` will be ignored, even if they are provided through<br/>the `use` field. `bandwidth_tier` and `enable_public_ips` also do not apply<br/>to network interfaces defined in this variable.<br/><br/>Subfields:<br/>network (string, required if subnetwork is not supplied)<br/>subnetwork (string, required if network is not supplied)<br/>subnetwork\_project (string, optional)<br/>network\_ip (string, optional)<br/>nic\_type (string, optional, choose from ["GVNIC", "VIRTIO\_NET"])<br/>stack\_type (string, optional, choose from ["IPV4\_ONLY", "IPV4\_IPV6"])<br/>queue\_count (number, optional)<br/>access\_config (object, optional)<br/>ipv6\_access\_config (object, optional)<br/>alias\_ip\_range (list(object), optional) | <pre>list(object({<br/> network = string,<br/> subnetwork = string,<br/> subnetwork_project = string,<br/> network_ip = string,<br/> nic_type = string,<br/> stack_type = string,<br/> queue_count = number,<br/> access_config = list(object({<br/> nat_ip = string,<br/> public_ptr_domain_name = string,<br/> network_tier = string<br/> })),<br/> ipv6_access_config = list(object({<br/> public_ptr_domain_name = string,<br/> network_tier = string<br/> })),<br/> alias_ip_range = list(object({<br/> ip_cidr_range = string,<br/> subnetwork_range_name = string<br/> }))<br/> }))</pre> | `[]` | no |
| <a name="input_network_interfaces"></a> [network\_interfaces](#input\_network\_interfaces) | A list of network interfaces. The options match that of the terraform<br/>network\_interface block of google\_compute\_instance. For descriptions of the<br/>subfields or more information see the documentation:<br/>https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_instance#nested_network_interface<br/><br/>**\_NOTE:\_** If `network_interfaces` are set, `network_self_link` and<br/>`subnetwork_self_link` will be ignored, even if they are provided through<br/>the `use` field. `bandwidth_tier` and `enable_public_ips` also do not apply<br/>to network interfaces defined in this variable.<br/><br/>Subfields:<br/>network (string, required if subnetwork is not supplied)<br/>subnetwork (string, required if network is not supplied)<br/>subnetwork\_project (string, optional)<br/>network\_ip (string, optional)<br/>nic\_type (string, optional, choose from ["GVNIC", "VIRTIO\_NET", "RDMA", "IRDMA", "MRDMA"])<br/>stack\_type (string, optional, choose from ["IPV4\_ONLY", "IPV4\_IPV6"])<br/>queue\_count (number, optional)<br/>access\_config (object, optional)<br/>ipv6\_access\_config (object, optional)<br/>alias\_ip\_range (list(object), optional) | <pre>list(object({<br/> network = string,<br/> subnetwork = string,<br/> subnetwork_project = string,<br/> network_ip = string,<br/> nic_type = string,<br/> stack_type = string,<br/> queue_count = number,<br/> access_config = list(object({<br/> nat_ip = string,<br/> public_ptr_domain_name = string,<br/> network_tier = string<br/> })),<br/> ipv6_access_config = list(object({<br/> public_ptr_domain_name = string,<br/> network_tier = string<br/> })),<br/> alias_ip_range = list(object({<br/> ip_cidr_range = string,<br/> subnetwork_range_name = string<br/> }))<br/> }))</pre> | `[]` | no |
| <a name="input_network_self_link"></a> [network\_self\_link](#input\_network\_self\_link) | The self link of the network to attach the VM. | `string` | `"default"` | no |
| <a name="input_network_storage"></a> [network\_storage](#input\_network\_storage) | An array of network attached storage mounts to be configured. | <pre>list(object({<br/> server_ip = string,<br/> remote_mount = string,<br/> local_mount = string,<br/> fs_type = string,<br/> mount_options = string,<br/> client_install_runner = map(string)<br/> mount_runner = map(string)<br/> }))</pre> | `[]` | no |
| <a name="input_on_host_maintenance"></a> [on\_host\_maintenance](#input\_on\_host\_maintenance) | Describes maintenance behavior for the instance. If left blank this will default to `MIGRATE` except for when `placement_policy`, spot provisioning, or GPUs require it to be `TERMINATE` | `string` | `null` | no |
Expand Down
2 changes: 1 addition & 1 deletion community/modules/compute/pbspro-execution/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ variable "network_interfaces" {
subnetwork (string, required if network is not supplied)
subnetwork_project (string, optional)
network_ip (string, optional)
nic_type (string, optional, choose from ["GVNIC", "VIRTIO_NET"])
nic_type (string, optional, choose from ["GVNIC", "VIRTIO_NET", "RDMA", "IRDMA", "MRDMA"])
stack_type (string, optional, choose from ["IPV4_ONLY", "IPV4_IPV6"])
queue_count (number, optional)
access_config (object, optional)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ locals {
"a2-ultragpu-8g" = { type = "nvidia-a100-80gb", count = 8 },
"a3-highgpu-8g" = { type = "nvidia-h100-80gb", count = 8 },
"a3-megagpu-8g" = { type = "nvidia-h100-mega-80gb", count = 8 },
"a3-ultragpu-8g" = { type = "nvidia-h200-141gb", count = 8 },
"g2-standard-4" = { type = "nvidia-l4", count = 1 },
"g2-standard-8" = { type = "nvidia-l4", count = 1 },
"g2-standard-12" = { type = "nvidia-l4", count = 1 },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ terraform {
}
}
provider_meta "google" {
module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v5-node-group/v1.42.0"
module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v5-node-group/v1.43.0"
}
required_version = ">= 1.1"
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ terraform {
}
}
provider_meta "google" {
module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v5-partition/v1.42.0"
module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v5-partition/v1.43.0"
}
required_version = ">= 0.13.0"
}
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ modules. For support with the underlying modules, see the instructions in the
| Name | Source | Version |
|------|--------|---------|
| <a name="module_slurm_nodeset_template"></a> [slurm\_nodeset\_template](#module\_slurm\_nodeset\_template) | github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | 6.8.5 |
| <a name="module_slurm_nodeset_template"></a> [slurm\_nodeset\_template](#module\_slurm\_nodeset\_template) | github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | 6.8.6 |
## Resources
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ locals {
"a2-ultragpu-8g" = { type = "nvidia-a100-80gb", count = 8 },
"a3-highgpu-8g" = { type = "nvidia-h100-80gb", count = 8 },
"a3-megagpu-8g" = { type = "nvidia-h100-mega-80gb", count = 8 },
"a3-ultragpu-8g" = { type = "nvidia-h200-141gb", count = 8 },
"g2-standard-4" = { type = "nvidia-l4", count = 1 },
"g2-standard-8" = { type = "nvidia-l4", count = 1 },
"g2-standard-12" = { type = "nvidia-l4", count = 1 },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ locals {
}

module "slurm_nodeset_template" {
source = "github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=6.8.5"
source = "github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=6.8.6"

project_id = var.project_id
region = var.region
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@ terraform {
}
}
provider_meta "google" {
module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-nodeset-dynamic/v1.42.0"
module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-nodeset-dynamic/v1.43.0"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,6 @@ terraform {
required_version = ">= 1.3"

provider_meta "google" {
module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-nodeset-tpu/v1.42.0"
module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-nodeset-tpu/v1.43.0"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ locals {
"a2-ultragpu-8g" = { type = "nvidia-a100-80gb", count = 8 },
"a3-highgpu-8g" = { type = "nvidia-h100-80gb", count = 8 },
"a3-megagpu-8g" = { type = "nvidia-h100-mega-80gb", count = 8 },
"a3-ultragpu-8g" = { type = "nvidia-h200-141gb", count = 8 },
"g2-standard-4" = { type = "nvidia-l4", count = 1 },
"g2-standard-8" = { type = "nvidia-l4", count = 1 },
"g2-standard-12" = { type = "nvidia-l4", count = 1 },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@ terraform {
}
}
provider_meta "google" {
module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-nodeset/v1.42.0"
module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-nodeset/v1.43.0"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,6 @@ terraform {
required_version = ">= 1.3"

provider_meta "google" {
module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-partition/v1.42.0"
module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-partition/v1.43.0"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ terraform {
}
}
provider_meta "google" {
module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.42.0"
module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.43.0"
}
provider_meta "google-beta" {
module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.42.0"
module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.43.0"
}

required_version = ">= 0.13.0"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ terraform {
}
}
provider_meta "google" {
module_name = "blueprints/terraform/hpc-toolkit:cloud-storage-bucket/v1.42.0"
module_name = "blueprints/terraform/hpc-toolkit:cloud-storage-bucket/v1.43.0"
}
required_version = ">= 0.14.0"
}
2 changes: 1 addition & 1 deletion community/modules/file-system/nfs-server/versions.tf
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ terraform {
}
}
provider_meta "google" {
module_name = "blueprints/terraform/hpc-toolkit:nfs-server/v1.42.0"
module_name = "blueprints/terraform/hpc-toolkit:nfs-server/v1.43.0"
}

required_version = ">= 0.14.0"
Expand Down
4 changes: 2 additions & 2 deletions community/modules/files/fsi-montecarlo-on-batch/versions.tf
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ terraform {
}
}
provider_meta "google" {
module_name = "blueprints/terraform/hpc-toolkit:fsi-montecarlo-on-batch/v1.42.0"
module_name = "blueprints/terraform/hpc-toolkit:fsi-montecarlo-on-batch/v1.43.0"
}
provider_meta "google-beta" {
module_name = "blueprints/terraform/hpc-toolkit:fsi-montecarlo-on-batch/v1.42.0"
module_name = "blueprints/terraform/hpc-toolkit:fsi-montecarlo-on-batch/v1.43.0"
}
}
1 change: 1 addition & 0 deletions community/modules/network/private-service-access/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ No modules.

| Name | Description |
|------|-------------|
| <a name="output_cidr_range"></a> [cidr\_range](#output\_cidr\_range) | CIDR range of the created google\_compute\_global\_address |
| <a name="output_connect_mode"></a> [connect\_mode](#output\_connect\_mode) | Services that use Private Service Access typically specify connect\_mode<br/>"PRIVATE\_SERVICE\_ACCESS". This output value sets connect\_mode and additionally<br/>blocks terraform actions until the VPC connection has been created. |
| <a name="output_private_vpc_connection_peering"></a> [private\_vpc\_connection\_peering](#output\_private\_vpc\_connection\_peering) | The name of the VPC Network peering connection that was created by the service provider. |
| <a name="output_reserved_ip_range"></a> [reserved\_ip\_range](#output\_reserved\_ip\_range) | Named IP range to be used by services connected with Private Service Access. |
Expand Down
5 changes: 5 additions & 0 deletions community/modules/network/private-service-access/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,8 @@ output "reserved_ip_range" {
description = "Named IP range to be used by services connected with Private Service Access."
value = google_compute_global_address.private_ip_alloc.name
}

output "cidr_range" {
description = "CIDR range of the created google_compute_global_address"
value = "${google_compute_global_address.private_ip_alloc.address}/${google_compute_global_address.private_ip_alloc.prefix_length}"
}
4 changes: 2 additions & 2 deletions community/modules/network/private-service-access/versions.tf
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,11 @@ terraform {
}
}
provider_meta "google" {
module_name = "blueprints/terraform/hpc-toolkit:private-service-access/v1.42.0"
module_name = "blueprints/terraform/hpc-toolkit:private-service-access/v1.43.0"
}

provider_meta "google-beta" {
module_name = "blueprints/terraform/hpc-toolkit:private-service-access/v1.42.0"
module_name = "blueprints/terraform/hpc-toolkit:private-service-access/v1.43.0"
}

required_version = ">= 1.2"
Expand Down
2 changes: 1 addition & 1 deletion community/modules/project/service-enablement/versions.tf
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ terraform {
}
}
provider_meta "google" {
module_name = "blueprints/terraform/hpc-toolkit:service-enablement/v1.42.0"
module_name = "blueprints/terraform/hpc-toolkit:service-enablement/v1.43.0"
}

required_version = ">= 0.14.0"
Expand Down
4 changes: 2 additions & 2 deletions community/modules/pubsub/bigquery-sub/versions.tf
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ terraform {
}
}
provider_meta "google" {
module_name = "blueprints/terraform/hpc-toolkit:bigquery-sub/v1.42.0"
module_name = "blueprints/terraform/hpc-toolkit:bigquery-sub/v1.43.0"
}
provider_meta "google-beta" {
module_name = "blueprints/terraform/hpc-toolkit:bigquery-sub/v1.42.0"
module_name = "blueprints/terraform/hpc-toolkit:bigquery-sub/v1.43.0"
}
required_version = ">= 1.0"
}
2 changes: 1 addition & 1 deletion community/modules/pubsub/topic/versions.tf
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,6 @@ terraform {
}
}
provider_meta "google" {
module_name = "blueprints/terraform/hpc-toolkit:topic/v1.42.0"
module_name = "blueprints/terraform/hpc-toolkit:topic/v1.43.0"
}
}
Loading

0 comments on commit 995bd89

Please sign in to comment.