diff --git a/community/modules/compute/htcondor-execute-point/gpu_definition.tf b/community/modules/compute/htcondor-execute-point/gpu_definition.tf index 6c5d96d286..c6c3944332 100644 --- a/community/modules/compute/htcondor-execute-point/gpu_definition.tf +++ b/community/modules/compute/htcondor-execute-point/gpu_definition.tf @@ -47,11 +47,11 @@ locals { "g2-standard-48" = { type = "nvidia-l4", count = 4 }, "g2-standard-96" = { type = "nvidia-l4", count = 8 }, } - generated_guest_accelerator = try([local.accelerator_machines[var.machine_type]], []) + generated_guest_accelerator = try([local.accelerator_machines[var.machine_type]], [{ count = 0, type = "" }]) # Select in priority order: # (1) var.guest_accelerator if not empty # (2) local.generated_guest_accelerator if not empty # (3) default to empty list if both are empty - guest_accelerator = try(coalescelist(var.guest_accelerator, local.generated_guest_accelerator), []) + guest_accelerator = try(coalescelist(var.guest_accelerator, local.generated_guest_accelerator), [{ count = 0, type = "" }]) } diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/gpu_definition.tf b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/gpu_definition.tf index 6c5d96d286..c6c3944332 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/gpu_definition.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/gpu_definition.tf @@ -47,11 +47,11 @@ locals { "g2-standard-48" = { type = "nvidia-l4", count = 4 }, "g2-standard-96" = { type = "nvidia-l4", count = 8 }, } - generated_guest_accelerator = try([local.accelerator_machines[var.machine_type]], []) + generated_guest_accelerator = try([local.accelerator_machines[var.machine_type]], [{ count = 0, type = "" }]) # Select in priority order: # (1) var.guest_accelerator if not empty # (2) local.generated_guest_accelerator if not empty # (3) default to empty list if both are empty - guest_accelerator = try(coalescelist(var.guest_accelerator, local.generated_guest_accelerator), []) + guest_accelerator = try(coalescelist(var.guest_accelerator, local.generated_guest_accelerator), [{ count = 0, type = "" }]) } diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/gpu_definition.tf b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/gpu_definition.tf index 6c5d96d286..c6c3944332 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/gpu_definition.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/gpu_definition.tf @@ -47,11 +47,11 @@ locals { "g2-standard-48" = { type = "nvidia-l4", count = 4 }, "g2-standard-96" = { type = "nvidia-l4", count = 8 }, } - generated_guest_accelerator = try([local.accelerator_machines[var.machine_type]], []) + generated_guest_accelerator = try([local.accelerator_machines[var.machine_type]], [{ count = 0, type = "" }]) # Select in priority order: # (1) var.guest_accelerator if not empty # (2) local.generated_guest_accelerator if not empty # (3) default to empty list if both are empty - guest_accelerator = try(coalescelist(var.guest_accelerator, local.generated_guest_accelerator), []) + guest_accelerator = try(coalescelist(var.guest_accelerator, local.generated_guest_accelerator), [{ count = 0, type = "" }]) } diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/gpu_definition.tf b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/gpu_definition.tf index 6c5d96d286..c6c3944332 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/gpu_definition.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/gpu_definition.tf @@ -47,11 +47,11 @@ locals { "g2-standard-48" = { type = "nvidia-l4", count = 4 }, "g2-standard-96" = { type = "nvidia-l4", count = 8 }, } - generated_guest_accelerator = try([local.accelerator_machines[var.machine_type]], []) + generated_guest_accelerator = try([local.accelerator_machines[var.machine_type]], [{ count = 0, type = "" }]) # Select in priority order: # (1) var.guest_accelerator if not empty # (2) local.generated_guest_accelerator if not empty # (3) default to empty list if both are empty - guest_accelerator = try(coalescelist(var.guest_accelerator, local.generated_guest_accelerator), []) + guest_accelerator = try(coalescelist(var.guest_accelerator, local.generated_guest_accelerator), [{ count = 0, type = "" }]) } diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/gpu_definition.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/gpu_definition.tf index 6c5d96d286..c6c3944332 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/gpu_definition.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/gpu_definition.tf @@ -47,11 +47,11 @@ locals { "g2-standard-48" = { type = "nvidia-l4", count = 4 }, "g2-standard-96" = { type = "nvidia-l4", count = 8 }, } - generated_guest_accelerator = try([local.accelerator_machines[var.machine_type]], []) + generated_guest_accelerator = try([local.accelerator_machines[var.machine_type]], [{ count = 0, type = "" }]) # Select in priority order: # (1) var.guest_accelerator if not empty # (2) local.generated_guest_accelerator if not empty # (3) default to empty list if both are empty - guest_accelerator = try(coalescelist(var.guest_accelerator, local.generated_guest_accelerator), []) + guest_accelerator = try(coalescelist(var.guest_accelerator, local.generated_guest_accelerator), [{ count = 0, type = "" }]) } diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/gpu_definition.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/gpu_definition.tf index 6c5d96d286..c6c3944332 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/gpu_definition.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/gpu_definition.tf @@ -47,11 +47,11 @@ locals { "g2-standard-48" = { type = "nvidia-l4", count = 4 }, "g2-standard-96" = { type = "nvidia-l4", count = 8 }, } - generated_guest_accelerator = try([local.accelerator_machines[var.machine_type]], []) + generated_guest_accelerator = try([local.accelerator_machines[var.machine_type]], [{ count = 0, type = "" }]) # Select in priority order: # (1) var.guest_accelerator if not empty # (2) local.generated_guest_accelerator if not empty # (3) default to empty list if both are empty - guest_accelerator = try(coalescelist(var.guest_accelerator, local.generated_guest_accelerator), []) + guest_accelerator = try(coalescelist(var.guest_accelerator, local.generated_guest_accelerator), [{ count = 0, type = "" }]) } diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/gpu_definition.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/gpu_definition.tf index 6c5d96d286..c6c3944332 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/gpu_definition.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/gpu_definition.tf @@ -47,11 +47,11 @@ locals { "g2-standard-48" = { type = "nvidia-l4", count = 4 }, "g2-standard-96" = { type = "nvidia-l4", count = 8 }, } - generated_guest_accelerator = try([local.accelerator_machines[var.machine_type]], []) + generated_guest_accelerator = try([local.accelerator_machines[var.machine_type]], [{ count = 0, type = "" }]) # Select in priority order: # (1) var.guest_accelerator if not empty # (2) local.generated_guest_accelerator if not empty # (3) default to empty list if both are empty - guest_accelerator = try(coalescelist(var.guest_accelerator, local.generated_guest_accelerator), []) + guest_accelerator = try(coalescelist(var.guest_accelerator, local.generated_guest_accelerator), [{ count = 0, type = "" }]) } diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-login/gpu_definition.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-login/gpu_definition.tf index 6c5d96d286..c6c3944332 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-login/gpu_definition.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-login/gpu_definition.tf @@ -47,11 +47,11 @@ locals { "g2-standard-48" = { type = "nvidia-l4", count = 4 }, "g2-standard-96" = { type = "nvidia-l4", count = 8 }, } - generated_guest_accelerator = try([local.accelerator_machines[var.machine_type]], []) + generated_guest_accelerator = try([local.accelerator_machines[var.machine_type]], [{ count = 0, type = "" }]) # Select in priority order: # (1) var.guest_accelerator if not empty # (2) local.generated_guest_accelerator if not empty # (3) default to empty list if both are empty - guest_accelerator = try(coalescelist(var.guest_accelerator, local.generated_guest_accelerator), []) + guest_accelerator = try(coalescelist(var.guest_accelerator, local.generated_guest_accelerator), [{ count = 0, type = "" }]) } diff --git a/modules/compute/gke-node-pool/gpu_definition.tf b/modules/compute/gke-node-pool/gpu_definition.tf index 6c5d96d286..c6c3944332 100644 --- a/modules/compute/gke-node-pool/gpu_definition.tf +++ b/modules/compute/gke-node-pool/gpu_definition.tf @@ -47,11 +47,11 @@ locals { "g2-standard-48" = { type = "nvidia-l4", count = 4 }, "g2-standard-96" = { type = "nvidia-l4", count = 8 }, } - generated_guest_accelerator = try([local.accelerator_machines[var.machine_type]], []) + generated_guest_accelerator = try([local.accelerator_machines[var.machine_type]], [{ count = 0, type = "" }]) # Select in priority order: # (1) var.guest_accelerator if not empty # (2) local.generated_guest_accelerator if not empty # (3) default to empty list if both are empty - guest_accelerator = try(coalescelist(var.guest_accelerator, local.generated_guest_accelerator), []) + guest_accelerator = try(coalescelist(var.guest_accelerator, local.generated_guest_accelerator), [{ count = 0, type = "" }]) } diff --git a/modules/compute/gke-node-pool/main.tf b/modules/compute/gke-node-pool/main.tf index f391532976..f7ef813496 100644 --- a/modules/compute/gke-node-pool/main.tf +++ b/modules/compute/gke-node-pool/main.tf @@ -23,7 +23,7 @@ locals { sa_email = var.service_account_email != null ? var.service_account_email : data.google_compute_default_service_account.default_sa.email preattached_gpu_machine_family = contains(["a2", "a3", "g2"], local.machine_family) - has_gpu = (local.guest_accelerator != null && length(local.guest_accelerator) > 0) || local.preattached_gpu_machine_family + has_gpu = (local.guest_accelerator != null && (length([for ga in local.guest_accelerator : ga if ga.count > 0]) > 0)) || local.preattached_gpu_machine_family gpu_taint = local.has_gpu ? [{ key = "nvidia.com/gpu" value = "present" @@ -89,13 +89,13 @@ resource "google_container_node_pool" "node_pool" { image_type = var.image_type dynamic "guest_accelerator" { - for_each = local.guest_accelerator + for_each = { for idx, ga in local.guest_accelerator : idx => ga if ga.count > 0 } content { type = coalesce(guest_accelerator.value.type, try(local.generated_guest_accelerator[0].type, "")) count = coalesce(try(guest_accelerator.value.count, 0) > 0 ? guest_accelerator.value.count : try(local.generated_guest_accelerator[0].count, "0")) gpu_driver_installation_config = coalescelist(try(guest_accelerator.value.gpu_driver_installation_config, []), [{ gpu_driver_version = "DEFAULT" }]) gpu_partition_size = try(guest_accelerator.value.gpu_partition_size, "") - gpu_sharing_config = try(guest_accelerator.value.gpu_sharing_config, []) + gpu_sharing_config = try(guest_accelerator.value.gpu_sharing_config, null) } } diff --git a/modules/compute/gke-node-pool/reservation_definitions.tf b/modules/compute/gke-node-pool/reservation_definitions.tf index d40cc5b01f..a75246b185 100644 --- a/modules/compute/gke-node-pool/reservation_definitions.tf +++ b/modules/compute/gke-node-pool/reservation_definitions.tf @@ -55,7 +55,7 @@ locals { }] nodepool_vm_properties = { "machine_type" : var.machine_type - "guest_accelerators" : { for acc in try(local.guest_accelerator, []) : coalesce(acc.type, try(local.generated_guest_accelerator[0].type, "")) => coalesce(acc.count, try(local.generated_guest_accelerator[0].count, 0)) }, + "guest_accelerators" : { for acc in try(local.guest_accelerator, []) : (acc.count > 0 ? coalesce(acc.type, try(local.generated_guest_accelerator[0].type, "")) : "") => acc.count if acc.count > 0 }, "local_ssds" : { "NVME" : coalesce(local.local_ssd_config.local_ssd_count_nvme_block, 0), "SCSI" : coalesce(local.local_ssd_config.local_ssd_count_ephemeral_storage, 0) diff --git a/modules/compute/vm-instance/gpu_definition.tf b/modules/compute/vm-instance/gpu_definition.tf index 6c5d96d286..c6c3944332 100644 --- a/modules/compute/vm-instance/gpu_definition.tf +++ b/modules/compute/vm-instance/gpu_definition.tf @@ -47,11 +47,11 @@ locals { "g2-standard-48" = { type = "nvidia-l4", count = 4 }, "g2-standard-96" = { type = "nvidia-l4", count = 8 }, } - generated_guest_accelerator = try([local.accelerator_machines[var.machine_type]], []) + generated_guest_accelerator = try([local.accelerator_machines[var.machine_type]], [{ count = 0, type = "" }]) # Select in priority order: # (1) var.guest_accelerator if not empty # (2) local.generated_guest_accelerator if not empty # (3) default to empty list if both are empty - guest_accelerator = try(coalescelist(var.guest_accelerator, local.generated_guest_accelerator), []) + guest_accelerator = try(coalescelist(var.guest_accelerator, local.generated_guest_accelerator), [{ count = 0, type = "" }]) } diff --git a/modules/compute/vm-instance/main.tf b/modules/compute/vm-instance/main.tf index 683fa77682..01207d701f 100644 --- a/modules/compute/vm-instance/main.tf +++ b/modules/compute/vm-instance/main.tf @@ -39,7 +39,7 @@ locals { # compact_placement : true when placement policy is provided and collocation set; false if unset compact_placement = try(var.placement_policy.collocation, null) != null - gpu_attached = contains(["a2", "g2"], local.machine_family) || length(local.guest_accelerator) > 0 + gpu_attached = contains(["a2", "g2"], local.machine_family) || (length([for ga in local.guest_accelerator : ga if ga.count > 0]) > 0) # both of these must be false if either compact placement or preemptible/spot instances are used # automatic restart is tolerant of GPUs while on host maintenance is not