diff --git a/mmv1/third_party/terraform/tests/resource_container_node_pool_test.go.erb b/mmv1/third_party/terraform/tests/resource_container_node_pool_test.go.erb
index a640a0b9a9d9..a25cdb7583fd 100644
--- a/mmv1/third_party/terraform/tests/resource_container_node_pool_test.go.erb
+++ b/mmv1/third_party/terraform/tests/resource_container_node_pool_test.go.erb
@@ -2274,6 +2274,10 @@ resource "google_container_node_pool" "np_with_gpu" {
       type               = "nvidia-tesla-a100"
       gpu_partition_size = "1g.5gb"
       count              = 1
+      gpu_sharing_config {
+        gpu_sharing_strategy       = "TIME_SHARING"
+        max_shared_clients_per_gpu = 2
+      }
     }
   }
 }
diff --git a/mmv1/third_party/terraform/utils/node_config.go.erb b/mmv1/third_party/terraform/utils/node_config.go.erb
index 83b8cff50e29..953a5ae652d1 100644
--- a/mmv1/third_party/terraform/utils/node_config.go.erb
+++ b/mmv1/third_party/terraform/utils/node_config.go.erb
@@ -102,6 +102,30 @@ func schemaNodeConfig() *schema.Schema {
 						ForceNew:    true,
 						Description: `Size of partitions to create on the GPU. Valid values are described in the NVIDIA mig user guide (https://docs.nvidia.com/datacenter/tesla/mig-user-guide/#partitioning)`,
 					},
+					"gpu_sharing_config": &schema.Schema{
+						Type:        schema.TypeList,
+						MaxItems:    1,
+						Optional:    true,
+						ForceNew:    true,
+						ConfigMode:  schema.SchemaConfigModeAttr,
+						Description: `Configuration for GPU sharing.`,
+						Elem: &schema.Resource{
+							Schema: map[string]*schema.Schema{
+								"gpu_sharing_strategy": &schema.Schema{
+									Type:        schema.TypeString,
+									Required:    true,
+									ForceNew:    true,
+									Description: `The type of GPU sharing strategy to enable on the GPU node. Possible values are described in the API package (https://pkg.go.dev/google.golang.org/api/container/v1#GPUSharingConfig)`,
+								},
+								"max_shared_clients_per_gpu": &schema.Schema{
+									Type:        schema.TypeInt,
+									Required:    true,
+									ForceNew:    true,
+									Description: `The maximum number of containers that can share a GPU.`,
+								},
+							},
+						},
+					},
 				},
 			},
 		},
@@ -491,11 +515,21 @@ func expandNodeConfig(v interface{}) *container.NodeConfig {
 			if data["count"].(int) == 0 {
 				continue
 			}
-			guestAccelerators = append(guestAccelerators, &container.AcceleratorConfig{
+			guestAcceleratorConfig := &container.AcceleratorConfig{
 				AcceleratorCount: int64(data["count"].(int)),
 				AcceleratorType:  data["type"].(string),
 				GpuPartitionSize: data["gpu_partition_size"].(string),
-			})
+			}
+
+			if v, ok := data["gpu_sharing_config"]; ok && len(v.([]interface{})) > 0 {
+				gpuSharingConfig := data["gpu_sharing_config"].([]interface{})[0].(map[string]interface{})
+				guestAcceleratorConfig.GpuSharingConfig = &container.GPUSharingConfig{
+					GpuSharingStrategy:     gpuSharingConfig["gpu_sharing_strategy"].(string),
+					MaxSharedClientsPerGpu: int64(gpuSharingConfig["max_shared_clients_per_gpu"].(int)),
+				}
+			}
+
+			guestAccelerators = append(guestAccelerators, guestAcceleratorConfig)
 		}
 		nc.Accelerators = guestAccelerators
 	}
@@ -795,11 +829,20 @@ func flattenNodeConfig(c *container.NodeConfig) []map[string]interface{} {
 func flattenContainerGuestAccelerators(c []*container.AcceleratorConfig) []map[string]interface{} {
 	result := []map[string]interface{}{}
 	for _, accel := range c {
-		result = append(result, map[string]interface{}{
+		accelerator := map[string]interface{}{
 			"count":              accel.AcceleratorCount,
 			"type":               accel.AcceleratorType,
 			"gpu_partition_size": accel.GpuPartitionSize,
-		})
+		}
+		if accel.GpuSharingConfig != nil {
+			accelerator["gpu_sharing_config"] = []map[string]interface{}{
+				{
+					"gpu_sharing_strategy":       accel.GpuSharingConfig.GpuSharingStrategy,
+					"max_shared_clients_per_gpu": accel.GpuSharingConfig.MaxSharedClientsPerGpu,
+				},
+			}
+		}
+		result = append(result, accelerator)
 	}
 	return result
 }
diff --git a/mmv1/third_party/terraform/website/docs/r/container_cluster.html.markdown b/mmv1/third_party/terraform/website/docs/r/container_cluster.html.markdown
index 956a15f1cca6..4dc0e3a516eb 100755
--- a/mmv1/third_party/terraform/website/docs/r/container_cluster.html.markdown
+++ b/mmv1/third_party/terraform/website/docs/r/container_cluster.html.markdown
@@ -843,6 +843,16 @@ linux_node_config {
 
 * `gpu_partition_size` (Optional) - Size of partitions to create on the GPU. Valid values are described in the NVIDIA mig [user guide](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/#partitioning).
 
+* `gpu_sharing_config` (Optional) - Configuration for GPU sharing. Structure is [documented below](#nested_gpu_sharing_config).
+
+<a name="nested_gpu_sharing_config"></a>The `gpu_sharing_config` block supports:
+
+* `gpu_sharing_strategy` (Required) - The type of GPU sharing strategy to enable on the GPU node.
+  Accepted values are:
+  * `"TIME_SHARING"`: Allow multiple containers to have [time-shared](https://cloud.google.com/kubernetes-engine/docs/concepts/timesharing-gpus) access to a single GPU device.
+
+* `max_shared_clients_per_gpu` (Required) - The maximum number of containers that can share a GPU.
+
 The `workload_identity_config` block supports:
 
 * `workload_pool` (Optional) - The workload pool to attach all Kubernetes service accounts to.