Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable TPUs to use Shared VPC #7294

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/3939.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:enhancement
tpu: added `use_service_networking` to `google_tpu_node` which enables Shared VPC Support.
```
90 changes: 76 additions & 14 deletions google/resource_tpu_node.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,36 @@ func compareTpuNodeSchedulingConfig(k, old, new string, d *schema.ResourceData)
return false
}

// tpuNodeNetworkLinkRegex matches a network reference that carries a project
// segment. Capture group 1 is the project (ID or number) and capture group 2
// is the network name. It is compiled once instead of on every diff.
var tpuNodeNetworkLinkRegex = regexp.MustCompile("projects/(.+)/global/networks/(.+)")

// tpuNodeCustomizeDiff suppresses a spurious diff on the "network" field when
// the previous value refers to the project by number and the new value refers
// to the same project (and the same network) by another identifier.
//
// The diff is only rewritten when all of the following hold:
//   - the old value has the form projects/<number>/global/networks/<name>,
//   - the new value has the form projects/<project>/global/networks/<name>,
//   - both values name the same network, and
//   - the project named in the new value resolves to the old project number.
//
// In every other case the diff is left untouched and nil is returned. The
// project lookup is skipped entirely when it could not change the outcome, so
// short network names such as "default" or not-yet-known values no longer
// trigger a failing lookup with an empty project ID.
//
// An error is returned if the project lookup fails or if the new value cannot
// be written back to the diff.
func tpuNodeCustomizeDiff(diff *schema.ResourceDiff, meta interface{}) error {
	oldVal, newVal := diff.GetChange("network")
	oldNetwork, _ := oldVal.(string)
	newNetwork, _ := newVal.(string)

	// Only an old value that references the project by number can be an
	// alias of the new value; anything else is a genuine diff (or no diff).
	oldParts := tpuNodeNetworkLinkRegex.FindStringSubmatch(oldNetwork)
	if oldParts == nil {
		return nil
	}
	oldProjectNumber, err := strconv.ParseInt(oldParts[1], 10, 64)
	if err != nil {
		return nil
	}

	// Without a project segment in the new value there is nothing to resolve.
	newParts := tpuNodeNetworkLinkRegex.FindStringSubmatch(newNetwork)
	if newParts == nil {
		return nil
	}

	// A different network name is a real change and must not be suppressed,
	// even when both networks live in the same project.
	if oldParts[2] != newParts[2] {
		return nil
	}

	pid := newParts[1]
	config := meta.(*Config)
	project, err := config.clientResourceManager.Projects.Get(pid).Do()
	if err != nil {
		return fmt.Errorf("Failed to retrieve project, pid: %s, err: %s", pid, err)
	}

	if project.ProjectNumber != oldProjectNumber {
		return nil
	}

	// Same project and same network: keep the stored value so that no diff
	// (and therefore no ForceNew replacement) is planned.
	if err := diff.SetNew("network", oldVal); err != nil {
		return fmt.Errorf("Failed to suppress network diff: %s", err)
	}
	return nil
}
func validateHttpHeaders() schema.SchemaValidateFunc {
return func(i interface{}, k string) (s []string, es []error) {
headers := i.(map[string]interface{})
Expand Down Expand Up @@ -75,27 +105,15 @@ func resourceTPUNode() *schema.Resource {
Delete: schema.DefaultTimeout(15 * time.Minute),
},

CustomizeDiff: tpuNodeCustomizeDiff,

Schema: map[string]*schema.Schema{
"accelerator_type": {
Type: schema.TypeString,
Required: true,
ForceNew: true,
Description: `The type of hardware accelerators associated with this node.`,
},
"cidr_block": {
Type: schema.TypeString,
Required: true,
ForceNew: true,
Description: `The CIDR block that the TPU node will use when selecting an IP
address. This CIDR block must be a /29 block; the Compute Engine
networks API forbids a smaller block, and using a larger block would
be wasteful (a node can only consume one IP address).

Errors will occur if the CIDR block has already been used for a
currently existing TPU node, the CIDR block conflicts with any
subnetworks in the user's provided network, or the provided network
is peered with another network that is using that CIDR block.`,
},
"name": {
Type: schema.TypeString,
Required: true,
Expand All @@ -113,6 +131,22 @@ is peered with another network that is using that CIDR block.`,
ForceNew: true,
Description: `The GCP location for the TPU.`,
},
"cidr_block": {
Type: schema.TypeString,
Computed: true,
Optional: true,
ForceNew: true,
Description: `The CIDR block that the TPU node will use when selecting an IP
address. This CIDR block must be a /29 block; the Compute Engine
networks API forbids a smaller block, and using a larger block would
be wasteful (a node can only consume one IP address).

Errors will occur if the CIDR block has already been used for a
currently existing TPU node, the CIDR block conflicts with any
subnetworks in the user's provided network, or the provided network
is peered with another network that is using that CIDR block.`,
ConflictsWith: []string{"use_service_networking"},
},
"description": {
Type: schema.TypeString,
Optional: true,
Expand Down Expand Up @@ -156,6 +190,17 @@ used.`,
},
},
},
"use_service_networking": {
Type: schema.TypeBool,
Optional: true,
ForceNew: true,
Description: `Whether the VPC peering for the node is set up through Service Networking API.
The VPC Peering should be set up before provisioning the node. If this field is set,
cidr_block field should not be specified. If the network that you want to peer the
TPU Node to is a Shared VPC network, the node must be created with this this field enabled.`,
Default: false,
ConflictsWith: []string{"cidr_block"},
},
"network_endpoints": {
Type: schema.TypeList,
Computed: true,
Expand Down Expand Up @@ -235,6 +280,12 @@ func resourceTPUNodeCreate(d *schema.ResourceData, meta interface{}) error {
} else if v, ok := d.GetOkExists("cidr_block"); !isEmptyValue(reflect.ValueOf(cidrBlockProp)) && (ok || !reflect.DeepEqual(v, cidrBlockProp)) {
obj["cidrBlock"] = cidrBlockProp
}
useServiceNetworkingProp, err := expandTPUNodeUseServiceNetworking(d.Get("use_service_networking"), d, config)
if err != nil {
return err
} else if v, ok := d.GetOkExists("use_service_networking"); !isEmptyValue(reflect.ValueOf(useServiceNetworkingProp)) && (ok || !reflect.DeepEqual(v, useServiceNetworkingProp)) {
obj["useServiceNetworking"] = useServiceNetworkingProp
}
schedulingConfigProp, err := expandTPUNodeSchedulingConfig(d.Get("scheduling_config"), d, config)
if err != nil {
return err
Expand Down Expand Up @@ -358,6 +409,9 @@ func resourceTPUNodeRead(d *schema.ResourceData, meta interface{}) error {
if err := d.Set("service_account", flattenTPUNodeServiceAccount(res["serviceAccount"], d, config)); err != nil {
return fmt.Errorf("Error reading Node: %s", err)
}
if err := d.Set("use_service_networking", flattenTPUNodeUseServiceNetworking(res["useServiceNetworking"], d, config)); err != nil {
return fmt.Errorf("Error reading Node: %s", err)
}
if err := d.Set("scheduling_config", flattenTPUNodeSchedulingConfig(res["schedulingConfig"], d, config)); err != nil {
return fmt.Errorf("Error reading Node: %s", err)
}
Expand Down Expand Up @@ -519,6 +573,10 @@ func flattenTPUNodeServiceAccount(v interface{}, d *schema.ResourceData, config
return v
}

// flattenTPUNodeUseServiceNetworking converts the API response value of
// useServiceNetworking into the value stored for the use_service_networking
// field. No conversion is performed: v is returned unchanged, and d and
// config are unused.
func flattenTPUNodeUseServiceNetworking(v interface{}, d *schema.ResourceData, config *Config) interface{} {
return v
}

func flattenTPUNodeSchedulingConfig(v interface{}, d *schema.ResourceData, config *Config) interface{} {
if v == nil {
return nil
Expand Down Expand Up @@ -604,6 +662,10 @@ func expandTPUNodeCidrBlock(v interface{}, d TerraformResourceData, config *Conf
return v, nil
}

// expandTPUNodeUseServiceNetworking converts the configured
// use_service_networking value into the value sent as useServiceNetworking in
// the request body. No conversion is performed: v is returned unchanged with
// a nil error, and d and config are unused.
func expandTPUNodeUseServiceNetworking(v interface{}, d TerraformResourceData, config *Config) (interface{}, error) {
return v, nil
}

func expandTPUNodeSchedulingConfig(v interface{}, d TerraformResourceData, config *Config) (interface{}, error) {
l := v.([]interface{})
if len(l) == 0 || l[0] == nil {
Expand Down
22 changes: 20 additions & 2 deletions google/resource_tpu_node_generated_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,11 @@ resource "google_tpu_node" "tpu" {

accelerator_type = "v3-8"

cidr_block = "10.3.0.0/29"
tensorflow_version = data.google_tpu_tensorflow_versions.available.versions[0]

description = "Terraform Google Provider test TPU"
network = "default"
use_service_networking = true
network = google_service_networking_connection.private_service_connection.network

labels = {
foo = "bar"
Expand All @@ -116,6 +116,24 @@ resource "google_tpu_node" "tpu" {
preemptible = true
}
}

data "google_compute_network" "network" {
name = "default"
}

resource "google_compute_global_address" "service_range" {
name = "tf-test%{random_suffix}"
purpose = "VPC_PEERING"
address_type = "INTERNAL"
prefix_length = 16
network = data.google_compute_network.network.id
}

resource "google_service_networking_connection" "private_service_connection" {
network = data.google_compute_network.network.id
service = "servicenetworking.googleapis.com"
reserved_peering_ranges = [google_compute_global_address.service_range.name]
}
`, context)
}

Expand Down
53 changes: 39 additions & 14 deletions website/docs/r/tpu_node.html.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ A Cloud TPU instance.

To get more information about Node, see:

* [API documentation](https://cloud.google.com/tpu/docs/reference/rest/)
* [API documentation](https://cloud.google.com/tpu/docs/reference/rest/v1/projects.locations.nodes)
* How-to Guides
* [Official Documentation](https://cloud.google.com/tpu/docs/)

Expand Down Expand Up @@ -72,11 +72,11 @@ resource "google_tpu_node" "tpu" {

accelerator_type = "v3-8"

cidr_block = "10.3.0.0/29"
tensorflow_version = data.google_tpu_tensorflow_versions.available.versions[0]

description = "Terraform Google Provider test TPU"
network = "default"
use_service_networking = true
network = google_service_networking_connection.private_service_connection.network

labels = {
foo = "bar"
Expand All @@ -86,6 +86,24 @@ resource "google_tpu_node" "tpu" {
preemptible = true
}
}

data "google_compute_network" "network" {
name = "default"
}

resource "google_compute_global_address" "service_range" {
name = "tpu-service-range"
purpose = "VPC_PEERING"
address_type = "INTERNAL"
prefix_length = 16
network = data.google_compute_network.network.id
}

resource "google_service_networking_connection" "private_service_connection" {
network = data.google_compute_network.network.id
service = "servicenetworking.googleapis.com"
reserved_peering_ranges = [google_compute_global_address.service_range.name]
}
```

## Argument Reference
Expand All @@ -105,17 +123,6 @@ The following arguments are supported:
(Required)
The version of Tensorflow running in the Node.

* `cidr_block` -
(Required)
The CIDR block that the TPU node will use when selecting an IP
address. This CIDR block must be a /29 block; the Compute Engine
networks API forbids a smaller block, and using a larger block would
be wasteful (a node can only consume one IP address).
Errors will occur if the CIDR block has already been used for a
currently existing TPU node, the CIDR block conflicts with any
subnetworks in the user's provided network, or the provided network
is peered with another network that is using that CIDR block.

* `zone` -
(Required)
The GCP location for the TPU.
Expand All @@ -135,6 +142,24 @@ The following arguments are supported:
this API has been activated. If none is provided, "default" will be
used.

* `cidr_block` -
(Optional)
The CIDR block that the TPU node will use when selecting an IP
address. This CIDR block must be a /29 block; the Compute Engine
networks API forbids a smaller block, and using a larger block would
be wasteful (a node can only consume one IP address).
Errors will occur if the CIDR block has already been used for a
currently existing TPU node, the CIDR block conflicts with any
subnetworks in the user's provided network, or the provided network
is peered with another network that is using that CIDR block.

* `use_service_networking` -
(Optional)
Whether the VPC peering for the node is set up through Service Networking API.
The VPC Peering should be set up before provisioning the node. If this field is set,
cidr_block field should not be specified. If the network that you want to peer the
TPU Node to is a Shared VPC network, the node must be created with this field enabled.

* `scheduling_config` -
(Optional)
Sets the scheduling options for this TPU instance.
Expand Down