From edf67fc5d040acfcd852a24a62d04acd2e4038f2 Mon Sep 17 00:00:00 2001 From: lcaggio Date: Tue, 18 Apr 2023 17:32:15 +0200 Subject: [PATCH 1/9] First commit --- .../data-solutions/vertex-mlops/README.md | 51 +++---- .../data-solutions/vertex-mlops/ci-cd.tf | 10 +- .../data-solutions/vertex-mlops/main.tf | 64 +++++---- .../data-solutions/vertex-mlops/notebooks.tf | 60 -------- .../data-solutions/vertex-mlops/outputs.tf | 16 +-- .../data-solutions/vertex-mlops/variables.tf | 93 ++++++------- .../data-solutions/vertex-mlops/vertex.tf | 128 ++++++++++++++++++ modules/project/service-agents.yaml | 1 + 8 files changed, 237 insertions(+), 186 deletions(-) delete mode 100644 blueprints/data-solutions/vertex-mlops/notebooks.tf create mode 100644 blueprints/data-solutions/vertex-mlops/vertex.tf diff --git a/blueprints/data-solutions/vertex-mlops/README.md b/blueprints/data-solutions/vertex-mlops/README.md index a204fee837..adc76bd376 100644 --- a/blueprints/data-solutions/vertex-mlops/README.md +++ b/blueprints/data-solutions/vertex-mlops/README.md @@ -52,63 +52,54 @@ This blueprint can be used as a building block for setting up an end2end ML Ops | name | description | type | required | default | |---|---|:---:|:---:|:---:| -| [project_id](variables.tf#L101) | Project id, references existing project if `project_create` is null. | string | ✓ | | +| [notebooks](variables.tf#L73) | Vertex AI workbenchs to be deployed. Service Account runtime/instances deployed. | map(object({…})) | ✓ | | +| [project_config](variables.tf#L100) | Provide 'billing_account_id' value if project creation is needed, uses existing 'project_id' if null. Parent is in 'folders/nnn' or 'organizations/nnn' format. | object({…}) | ✓ | | | [bucket_name](variables.tf#L18) | GCS bucket name to store the Vertex AI artifacts. | string | | null | | [dataset_name](variables.tf#L24) | BigQuery Dataset to store the training data. | string | | null | -| [groups](variables.tf#L30) | Name of the groups (name@domain.org) to apply opinionated IAM permissions. | object({…}) | | {…} | +| [groups](variables.tf#L30) | Name of the groups (name@domain.org) to apply opinionated IAM permissions. | object({…}) | | {…} | | [identity_pool_claims](variables.tf#L45) | Claims to be used by Workload Identity Federation (i.e.: attribute.repository/ORGANIZATION/REPO). If a not null value is provided, then google_iam_workload_identity_pool resource will be created. | string | | null | | [labels](variables.tf#L51) | Labels to be assigned at project level. | map(string) | | {} | | [location](variables.tf#L57) | Location used for multi-regional resources. | string | | "eu" | | [network_config](variables.tf#L63) | Shared VPC network configurations to use. If null networks will be created in projects with preconfigured values. | object({…}) | | null | -| [notebooks](variables.tf#L73) | Vertex AI workbenchs to be deployed. | map(object({…})) | | {} | -| [prefix](variables.tf#L86) | Prefix used for the project id. | string | | null | -| [project_create](variables.tf#L92) | Provide values if project creation is needed, uses existing project if null. Parent is in 'folders/nnn' or 'organizations/nnn' format. | object({…}) | | null | -| [project_services](variables.tf#L106) | List of core services enabled on all projects. | list(string) | | […] | -| [region](variables.tf#L126) | Region used for regional resources. | string | | "europe-west4" | -| [repo_name](variables.tf#L132) | Cloud Source Repository name. null to avoid to create it. | string | | null | -| [sa_mlops_name](variables.tf#L138) | Name for the MLOPs Service Account. | string | | "sa-mlops" | +| [prefix](variables.tf#L94) | Prefix used for the project id. | string | | null | +| [region](variables.tf#L113) | Region used for regional resources. | string | | "europe-west4" | +| [repo_name](variables.tf#L119) | Cloud Source Repository name. null to avoid to create it. | string | | null | +| [service_encryption_keys](variables.tf#L125) | Cloud KMS to use to encrypt different services. Key location should match service region. | object({…}) | | {…} | ## Outputs | name | description | sensitive | |---|---|:---:| -| [github](outputs.tf#L33) | Github Configuration. | | -| [notebook](outputs.tf#L39) | Vertex AI managed notebook details. | | -| [project](outputs.tf#L44) | The project resource as return by the `project` module. | | -| [project_id](outputs.tf#L49) | Project ID. | | +| [github](outputs.tf#L30) | Github Configuration. | | +| [notebook](outputs.tf#L35) | Vertex AI managed notebook details. | | +| [project](outputs.tf#L43) | The project resource as return by the `project` module. | | +| [project_id](outputs.tf#L48) | Project ID. | | -## TODO - -- Add support for User Managed Notebooks, SA permission option and non default SA for Single User mode. -- Improve default naming for local VPC and Cloud NAT - ## Test ```hcl module "test" { source = "./fabric/blueprints/data-solutions/vertex-mlops/" labels = { - "env" : "dev", - "team" : "ml" + "env" = "dev", + "team" = "ml" } - bucket_name = "test-dev" - dataset_name = "test" + bucket_name = "gcs-test" + dataset_name = "bq-test" identity_pool_claims = "attribute.repository/ORGANIZATION/REPO" notebooks = { - "myworkbench" : { - "owner" : "user@example.com", - "region" : "europe-west4", - "subnet" : "default", + "myworkbench" = { + type = "USER_MANAGED" } } - prefix = "pref" - project_id = "test-dev" - project_create = { + prefix = "pref-dev" + project_config = { billing_account_id = "000000-123456-123456" parent = "folders/111111111111" + project_id = "test-dev" } } -# tftest modules=12 resources=57 +# tftest modules=13 resources=65 ``` diff --git a/blueprints/data-solutions/vertex-mlops/ci-cd.tf b/blueprints/data-solutions/vertex-mlops/ci-cd.tf index d73eacc83b..dcff587e52 100644 --- a/blueprints/data-solutions/vertex-mlops/ci-cd.tf +++ b/blueprints/data-solutions/vertex-mlops/ci-cd.tf @@ -44,14 +44,11 @@ module "artifact_registry" { project_id = module.project.project_id location = var.region format = "DOCKER" - # iam = { - # "roles/artifactregistry.admin" = ["group:cicd@example.com"] - # } } module "service-account-github" { source = "../../../modules/iam-service-account" - name = "sa-github" + name = "${var.prefix}-sa-github" project_id = module.project.project_id iam = var.identity_pool_claims == null ? {} : { "roles/iam.workloadIdentityUser" = ["principalSet://iam.googleapis.com/${google_iam_workload_identity_pool.github_pool[0].name}/${var.identity_pool_claims}"] } } @@ -63,6 +60,9 @@ module "secret-manager" { secrets = { github-key = [var.region] } + # encryption_key = { + # "${var.region}" = try(var.service_encryption_keys["secretmanager"], null) + # } iam = { github-key = { "roles/secretmanager.secretAccessor" = [ @@ -71,4 +71,4 @@ module "secret-manager" { ] } } -} \ No newline at end of file +} diff --git a/blueprints/data-solutions/vertex-mlops/main.tf b/blueprints/data-solutions/vertex-mlops/main.tf index 27129298af..ccb8c9d098 100644 --- a/blueprints/data-solutions/vertex-mlops/main.tf +++ b/blueprints/data-solutions/vertex-mlops/main.tf @@ -117,7 +117,7 @@ module "gcs-bucket" { module "gcs-bucket-cloudbuild" { source = "../../../modules/gcs" project_id = module.project.project_id - name = "${var.project_id}_cloudbuild" + name = "${var.prefix}_cloudbuild" prefix = var.prefix location = var.region storage_class = "REGIONAL" @@ -190,19 +190,19 @@ module "cloudnat" { module "project" { source = "../../../modules/project" - name = var.project_id - parent = try(var.project_create.parent, null) - billing_account = try(var.project_create.billing_account_id, null) - project_create = var.project_create != null + name = var.project_config.project_id + parent = try(var.project_config.parent, null) + billing_account = try(var.project_config.billing_account_id, null) + project_create = var.project_config.billing_account_id != null prefix = var.prefix group_iam = local.group_iam iam = { - "roles/aiplatform.user" = [module.service-account-mlops.iam_email] + "roles/aiplatform.user" = [module.service-account-mlops.iam_email, module.service-account-notebook.iam_email] "roles/artifactregistry.reader" = [module.service-account-mlops.iam_email] "roles/artifactregistry.writer" = [module.service-account-github.iam_email] - "roles/bigquery.dataEditor" = [module.service-account-mlops.iam_email] - "roles/bigquery.jobUser" = [module.service-account-mlops.iam_email] - "roles/bigquery.user" = [module.service-account-mlops.iam_email] + "roles/bigquery.dataEditor" = [module.service-account-mlops.iam_email, module.service-account-notebook.iam_email] + "roles/bigquery.jobUser" = [module.service-account-mlops.iam_email, module.service-account-notebook.iam_email] + "roles/bigquery.user" = [module.service-account-mlops.iam_email, module.service-account-notebook.iam_email] "roles/cloudbuild.builds.editor" = [ module.service-account-mlops.iam_email, module.service-account-github.iam_email @@ -213,6 +213,8 @@ module "project" { "roles/dataflow.worker" = [module.service-account-mlops.iam_email] "roles/iam.serviceAccountUser" = [ module.service-account-mlops.iam_email, + module.service-account-notebook.iam_email, + module.service-account-github.iam_email, "serviceAccount:${module.project.service_accounts.robots.cloudbuild}" ] "roles/monitoring.metricWriter" = [module.service-account-mlops.iam_email] @@ -223,28 +225,40 @@ module "project" { ] "roles/storage.admin" = [ module.service-account-mlops.iam_email, - module.service-account-github.iam_email + module.service-account-github.iam_email, + module.service-account-notebook.iam_email ] } labels = var.labels - org_policies = { - # Example of applying a project wide policy - # "compute.requireOsLogin" = { - # rules = [{ enforce = false }] - # } - } - service_encryption_key_ids = { + aiplatform = [try(local.service_encryption_keys.aiplatform, null)] bq = [try(local.service_encryption_keys.bq, null)] - compute = [try(local.service_encryption_keys.compute, null)] cloudbuild = [try(local.service_encryption_keys.storage, null)] - notebooks = [try(local.service_encryption_keys.compute, null)] + notebooks = [try(local.service_encryption_keys.notebooks, null)] storage = [try(local.service_encryption_keys.storage, null)] } - services = var.project_services - + services = [ + "aiplatform.googleapis.com", + "artifactregistry.googleapis.com", + "bigquery.googleapis.com", + "bigquerystorage.googleapis.com", + "cloudbuild.googleapis.com", + "compute.googleapis.com", + "datacatalog.googleapis.com", + "dataflow.googleapis.com", + "iam.googleapis.com", + "ml.googleapis.com", + "monitoring.googleapis.com", + "notebooks.googleapis.com", + "secretmanager.googleapis.com", + "servicenetworking.googleapis.com", + "serviceusage.googleapis.com", + "stackdriver.googleapis.com", + "storage.googleapis.com", + "storage-component.googleapis.com" + ] shared_vpc_service_config = local.shared_vpc_project == null ? null : { attach = true host_project = local.shared_vpc_project @@ -254,11 +268,8 @@ module "project" { module "service-account-mlops" { source = "../../../modules/iam-service-account" - name = var.sa_mlops_name + name = "${var.prefix}-sa-mlops" project_id = module.project.project_id - iam = { - "roles/iam.serviceAccountUser" = [module.service-account-github.iam_email] - } } resource "google_project_iam_member" "shared_vpc" { @@ -268,11 +279,8 @@ resource "google_project_iam_member" "shared_vpc" { member = "serviceAccount:${module.project.service_accounts.robots.notebooks}" } - resource "google_sourcerepo_repository" "code-repo" { count = var.repo_name == null ? 0 : 1 name = var.repo_name project = module.project.project_id } - - diff --git a/blueprints/data-solutions/vertex-mlops/notebooks.tf b/blueprints/data-solutions/vertex-mlops/notebooks.tf deleted file mode 100644 index 09d3e5a8b6..0000000000 --- a/blueprints/data-solutions/vertex-mlops/notebooks.tf +++ /dev/null @@ -1,60 +0,0 @@ -/** - * Copyright 2022 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -resource "google_notebooks_runtime" "runtime" { - for_each = var.notebooks - name = each.key - - project = module.project.project_id - location = var.notebooks[each.key].region - access_config { - access_type = "SINGLE_USER" - runtime_owner = var.notebooks[each.key].owner - } - software_config { - enable_health_monitoring = true - idle_shutdown = var.notebooks[each.key].idle_shutdown - idle_shutdown_timeout = 1800 - } - virtual_machine { - virtual_machine_config { - machine_type = "n1-standard-4" - network = local.vpc - subnet = local.subnet - internal_ip_only = var.notebooks[each.key].internal_ip_only - dynamic "encryption_config" { - for_each = try(local.service_encryption_keys.compute, null) == null ? [] : [1] - content { - kms_key = local.service_encryption_keys.compute - } - } - metadata = { - notebook-disable-nbconvert = "false" - notebook-disable-downloads = "false" - notebook-disable-terminal = "false" - #notebook-disable-root = "true" - #notebook-upgrade-schedule = "48 4 * * MON" - } - data_disk { - initialize_params { - disk_size_gb = "100" - disk_type = "PD_STANDARD" - } - } - } - } -} - diff --git a/blueprints/data-solutions/vertex-mlops/outputs.tf b/blueprints/data-solutions/vertex-mlops/outputs.tf index 9cb390d628..5acac62239 100644 --- a/blueprints/data-solutions/vertex-mlops/outputs.tf +++ b/blueprints/data-solutions/vertex-mlops/outputs.tf @@ -14,9 +14,6 @@ * limitations under the License. */ -# TODO(): proper outputs - - locals { docker_split = try(split("/", module.artifact_registry.id), null) docker_repo = try("${local.docker_split[3]}-docker.pkg.dev/${local.docker_split[1]}/${local.docker_split[5]}", null) @@ -31,22 +28,19 @@ locals { } output "github" { - description = "Github Configuration." value = local.gh_config } output "notebook" { - description = "Vertex AI managed notebook details." - value = { for k, v in resource.google_notebooks_runtime.runtime : k => v.id } + description = "Vertex AI notebooks ids." + value = merge( + { for k, v in resource.google_notebooks_runtime.runtime : k => v.id }, + { for k, v in resource.google_notebooks_instance.playground : k => v.id } + ) } output "project" { description = "The project resource as return by the `project` module." value = module.project } - -output "project_id" { - description = "Project ID." - value = module.project.project_id -} diff --git a/blueprints/data-solutions/vertex-mlops/variables.tf b/blueprints/data-solutions/vertex-mlops/variables.tf index f3f6efad3d..1aa0646843 100644 --- a/blueprints/data-solutions/vertex-mlops/variables.tf +++ b/blueprints/data-solutions/vertex-mlops/variables.tf @@ -30,9 +30,9 @@ variable "dataset_name" { variable "groups" { description = "Name of the groups (name@domain.org) to apply opinionated IAM permissions." type = object({ - gcp-ml-ds = string - gcp-ml-eng = string - gcp-ml-viewer = string + gcp-ml-ds = optional(string, null) + gcp-ml-eng = optional(string, null) + gcp-ml-viewer = optional(string, null) }) default = { gcp-ml-ds = null @@ -71,16 +71,24 @@ variable "network_config" { } variable "notebooks" { - description = "Vertex AI workbenchs to be deployed." + description = "Vertex AI workbenchs to be deployed. Service Account runtime/instances deployed." type = map(object({ - owner = string - region = string - subnet = string - internal_ip_only = optional(bool, false) - idle_shutdown = optional(bool) + type = string + machine_type = optional(string, "n1-standard-4") + internal_ip_only = optional(bool, true) + idle_shutdown = optional(bool, false) + owner = optional(string, null) })) - default = {} - nullable = false + validation { + condition = alltrue([ + for k, v in var.notebooks : contains(["USER_MANAGED", "MANAGED"], v.type)]) + error_message = "All `type` must be one of `USER_MANAGED` or `MANAGED`." + } + validation { + condition = alltrue([ + for k, v in var.notebooks : (v.type == "MANAGED" && try(v.owner != null, false) || v.type == "USER_MANAGED")]) + error_message = "`owner` must be set for `MANAGED` instances." + } } variable "prefix" { @@ -89,38 +97,17 @@ variable "prefix" { default = null } -variable "project_create" { - description = "Provide values if project creation is needed, uses existing project if null. Parent is in 'folders/nnn' or 'organizations/nnn' format." +variable "project_config" { + description = "Provide 'billing_account_id' value if project creation is needed, uses existing 'project_id' if null. Parent is in 'folders/nnn' or 'organizations/nnn' format." type = object({ - billing_account_id = string + billing_account_id = optional(string, null) parent = string + project_id = string }) - default = null -} - -variable "project_id" { - description = "Project id, references existing project if `project_create` is null." - type = string -} - -variable "project_services" { - description = "List of core services enabled on all projects." - type = list(string) - default = [ - "aiplatform.googleapis.com", - "artifactregistry.googleapis.com", - "bigquery.googleapis.com", - "cloudbuild.googleapis.com", - "compute.googleapis.com", - "datacatalog.googleapis.com", - "dataflow.googleapis.com", - "iam.googleapis.com", - "monitoring.googleapis.com", - "notebooks.googleapis.com", - "secretmanager.googleapis.com", - "servicenetworking.googleapis.com", - "serviceusage.googleapis.com" - ] + validation { + condition = var.project_config.project_id != null + error_message = "Project id must be set." + } } variable "region" { @@ -135,18 +122,20 @@ variable "repo_name" { default = null } -variable "sa_mlops_name" { - description = "Name for the MLOPs Service Account." - type = string - default = "sa-mlops" -} - -variable "service_encryption_keys" { # service encription key +variable "service_encryption_keys" { description = "Cloud KMS to use to encrypt different services. Key location should match service region." type = object({ - bq = string - compute = string - storage = string + aiplatform = optional(string, null) + bq = optional(string, null) + notebooks = optional(string, null) + secretmanager = optional(string, null) + storage = optional(string, null) }) - default = null -} \ No newline at end of file + default = { + aiplatform = null + bq = null + notebooks = null + secretmanager = null + storage = null + } +} diff --git a/blueprints/data-solutions/vertex-mlops/vertex.tf b/blueprints/data-solutions/vertex-mlops/vertex.tf new file mode 100644 index 0000000000..264918b411 --- /dev/null +++ b/blueprints/data-solutions/vertex-mlops/vertex.tf @@ -0,0 +1,128 @@ +/** + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +resource "google_vertex_ai_metadata_store" "store" { + provider = google-beta + project = module.project.project_id + name = "default" + description = "Vertex Ai Metadata Store" + region = var.region + dynamic "encryption_spec" { + for_each = try(var.service_encryption_keys.aiplatform, null) == null ? [] : [""] + + content { + kms_key_name = try(var.service_encryption_keys.aiplatform, null) + } + } + # `state` value will be decided automatically based on the result of the configuration + lifecycle { + ignore_changes = [state] + } +} + +module "service-account-notebook" { + source = "../../../modules/iam-service-account" + project_id = module.project.project_id + name = "notebook-sa" +} + +resource "google_notebooks_runtime" "runtime" { + for_each = { for k, v in var.notebooks : k => v if v.type == "MANAGED" } + name = "${var.prefix}-${each.key}" + + project = module.project.project_id + location = var.region + access_config { + access_type = "SINGLE_USER" + runtime_owner = try(var.notebooks[each.key].owner, null) + } + software_config { + enable_health_monitoring = true + } + virtual_machine { + virtual_machine_config { + machine_type = var.notebooks[each.key].machine_type + network = local.vpc + subnet = local.subnet + internal_ip_only = var.notebooks[each.key].internal_ip_only + dynamic "encryption_config" { + for_each = try(local.service_encryption_keys.notebooks, null) == null ? [] : [1] + content { + kms_key = local.service_encryption_keys.notebooks + } + } + metadata = { + notebook-disable-nbconvert = "false" + notebook-disable-downloads = "true" + notebook-disable-terminal = "false" + notebook-disable-root = "true" + } + data_disk { + initialize_params { + disk_size_gb = "100" + disk_type = "PD_STANDARD" + } + } + } + } +} + +resource "google_notebooks_instance" "playground" { + for_each = { for k, v in var.notebooks : k => v if v.type == "USER_MANAGED" } + name = "${var.prefix}-${each.key}" + location = format("%s-%s", var.region, "b") + machine_type = var.notebooks[each.key].machine_type + project = module.project.project_id + + container_image { + repository = "gcr.io/deeplearning-platform-release/base-cpu" + tag = "latest" + } + + install_gpu_driver = true + boot_disk_type = "PD_SSD" + boot_disk_size_gb = 110 + disk_encryption = try(local.service_encryption_keys.notebooks != null, false) ? "CMEK" : null + kms_key = try(local.service_encryption_keys.notebooks, null) + + no_public_ip = var.notebooks[each.key].internal_ip_only + no_proxy_access = false + + network = local.vpc + subnet = local.subnet + + instance_owners = try(tolist(var.notebooks[each.key].owner), null) + service_account = module.service-account-notebook.email + + metadata = { + notebook-disable-nbconvert = "false" + notebook-disable-downloads = "false" + notebook-disable-terminal = "false" + notebook-disable-root = "true" + } + + # Remove once terraform-provider-google/issues/9164 is fixed + lifecycle { + ignore_changes = [disk_encryption, kms_key] + } + + #TODO Uncomment once terraform-provider-google/issues/9273 is fixed + # tags = ["ssh"] + depends_on = [ + google_project_iam_member.shared_vpc, + ] +} + diff --git a/modules/project/service-agents.yaml b/modules/project/service-agents.yaml index 2ee32c63bd..9cabd186a4 100644 --- a/modules/project/service-agents.yaml +++ b/modules/project/service-agents.yaml @@ -18,6 +18,7 @@ service_agent: "service-%s@gcp-sa-adsdatahub.iam.gserviceaccount.com" - name: "aiplatform" service_agent: "service-%s@gcp-sa-aiplatform.iam.gserviceaccount.com" + jit: true - name: "aiplatform-cc" service_agent: "service-%s@gcp-sa-aiplatform-cc.iam.gserviceaccount.com" - name: "alloydb" From 1f4fac2f1d3c80da1285380cfbde6962181992fd Mon Sep 17 00:00:00 2001 From: lcaggio Date: Tue, 18 Apr 2023 17:51:24 +0200 Subject: [PATCH 2/9] fix lint --- blueprints/data-solutions/vertex-mlops/README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/blueprints/data-solutions/vertex-mlops/README.md b/blueprints/data-solutions/vertex-mlops/README.md index adc76bd376..929b06fcaa 100644 --- a/blueprints/data-solutions/vertex-mlops/README.md +++ b/blueprints/data-solutions/vertex-mlops/README.md @@ -71,12 +71,10 @@ This blueprint can be used as a building block for setting up an end2end ML Ops | name | description | sensitive | |---|---|:---:| | [github](outputs.tf#L30) | Github Configuration. | | -| [notebook](outputs.tf#L35) | Vertex AI managed notebook details. | | +| [notebook](outputs.tf#L35) | Vertex AI notebooks ids. | | | [project](outputs.tf#L43) | The project resource as return by the `project` module. | | -| [project_id](outputs.tf#L48) | Project ID. | | - ## Test ```hcl From f4490fcaea04af83d2143009069e724398414cf3 Mon Sep 17 00:00:00 2001 From: lcaggio Date: Wed, 19 Apr 2023 11:22:50 +0200 Subject: [PATCH 3/9] Fix comments --- blueprints/data-solutions/bq-ml/README.md | 2 +- .../data-solutions/data-playground/README.md | 2 +- .../data-solutions/vertex-mlops/README.md | 12 +++--- .../data-solutions/vertex-mlops/ci-cd.tf | 6 +-- .../data-solutions/vertex-mlops/main.tf | 41 +++++++++++-------- .../data-solutions/vertex-mlops/variables.tf | 32 +++++++-------- .../data-solutions/vertex-mlops/vertex.tf | 15 ++++--- 7 files changed, 57 insertions(+), 53 deletions(-) diff --git a/blueprints/data-solutions/bq-ml/README.md b/blueprints/data-solutions/bq-ml/README.md index 45a18f00f3..385ec5299a 100644 --- a/blueprints/data-solutions/bq-ml/README.md +++ b/blueprints/data-solutions/bq-ml/README.md @@ -98,5 +98,5 @@ module "test" { prefix = "prefix" } -# tftest modules=9 resources=47 +# tftest modules=9 resources=48 ``` diff --git a/blueprints/data-solutions/data-playground/README.md b/blueprints/data-solutions/data-playground/README.md index 6691e496a9..a2de4db963 100644 --- a/blueprints/data-solutions/data-playground/README.md +++ b/blueprints/data-solutions/data-playground/README.md @@ -86,5 +86,5 @@ module "test" { parent = "folders/467898377" } } -# tftest modules=8 resources=40 +# tftest modules=8 resources=41 ``` diff --git a/blueprints/data-solutions/vertex-mlops/README.md b/blueprints/data-solutions/vertex-mlops/README.md index 929b06fcaa..2ce403b6b7 100644 --- a/blueprints/data-solutions/vertex-mlops/README.md +++ b/blueprints/data-solutions/vertex-mlops/README.md @@ -52,19 +52,19 @@ This blueprint can be used as a building block for setting up an end2end ML Ops | name | description | type | required | default | |---|---|:---:|:---:|:---:| -| [notebooks](variables.tf#L73) | Vertex AI workbenchs to be deployed. Service Account runtime/instances deployed. | map(object({…})) | ✓ | | -| [project_config](variables.tf#L100) | Provide 'billing_account_id' value if project creation is needed, uses existing 'project_id' if null. Parent is in 'folders/nnn' or 'organizations/nnn' format. | object({…}) | ✓ | | +| [notebooks](variables.tf#L73) | Vertex AI workbenchs to be deployed. Service Account runtime/instances deployed. | map(object({…})) | ✓ | | +| [project_config](variables.tf#L100) | Provide 'billing_account_id' value if project creation is needed, uses existing 'project_id' if null. Parent is in 'folders/nnn' or 'organizations/nnn' format. | object({…}) | ✓ | | | [bucket_name](variables.tf#L18) | GCS bucket name to store the Vertex AI artifacts. | string | | null | | [dataset_name](variables.tf#L24) | BigQuery Dataset to store the training data. | string | | null | -| [groups](variables.tf#L30) | Name of the groups (name@domain.org) to apply opinionated IAM permissions. | object({…}) | | {…} | +| [groups](variables.tf#L30) | Name of the groups (name@domain.org) to apply opinionated IAM permissions. | object({…}) | | {…} | | [identity_pool_claims](variables.tf#L45) | Claims to be used by Workload Identity Federation (i.e.: attribute.repository/ORGANIZATION/REPO). If a not null value is provided, then google_iam_workload_identity_pool resource will be created. | string | | null | | [labels](variables.tf#L51) | Labels to be assigned at project level. | map(string) | | {} | | [location](variables.tf#L57) | Location used for multi-regional resources. | string | | "eu" | | [network_config](variables.tf#L63) | Shared VPC network configurations to use. If null networks will be created in projects with preconfigured values. | object({…}) | | null | | [prefix](variables.tf#L94) | Prefix used for the project id. | string | | null | -| [region](variables.tf#L113) | Region used for regional resources. | string | | "europe-west4" | -| [repo_name](variables.tf#L119) | Cloud Source Repository name. null to avoid to create it. | string | | null | -| [service_encryption_keys](variables.tf#L125) | Cloud KMS to use to encrypt different services. Key location should match service region. | object({…}) | | {…} | +| [region](variables.tf#L114) | Region used for regional resources. | string | | "europe-west4" | +| [repo_name](variables.tf#L120) | Cloud Source Repository name. null to avoid to create it. | string | | null | +| [service_encryption_keys](variables.tf#L126) | Cloud KMS to use to encrypt different services. Key location should match service region. | object({…}) | | {} | ## Outputs diff --git a/blueprints/data-solutions/vertex-mlops/ci-cd.tf b/blueprints/data-solutions/vertex-mlops/ci-cd.tf index dcff587e52..086b9d50bc 100644 --- a/blueprints/data-solutions/vertex-mlops/ci-cd.tf +++ b/blueprints/data-solutions/vertex-mlops/ci-cd.tf @@ -60,9 +60,9 @@ module "secret-manager" { secrets = { github-key = [var.region] } - # encryption_key = { - # "${var.region}" = try(var.service_encryption_keys["secretmanager"], null) - # } + encryption_key = { + "${var.region}" = var.service_encryption_keys.secretmanager + } iam = { github-key = { "roles/secretmanager.secretAccessor" = [ diff --git a/blueprints/data-solutions/vertex-mlops/main.tf b/blueprints/data-solutions/vertex-mlops/main.tf index ccb8c9d098..2e093f8653 100644 --- a/blueprints/data-solutions/vertex-mlops/main.tf +++ b/blueprints/data-solutions/vertex-mlops/main.tf @@ -64,8 +64,7 @@ locals { } ) - service_encryption_keys = var.service_encryption_keys - shared_vpc_project = try(var.network_config.host_project, null) + shared_vpc_project = try(var.network_config.host_project, null) subnet = ( local.use_shared_vpc @@ -109,7 +108,7 @@ module "gcs-bucket" { location = var.region storage_class = "REGIONAL" versioning = false - encryption_key = try(local.service_encryption_keys.storage, null) + encryption_key = var.service_encryption_keys.storage } # Default bucket for Cloud Build to prevent error: "'us' violates constraint ‘gcp.resourceLocations’" @@ -122,7 +121,7 @@ module "gcs-bucket-cloudbuild" { location = var.region storage_class = "REGIONAL" versioning = false - encryption_key = try(local.service_encryption_keys.storage, null) + encryption_key = var.service_encryption_keys.storage } module "bq-dataset" { @@ -131,7 +130,7 @@ module "bq-dataset" { project_id = module.project.project_id id = var.dataset_name location = var.region - encryption_key = try(local.service_encryption_keys.bq, null) + encryption_key = var.service_encryption_keys.bq } module "vpc-local" { @@ -191,18 +190,27 @@ module "cloudnat" { module "project" { source = "../../../modules/project" name = var.project_config.project_id - parent = try(var.project_config.parent, null) - billing_account = try(var.project_config.billing_account_id, null) + parent = var.project_config.parent + billing_account = var.project_config.billing_account_id project_create = var.project_config.billing_account_id != null prefix = var.prefix group_iam = local.group_iam iam = { - "roles/aiplatform.user" = [module.service-account-mlops.iam_email, module.service-account-notebook.iam_email] + "roles/aiplatform.user" = [ + module.service-account-mlops.iam_email, + module.service-account-notebook.iam_email + ] "roles/artifactregistry.reader" = [module.service-account-mlops.iam_email] "roles/artifactregistry.writer" = [module.service-account-github.iam_email] - "roles/bigquery.dataEditor" = [module.service-account-mlops.iam_email, module.service-account-notebook.iam_email] - "roles/bigquery.jobUser" = [module.service-account-mlops.iam_email, module.service-account-notebook.iam_email] - "roles/bigquery.user" = [module.service-account-mlops.iam_email, module.service-account-notebook.iam_email] + "roles/bigquery.dataEditor" = [ + module.service-account-mlops.iam_email, + module.service-account-notebook.iam_email + ] + "roles/bigquery.jobUser" = [ + module.service-account-mlops.iam_email, + module.service-account-notebook.iam_email + ] + "roles/bigquery.user" = [module.service-account-mlops.iam_email, module.service-account-notebook.iam_email] "roles/cloudbuild.builds.editor" = [ module.service-account-mlops.iam_email, module.service-account-github.iam_email @@ -232,11 +240,12 @@ module "project" { labels = var.labels service_encryption_key_ids = { - aiplatform = [try(local.service_encryption_keys.aiplatform, null)] - bq = [try(local.service_encryption_keys.bq, null)] - cloudbuild = [try(local.service_encryption_keys.storage, null)] - notebooks = [try(local.service_encryption_keys.notebooks, null)] - storage = [try(local.service_encryption_keys.storage, null)] + aiplatform = [var.service_encryption_keys.aiplatform] + bq = [var.service_encryption_keys.bq] + cloudbuild = [var.service_encryption_keys.storage] + notebooks = [var.service_encryption_keys.notebooks] + secretmanager = [var.service_encryption_keys.secretmanager] + storage = [var.service_encryption_keys.storage] } services = [ diff --git a/blueprints/data-solutions/vertex-mlops/variables.tf b/blueprints/data-solutions/vertex-mlops/variables.tf index 1aa0646843..b5800534d3 100644 --- a/blueprints/data-solutions/vertex-mlops/variables.tf +++ b/blueprints/data-solutions/vertex-mlops/variables.tf @@ -30,9 +30,9 @@ variable "dataset_name" { variable "groups" { description = "Name of the groups (name@domain.org) to apply opinionated IAM permissions." type = object({ - gcp-ml-ds = optional(string, null) - gcp-ml-eng = optional(string, null) - gcp-ml-viewer = optional(string, null) + gcp-ml-ds = optional(string) + gcp-ml-eng = optional(string) + gcp-ml-viewer = optional(string) }) default = { gcp-ml-ds = null @@ -77,7 +77,7 @@ variable "notebooks" { machine_type = optional(string, "n1-standard-4") internal_ip_only = optional(bool, true) idle_shutdown = optional(bool, false) - owner = optional(string, null) + owner = optional(string) })) validation { condition = alltrue([ @@ -100,14 +100,15 @@ variable "prefix" { variable "project_config" { description = "Provide 'billing_account_id' value if project creation is needed, uses existing 'project_id' if null. Parent is in 'folders/nnn' or 'organizations/nnn' format." type = object({ - billing_account_id = optional(string, null) - parent = string + billing_account_id = optional(string) + parent = optional(string) project_id = string }) validation { condition = var.project_config.project_id != null error_message = "Project id must be set." } + nullable = false } variable "region" { @@ -125,17 +126,12 @@ variable "repo_name" { variable "service_encryption_keys" { description = "Cloud KMS to use to encrypt different services. Key location should match service region." type = object({ - aiplatform = optional(string, null) - bq = optional(string, null) - notebooks = optional(string, null) - secretmanager = optional(string, null) - storage = optional(string, null) + aiplatform = optional(string) + bq = optional(string) + notebooks = optional(string) + secretmanager = optional(string) + storage = optional(string) }) - default = { - aiplatform = null - bq = null - notebooks = null - secretmanager = null - storage = null - } + default = {} + nullable = false } diff --git a/blueprints/data-solutions/vertex-mlops/vertex.tf b/blueprints/data-solutions/vertex-mlops/vertex.tf index 264918b411..7515275183 100644 --- a/blueprints/data-solutions/vertex-mlops/vertex.tf +++ b/blueprints/data-solutions/vertex-mlops/vertex.tf @@ -21,10 +21,10 @@ resource "google_vertex_ai_metadata_store" "store" { description = "Vertex Ai Metadata Store" region = var.region dynamic "encryption_spec" { - for_each = try(var.service_encryption_keys.aiplatform, null) == null ? [] : [""] + for_each = var.service_encryption_keys.aiplatform == null ? [] : [""] content { - kms_key_name = try(var.service_encryption_keys.aiplatform, null) + kms_key_name = var.service_encryption_keys.aiplatform } } # `state` value will be decided automatically based on the result of the configuration @@ -42,7 +42,6 @@ module "service-account-notebook" { resource "google_notebooks_runtime" "runtime" { for_each = { for k, v in var.notebooks : k => v if v.type == "MANAGED" } name = "${var.prefix}-${each.key}" - project = module.project.project_id location = var.region access_config { @@ -59,9 +58,9 @@ resource "google_notebooks_runtime" "runtime" { subnet = local.subnet internal_ip_only = var.notebooks[each.key].internal_ip_only dynamic "encryption_config" { - for_each = try(local.service_encryption_keys.notebooks, null) == null ? [] : [1] + for_each = var.service_encryption_keys.notebooks == null ? [] : [1] content { - kms_key = local.service_encryption_keys.notebooks + kms_key = var.service_encryption_keys.notebooks } } metadata = { @@ -83,7 +82,7 @@ resource "google_notebooks_runtime" "runtime" { resource "google_notebooks_instance" "playground" { for_each = { for k, v in var.notebooks : k => v if v.type == "USER_MANAGED" } name = "${var.prefix}-${each.key}" - location = format("%s-%s", var.region, "b") + location = "${var.region}-b" machine_type = var.notebooks[each.key].machine_type project = module.project.project_id @@ -95,8 +94,8 @@ resource "google_notebooks_instance" "playground" { install_gpu_driver = true boot_disk_type = "PD_SSD" boot_disk_size_gb = 110 - disk_encryption = try(local.service_encryption_keys.notebooks != null, false) ? "CMEK" : null - kms_key = try(local.service_encryption_keys.notebooks, null) + disk_encryption = var.service_encryption_keys.notebooks != null ? "CMEK" : null + kms_key = var.service_encryption_keys.notebooks no_public_ip = var.notebooks[each.key].internal_ip_only no_proxy_access = false From f5c5ac060623a2e083e329e3188702101abe0c2a Mon Sep 17 00:00:00 2001 From: lcaggio Date: Wed, 19 Apr 2023 11:46:39 +0200 Subject: [PATCH 4/9] Fix python linting due to yapf new version. --- blueprints/cloud-operations/network-dashboard/src/main.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/blueprints/cloud-operations/network-dashboard/src/main.py b/blueprints/cloud-operations/network-dashboard/src/main.py index ec5e5c6ea7..3d0568b695 100755 --- a/blueprints/cloud-operations/network-dashboard/src/main.py +++ b/blueprints/cloud-operations/network-dashboard/src/main.py @@ -80,8 +80,9 @@ def do_discovery(resources): resources[result.type][result.id][result.key] = result.data else: resources[result.type][result.id] = result.data - LOGGER.info('discovery end {}'.format( - {k: len(v) for k, v in resources.items() if not isinstance(v, str)})) + LOGGER.info('discovery end {}'.format({ + k: len(v) for k, v in resources.items() if not isinstance(v, str) + })) def do_init(resources, discovery_root, monitoring_project, folders=None, From 30bef8546f4925bbddf8e706a9ee4b53665674ad Mon Sep 17 00:00:00 2001 From: lcaggio Date: Wed, 19 Apr 2023 12:57:55 +0200 Subject: [PATCH 5/9] Create metadata yaml file --- .../data-solutions/vertex-mlops/README.md | 18 +-- .../data-solutions/vertex-mlops/metadata.yaml | 131 ++++++++++++++++++ .../data-solutions/vertex-mlops/variables.tf | 6 +- 3 files changed, 141 insertions(+), 14 deletions(-) create mode 100644 blueprints/data-solutions/vertex-mlops/metadata.yaml diff --git a/blueprints/data-solutions/vertex-mlops/README.md b/blueprints/data-solutions/vertex-mlops/README.md index 2ce403b6b7..b8e36e1f09 100644 --- a/blueprints/data-solutions/vertex-mlops/README.md +++ b/blueprints/data-solutions/vertex-mlops/README.md @@ -4,21 +4,21 @@ This example implements the infrastructure required to deploy an end-to-end [MLOps process](https://services.google.com/fh/files/misc/practitioners_guide_to_mlops_whitepaper.pdf) using [Vertex AI](https://cloud.google.com/vertex-ai) platform. -## GCP resources +## Architecture The blueprint will deploy all the required resources to have a fully functional MLOPs environment containing: -- Vertex Workbench (for the experimentation environment) -- GCP Project (optional) to host all the resources +- Vertex Workbench (for the experimentation environment). +- GCP Project (optional) to host all the resources. - Isolated VPC network and a subnet to be used by Vertex and Dataflow. Alternatively, an external Shared VPC can be configured using the `network_config`variable. -- Firewall rule to allow the internal subnet communication required by Dataflow -- Cloud NAT required to reach the internet from the different computing resources (Vertex and Dataflow) -- GCS buckets to host Vertex AI and Cloud Build Artifacts. By default the buckets will be regional and should match the Vertex AI region for the different resources (i.e. Vertex Managed Dataset) and processes (i.e. Vertex trainining) +- Firewall rule to allow the internal subnet communication required by Dataflow. +- Cloud NAT required to reach the internet from the different computing resources (Vertex and Dataflow). +- GCS buckets to host Vertex AI and Cloud Build Artifacts. By default the buckets will be regional and should match the Vertex AI region for the different resources (i.e. Vertex Managed Dataset) and processes (i.e. Vertex trainining). - BigQuery Dataset where the training data will be stored. This is optional, since the training data could be already hosted in an existing BigQuery dataset. - Artifact Registry Docker repository to host the custom images. -- Service account (`mlops-[env]@`) with the minimum permissions required by Vertex AI and Dataflow (if this service is used inside of the Vertex AI Pipeline). -- Service account (`github@`) to be used by Workload Identity Federation, to federate Github identity (Optional). -- Secret to store the Github SSH key to get access the CICD code repo. +- Service account (`PREFIX-sa-mlops`) with the minimum permissions required by Vertex AI and Dataflow (if this service is used inside of the Vertex AI Pipeline). +- Service account (`PREFIX-sa-github@`) to be used by Workload Identity Federation, to federate Github identity (Optional). +- Secret Manager to store the Github SSH key to get access the CICD code repo. ![MLOps project description](./images/mlops_projects.png "MLOps project description") diff --git a/blueprints/data-solutions/vertex-mlops/metadata.yaml b/blueprints/data-solutions/vertex-mlops/metadata.yaml new file mode 100644 index 0000000000..254cba0b6d --- /dev/null +++ b/blueprints/data-solutions/vertex-mlops/metadata.yaml @@ -0,0 +1,131 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: blueprints.cloud.google.com/v1alpha1 +kind: BlueprintMetadata +metadata: + name: terraform-google-fabric-data_solutiont-vertex_mlops +spec: + title: MLOps with Vertex AI + source: + repo: https://github.com/GoogleCloudPlatform/terraform-google-three-tier-web-app.git + sourceType: git + version: 21.0.0 + actuationTool: + type: Terraform + version: '>= 0.13' + description: + tagline: MLOps with Vertex AI + detailed: |- + This example implements the infrastructure required to deploy an end-to-end MLOps process using Vertex AI platform. + architecture: + - Vertex Workbench (for the experimentation environment). + - GCP Project (optional) to host all the resources. + - Isolated VPC network and a subnet to be used by Vertex and Dataflow. Alternatively, an external Shared VPC can be configured using the `network_config`variable. + - Firewall rule to allow the internal subnet communication required by Dataflow. + - Cloud NAT required to reach the internet from the different computing resources (Vertex and Dataflow). + - GCS buckets to host Vertex AI and Cloud Build Artifacts. By default the buckets will be regional and should match the Vertex AI region for the different resources (i.e. Vertex Managed Dataset) and processes (i.e. Vertex trainining). + - BigQuery Dataset where the training data will be stored. This is optional, since the training data could be already hosted in an existing BigQuery dataset. + - Artifact Registry Docker repository to host the custom images. + - Service account (`PREFIX-sa-mlops`) with the minimum permissions required by Vertex AI and Dataflow (if this service is used inside of the Vertex AI Pipeline). + - Service account (`PREFIX-sa-github@`) to be used by Workload Identity Federation, to federate Github identity (Optional). + - Secret Manager to store the Github SSH key to get access the CICD code repo. + documentation: + - title: Architecture Diagram + url: https://github.com/GoogleCloudPlatform/cloud-foundation-fabric/blob/master/blueprints/data-solutions/vertex-mlops/images/mlops_projects.png + variables: + - name: notebooks + description: Vertex AI workbenchs to be deployed. Service Account runtime/instances deployed. + type: map(object({...})) + required: true + - name: project_config + description: Provide 'billing_account_id' value if project creation is needed, uses existing 'project_id' if null. Parent is in 'folders/nnn' or 'organizations/nnn' format. + type: object({...}) + required: true + - name: bucket_name + description: GCS bucket name to store the Vertex AI artifacts. + type: string + default: null + required: false + - name: dataset_name + description: BigQuery Dataset to store the training data. + type: string + default: null + required: false + - name: groups + description: Name of the groups (group_name@domain.org) to apply opinionated IAM permissions. + type: object({...}) + required: false + - name: identity_pool_claims + description: Claims to be used by Workload Identity Federation (i.e.: attribute.repository/ORGANIZATION/REPO). If a not null value is provided, then google_iam_workload_identity_pool resource will be created. + type: string + required: false + - name: labels + description: Labels to be assigned at project level. + type: map(string) + required: false + - name: location + description: Location used for multi-regional resources. + type: string + required: false + - name: network_config + description: Shared VPC network configurations to use. If null networks will be created in projects with preconfigured values. + type: object({…}) + required: false + - name: prefix + description: Prefix used for the project id. + type: string + required: false + - name: region + description: Region used for regional resources. + type: string + required: false + - name: repo_name + description: Cloud Source Repository name. null to avoid to create it. + type: string + required: false + - name: service_encryption_keys + description: Cloud KMS to use to encrypt different services. Key location should match service region. + type: object({…}) + required: false + outputs: + - name: github + description: Github Configuration. + - name: notebook + description: Vertex AI notebooks ids. + - name: project + description: The project resource as return by the project module. + roles: + - level: Project + roles: + - roles/owner + services: + - aiplatform.googleapis.com + - artifactregistry.googleapis.com + - bigquery.googleapis.com + - bigquerystorage.googleapis.com + - cloudbuild.googleapis.com + - compute.googleapis.com + - datacatalog.googleapis.com + - dataflow.googleapis.com + - iam.googleapis.com + - ml.googleapis.com + - monitoring.googleapis.com + - notebooks.googleapis.com + - secretmanager.googleapis.com + - servicenetworking.googleapis.com + - serviceusage.googleapis.com + - stackdriver.googleapis.com + - storage.googleapis.com + - storage-component.googleapis.com \ No newline at end of file diff --git a/blueprints/data-solutions/vertex-mlops/variables.tf b/blueprints/data-solutions/vertex-mlops/variables.tf index b5800534d3..cc8200f9d4 100644 --- a/blueprints/data-solutions/vertex-mlops/variables.tf +++ b/blueprints/data-solutions/vertex-mlops/variables.tf @@ -34,11 +34,7 @@ variable "groups" { gcp-ml-eng = optional(string) gcp-ml-viewer = optional(string) }) - default = { - gcp-ml-ds = null - gcp-ml-eng = null - gcp-ml-viewer = null - } + default = {} nullable = false } From 9fdf80738f55308cd7310723b77adb558bc82e8c Mon Sep 17 00:00:00 2001 From: lcaggio Date: Wed, 19 Apr 2023 14:09:40 +0200 Subject: [PATCH 6/9] Fix linting. --- .../data-solutions/vertex-mlops/README.md | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/blueprints/data-solutions/vertex-mlops/README.md b/blueprints/data-solutions/vertex-mlops/README.md index b8e36e1f09..e27312d8ef 100644 --- a/blueprints/data-solutions/vertex-mlops/README.md +++ b/blueprints/data-solutions/vertex-mlops/README.md @@ -52,19 +52,19 @@ This blueprint can be used as a building block for setting up an end2end ML Ops | name | description | type | required | default | |---|---|:---:|:---:|:---:| -| [notebooks](variables.tf#L73) | Vertex AI workbenchs to be deployed. Service Account runtime/instances deployed. | map(object({…})) | ✓ | | -| [project_config](variables.tf#L100) | Provide 'billing_account_id' value if project creation is needed, uses existing 'project_id' if null. Parent is in 'folders/nnn' or 'organizations/nnn' format. | object({…}) | ✓ | | +| [notebooks](variables.tf#L69) | Vertex AI workbenchs to be deployed. Service Account runtime/instances deployed. | map(object({…})) | ✓ | | +| [project_config](variables.tf#L96) | Provide 'billing_account_id' value if project creation is needed, uses existing 'project_id' if null. Parent is in 'folders/nnn' or 'organizations/nnn' format. | object({…}) | ✓ | | | [bucket_name](variables.tf#L18) | GCS bucket name to store the Vertex AI artifacts. | string | | null | | [dataset_name](variables.tf#L24) | BigQuery Dataset to store the training data. | string | | null | -| [groups](variables.tf#L30) | Name of the groups (name@domain.org) to apply opinionated IAM permissions. | object({…}) | | {…} | -| [identity_pool_claims](variables.tf#L45) | Claims to be used by Workload Identity Federation (i.e.: attribute.repository/ORGANIZATION/REPO). If a not null value is provided, then google_iam_workload_identity_pool resource will be created. | string | | null | -| [labels](variables.tf#L51) | Labels to be assigned at project level. | map(string) | | {} | -| [location](variables.tf#L57) | Location used for multi-regional resources. | string | | "eu" | -| [network_config](variables.tf#L63) | Shared VPC network configurations to use. If null networks will be created in projects with preconfigured values. | object({…}) | | null | -| [prefix](variables.tf#L94) | Prefix used for the project id. | string | | null | -| [region](variables.tf#L114) | Region used for regional resources. | string | | "europe-west4" | -| [repo_name](variables.tf#L120) | Cloud Source Repository name. null to avoid to create it. | string | | null | -| [service_encryption_keys](variables.tf#L126) | Cloud KMS to use to encrypt different services. Key location should match service region. | object({…}) | | {} | +| [groups](variables.tf#L30) | Name of the groups (name@domain.org) to apply opinionated IAM permissions. | object({…}) | | {} | +| [identity_pool_claims](variables.tf#L41) | Claims to be used by Workload Identity Federation (i.e.: attribute.repository/ORGANIZATION/REPO). If a not null value is provided, then google_iam_workload_identity_pool resource will be created. | string | | null | +| [labels](variables.tf#L47) | Labels to be assigned at project level. | map(string) | | {} | +| [location](variables.tf#L53) | Location used for multi-regional resources. | string | | "eu" | +| [network_config](variables.tf#L59) | Shared VPC network configurations to use. If null networks will be created in projects with preconfigured values. | object({…}) | | null | +| [prefix](variables.tf#L90) | Prefix used for the project id. | string | | null | +| [region](variables.tf#L110) | Region used for regional resources. | string | | "europe-west4" | +| [repo_name](variables.tf#L116) | Cloud Source Repository name. null to avoid to create it. | string | | null | +| [service_encryption_keys](variables.tf#L122) | Cloud KMS to use to encrypt different services. Key location should match service region. | object({…}) | | {} | ## Outputs From b896ccb9e58691019e762715147a8b49bfc10707 Mon Sep 17 00:00:00 2001 From: lcaggio Date: Wed, 19 Apr 2023 15:20:29 +0200 Subject: [PATCH 7/9] Update metadata types and defaults. --- .../data-solutions/vertex-mlops/metadata.yaml | 52 ++++++++++++++++--- 1 file changed, 45 insertions(+), 7 deletions(-) diff --git a/blueprints/data-solutions/vertex-mlops/metadata.yaml b/blueprints/data-solutions/vertex-mlops/metadata.yaml index 254cba0b6d..a2e0a7f838 100644 --- a/blueprints/data-solutions/vertex-mlops/metadata.yaml +++ b/blueprints/data-solutions/vertex-mlops/metadata.yaml @@ -19,9 +19,9 @@ metadata: spec: title: MLOps with Vertex AI source: - repo: https://github.com/GoogleCloudPlatform/terraform-google-three-tier-web-app.git + repo: https://github.com/GoogleCloudPlatform/cloud-foundation-fabric.git sourceType: git - version: 21.0.0 + version: 1.0.0 actuationTool: type: Terraform version: '>= 0.13' @@ -47,11 +47,23 @@ spec: variables: - name: notebooks description: Vertex AI workbenchs to be deployed. Service Account runtime/instances deployed. - type: map(object({...})) + type: |- + map(object({ + type = string + machine_type = optional(string, "n1-standard-4") + internal_ip_only = optional(bool, true) + idle_shutdown = optional(bool, false) + owner = optional(string) + })) required: true - name: project_config description: Provide 'billing_account_id' value if project creation is needed, uses existing 'project_id' if null. Parent is in 'folders/nnn' or 'organizations/nnn' format. - type: object({...}) + type: |- + object({ + billing_account_id = optional(string) + parent = optional(string) + project_id = string + }) required: true - name: bucket_name description: GCS bucket name to store the Vertex AI artifacts. @@ -65,39 +77,65 @@ spec: required: false - name: groups description: Name of the groups (group_name@domain.org) to apply opinionated IAM permissions. - type: object({...}) + type: |- + object({ + gcp-ml-ds = optional(string), + gcp-ml-eng = optional(string), + gcp-ml-viewer = optional(string) + }) + default: {} required: false - name: identity_pool_claims description: Claims to be used by Workload Identity Federation (i.e.: attribute.repository/ORGANIZATION/REPO). If a not null value is provided, then google_iam_workload_identity_pool resource will be created. type: string + default: null required: false - name: labels description: Labels to be assigned at project level. type: map(string) required: false + default: {} - name: location description: Location used for multi-regional resources. type: string + default: eu required: false - name: network_config description: Shared VPC network configurations to use. If null networks will be created in projects with preconfigured values. - type: object({…}) + type: |- + object({ + host_project = string + network_self_link = string + subnet_self_link = string + }) + default: null required: false - name: prefix description: Prefix used for the project id. type: string + default: null required: false - name: region description: Region used for regional resources. type: string + default: europe-west4 required: false - name: repo_name description: Cloud Source Repository name. null to avoid to create it. type: string + default: null required: false - name: service_encryption_keys description: Cloud KMS to use to encrypt different services. Key location should match service region. - type: object({…}) + type: |- + object({ + aiplatform = optional(string) + bq = optional(string) + notebooks = optional(string) + secretmanager = optional(string) + storage = optional(string) + }) + default: {} required: false outputs: - name: github From 8488e866bc96b83bef49d3217317659a9e8e4fcb Mon Sep 17 00:00:00 2001 From: lcaggio Date: Thu, 20 Apr 2023 16:53:09 +0200 Subject: [PATCH 8/9] Update readme --- .../data-solutions/vertex-mlops/README.md | 53 ++++++++++++++----- .../vertex-mlops/blueprint-providers.tf | 4 +- .../data-solutions/vertex-mlops/metadata.yaml | 4 +- .../vertex-mlops/terraform.tfvars.sample | 20 ------- 4 files changed, 45 insertions(+), 36 deletions(-) delete mode 100644 blueprints/data-solutions/vertex-mlops/terraform.tfvars.sample diff --git a/blueprints/data-solutions/vertex-mlops/README.md b/blueprints/data-solutions/vertex-mlops/README.md index e27312d8ef..31e6a50782 100644 --- a/blueprints/data-solutions/vertex-mlops/README.md +++ b/blueprints/data-solutions/vertex-mlops/README.md @@ -1,6 +1,10 @@ # MLOps with Vertex AI -## Introduction +## Tagline + +Create a Vertex AI environment needed for MLOps. + +## Detailed This example implements the infrastructure required to deploy an end-to-end [MLOps process](https://services.google.com/fh/files/misc/practitioners_guide_to_mlops_whitepaper.pdf) using [Vertex AI](https://cloud.google.com/vertex-ai) platform. @@ -8,17 +12,19 @@ This example implements the infrastructure required to deploy an end-to-end [MLO The blueprint will deploy all the required resources to have a fully functional MLOPs environment containing: -- Vertex Workbench (for the experimentation environment). -- GCP Project (optional) to host all the resources. -- Isolated VPC network and a subnet to be used by Vertex and Dataflow. Alternatively, an external Shared VPC can be configured using the `network_config`variable. -- Firewall rule to allow the internal subnet communication required by Dataflow. -- Cloud NAT required to reach the internet from the different computing resources (Vertex and Dataflow). -- GCS buckets to host Vertex AI and Cloud Build Artifacts. By default the buckets will be regional and should match the Vertex AI region for the different resources (i.e. Vertex Managed Dataset) and processes (i.e. Vertex trainining). -- BigQuery Dataset where the training data will be stored. This is optional, since the training data could be already hosted in an existing BigQuery dataset. -- Artifact Registry Docker repository to host the custom images. -- Service account (`PREFIX-sa-mlops`) with the minimum permissions required by Vertex AI and Dataflow (if this service is used inside of the Vertex AI Pipeline). -- Service account (`PREFIX-sa-github@`) to be used by Workload Identity Federation, to federate Github identity (Optional). -- Secret Manager to store the Github SSH key to get access the CICD code repo. +1. Vertex Workbench (for the experimentation environment). +1. GCP Project (optional) to host all the resources. +1. Isolated VPC network and a subnet to be used by Vertex and Dataflow. Alternatively, an external Shared VPC can be configured using the `network_config`variable. +1. Firewall rule to allow the internal subnet communication required by Dataflow. +1. Cloud NAT required to reach the internet from the different computing resources (Vertex and Dataflow). +1. GCS buckets to host Vertex AI and Cloud Build Artifacts. By default the buckets will be regional and should match the Vertex AI region for the different resources (i.e. Vertex Managed Dataset) and processes (i.e. Vertex trainining). +1. BigQuery Dataset where the training data will be stored. This is optional, since the training data could be already hosted in an existing BigQuery dataset. +1. Artifact Registry Docker repository to host the custom images. +1. Service account (`PREFIX-sa-mlops`) with the minimum permissions required by Vertex AI and Dataflow (if this service is used inside of the Vertex AI Pipeline). +1. Service account (`PREFIX-sa-github@`) to be used by Workload Identity Federation, to federate Github identity (Optional). +1. Secret Manager to store the Github SSH key to get access the CICD code repo. + +## Documentation ![MLOps project description](./images/mlops_projects.png "MLOps project description") @@ -46,6 +52,29 @@ Please note that these groups are not suitable for production grade environments ## What's next? This blueprint can be used as a building block for setting up an end2end ML Ops solution. As next step, you can follow this [guide](https://cloud.google.com/architecture/architecture-for-mlops-using-tfx-kubeflow-pipelines-and-cloud-build) to setup a Vertex AI pipeline and run it on the deployed infraestructure. + +## Usage + +Basic usage of this module is as follows: + +```hcl +module "test" { + source = "./fabric/blueprints/data-solutions/vertex-mlops/" + notebooks = { + "myworkbench" = { + type = "USER_MANAGED" + } + } + prefix = "pref-dev" + project_config = { + billing_account_id = "000000-123456-123456" + parent = "folders/111111111111" + project_id = "test-dev" + } +} +# tftest modules=11 resources=60 +``` + ## Variables diff --git a/blueprints/data-solutions/vertex-mlops/blueprint-providers.tf b/blueprints/data-solutions/vertex-mlops/blueprint-providers.tf index a1dcb26db2..985f2afd3b 100644 --- a/blueprints/data-solutions/vertex-mlops/blueprint-providers.tf +++ b/blueprints/data-solutions/vertex-mlops/blueprint-providers.tf @@ -16,9 +16,9 @@ terraform { provider_meta "google" { - module_name = "blueprints/terraform/fabric-blueprints:vertex-mlops/v1.0.0" + module_name = "blueprints/terraform/fabric-blueprints:vertex-mlops/v21.0.0" } provider_meta "google-beta" { - module_name = "blueprints/terraform/fabric-blueprints:vertex-mlops/v1.0.0" + module_name = "blueprints/terraform/fabric-blueprints:vertex-mlops/v21.0.0" } } diff --git a/blueprints/data-solutions/vertex-mlops/metadata.yaml b/blueprints/data-solutions/vertex-mlops/metadata.yaml index a2e0a7f838..8816123f8c 100644 --- a/blueprints/data-solutions/vertex-mlops/metadata.yaml +++ b/blueprints/data-solutions/vertex-mlops/metadata.yaml @@ -21,10 +21,10 @@ spec: source: repo: https://github.com/GoogleCloudPlatform/cloud-foundation-fabric.git sourceType: git - version: 1.0.0 + version: 21.0.0 actuationTool: type: Terraform - version: '>= 0.13' + version: '>= 1.3.0' description: tagline: MLOps with Vertex AI detailed: |- diff --git a/blueprints/data-solutions/vertex-mlops/terraform.tfvars.sample b/blueprints/data-solutions/vertex-mlops/terraform.tfvars.sample deleted file mode 100644 index 097bac3a8c..0000000000 --- a/blueprints/data-solutions/vertex-mlops/terraform.tfvars.sample +++ /dev/null @@ -1,20 +0,0 @@ -bucket_name = "creditcards-dev" -dataset_name = "creditcards" -identity_pool_claims = "attribute.repository/ORGANIZATION/REPO" -labels = { - "env" : "dev", - "team" : "ml" -} -notebooks = { - "myworkbench" : { - "owner" : "user@example.com", - "region" : "europe-west4", - "subnet" : "default", - } -} -prefix = "pref" -project_id = "creditcards-dev" -project_create = { - billing_account_id = "000000-123456-123456" - parent = "folders/111111111111" -} From bca8a33f1eb02d4aeec529419bf6d3ff886a9f1c Mon Sep 17 00:00:00 2001 From: Julio Castillo Date: Mon, 24 Apr 2023 20:36:50 +0200 Subject: [PATCH 9/9] Fix typo and blueprint module name --- blueprints/data-solutions/vertex-mlops/metadata.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/blueprints/data-solutions/vertex-mlops/metadata.yaml b/blueprints/data-solutions/vertex-mlops/metadata.yaml index 8816123f8c..3c839374b1 100644 --- a/blueprints/data-solutions/vertex-mlops/metadata.yaml +++ b/blueprints/data-solutions/vertex-mlops/metadata.yaml @@ -15,7 +15,7 @@ apiVersion: blueprints.cloud.google.com/v1alpha1 kind: BlueprintMetadata metadata: - name: terraform-google-fabric-data_solutiont-vertex_mlops + name: fabric-blueprint-vertex-mlops spec: title: MLOps with Vertex AI source: @@ -166,4 +166,4 @@ spec: - serviceusage.googleapis.com - stackdriver.googleapis.com - storage.googleapis.com - - storage-component.googleapis.com \ No newline at end of file + - storage-component.googleapis.com