diff --git a/modules/cloud-dataplex/README.md b/modules/cloud-dataplex/README.md
index 6849db8446..c3c02a7c5b 100644
--- a/modules/cloud-dataplex/README.md
+++ b/modules/cloud-dataplex/README.md
@@ -1,7 +1,6 @@
-# Cloud Dataplex instance with lake, zone & assests
+# Cloud Dataplex instance with lake, zone & assets
-This module manages the creation of Cloud Dataplex instance along with lake, zone & assets in single regions.
-
+This module manages the creation of a Cloud Dataplex instance along with lake, zones & assets in a single region.
+
## Simple example
@@ -16,39 +15,97 @@ module "dataplex" {
project_id = "myproject"
region = "europe-west2"
zones = {
- zone_1 = {
+ landing = {
type = "RAW"
discovery = true
assets = {
- asset_1 = {
- bucket_name = "asset_1"
+ gcs_1 = {
+ resource_name = "gcs_bucket"
cron_schedule = "15 15 * * *"
discovery_spec_enabled = true
resource_spec_type = "STORAGE_BUCKET"
}
}
},
- zone_2 = {
+ curated = {
type = "CURATED"
+ discovery = false
+ assets = {
+ bq_1 = {
+ resource_name = "bq_dataset"
+ cron_schedule = null
+ discovery_spec_enabled = false
+ resource_spec_type = "BIGQUERY_DATASET"
+ }
+ }
+ }
+ }
+}
+
+# tftest modules=1 resources=5
+```
+
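+The module also exposes the lake, zone and asset attributes as outputs (see `outputs.tf`), which downstream configurations can reference. A minimal, illustrative sketch (output names are arbitrary) assuming the module call above:
+
+```hcl
+output "dataplex_lake_name" {
+  description = "Name of the Dataplex lake created by the module."
+  value       = module.dataplex.lake
+}
+
+output "dataplex_zone_names" {
+  description = "Names of the zones created by the module."
+  value       = module.dataplex.zones
+}
+```
+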
+## IAM
+
+This example shows how to set up a Cloud Dataplex instance with lake, zones & assets in a GCP project, assigning IAM roles at the lake and zone level.
+
+```hcl
+
+module "dataplex" {
+ source = "./fabric/modules/cloud-dataplex"
+ name = "lake"
+ prefix = "test"
+ project_id = "myproject"
+ region = "europe-west2"
+ iam = {
+ "roles/dataplex.viewer" = [
+ "group:analysts@example.com",
+ "group:analysts_sensitive@example.com"
+ ]
+ }
+ zones = {
+ landing = {
+ type = "RAW"
discovery = true
assets = {
- asset_2 = {
- bucket_name = "asset_2"
+ gcs_1 = {
+ resource_name = "gcs_bucket"
cron_schedule = "15 15 * * *"
discovery_spec_enabled = true
resource_spec_type = "STORAGE_BUCKET"
}
}
+ },
+ curated = {
+ type = "CURATED"
+ discovery = false
+ iam = {
+ "roles/viewer" = [
+ "group:analysts@example.com",
+ "group:analysts_sensitive@example.com"
+ ]
+ "roles/dataplex.dataReader" = [
+ "group:analysts@example.com",
+ "group:analysts_sensitive@example.com"
+ ]
+ }
+ assets = {
+ bq_1 = {
+ resource_name = "bq_dataset"
+ cron_schedule = null
+ discovery_spec_enabled = false
+ resource_spec_type = "BIGQUERY_DATASET"
+ }
+ }
}
}
}
-# tftest modules=1 resources=5
+# tftest modules=1 resources=8
```
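+
+Note that lake and zone bindings are managed through the authoritative `google_dataplex_lake_iam_binding` and `google_dataplex_zone_iam_binding` resources, so the members listed for a role replace any existing members of that role.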
+
## TODO
-- [ ] Add IAM support
-- [ ] support different type of assets
- [ ] support multi-regions
@@ -56,12 +113,13 @@ module "dataplex" {
| name | description | type | required | default |
|---|---|:---:|:---:|:---:|
-| [name](variables.tf#L23) | Name of Dataplex Lake. | string | ✓ |  |
-| [prefix](variables.tf#L28) | Optional prefix used to generate Dataplex Lake. | string | ✓ |  |
-| [project_id](variables.tf#L33) | The ID of the project where this Dataplex Lake will be created. | string | ✓ |  |
-| [region](variables.tf#L38) | Region of the Dataplax Lake. | string | ✓ |  |
-| [zones](variables.tf#L43) | Dataplex lake zones, such as `RAW` and `CURATED`. | map(object({…})) | ✓ |  |
-| [location_type](variables.tf#L17) | The location type of the Dataplax Lake. | string |  | "SINGLE_REGION" |
+| [name](variables.tf#L30) | Name of Dataplex Lake. | string | ✓ |  |
+| [project_id](variables.tf#L41) | The ID of the project where this Dataplex Lake will be created. | string | ✓ |  |
+| [region](variables.tf#L46) | Region of the Dataplex Lake. | string | ✓ |  |
+| [zones](variables.tf#L51) | Dataplex lake zones, such as `RAW` and `CURATED`. | map(object({…})) | ✓ |  |
+| [iam](variables.tf#L17) | Dataplex lake IAM bindings in {ROLE => [MEMBERS]} format. | map(list(string)) |  | {} |
+| [location_type](variables.tf#L24) | The location type of the Dataplex Lake. | string |  | "SINGLE_REGION" |
+| [prefix](variables.tf#L35) | Optional prefix used to generate Dataplex Lake. | string |  | null |

## Outputs
diff --git a/modules/cloud-dataplex/main.tf b/modules/cloud-dataplex/main.tf
index af5ef018f8..b78ca54819 100644
--- a/modules/cloud-dataplex/main.tf
+++ b/modules/cloud-dataplex/main.tf
@@ -21,28 +21,54 @@ locals {
for asset, asset_data in zones_info.assets : {
zone_name = zone
asset_name = asset
- bucket_name = asset_data.bucket_name
- cron_schedule = asset_data.cron_schedule
+ resource_name = asset_data.resource_name
+ resource_project = coalesce(asset_data.resource_project, var.project_id)
+ cron_schedule = asset_data.discovery_spec_enabled ? asset_data.cron_schedule : null
discovery_spec_enabled = asset_data.discovery_spec_enabled
resource_spec_type = asset_data.resource_spec_type
}
]
])
+
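+  # flatten zone-level IAM bindings into a list of {zone, role, members}
+  # objects so they can be keyed per zone/role pair in for_each below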
+ zone_iam = flatten([
+ for zone, zone_details in var.zones : [
+ for role, members in zone_details.iam : {
+ "zone" = zone
+ "role" = role
+ "members" = members
+ }
+ ] if zone_details.iam != null
+ ])
+
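+  # map asset resource types to the path segment used in the asset resource name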
+ resource_type_mapping = {
+ "STORAGE_BUCKET" : "buckets",
+ "BIGQUERY_DATASET" : "datasets"
+ }
}
-resource "google_dataplex_lake" "basic_lake" {
+resource "google_dataplex_lake" "lake" {
name = "${local.prefix}${var.name}"
location = var.region
provider = google-beta
project = var.project_id
}
-resource "google_dataplex_zone" "basic_zone" {
+resource "google_dataplex_lake_iam_binding" "binding" {
+ for_each = var.iam
+ project = var.project_id
+ location = var.region
+ lake = google_dataplex_lake.lake.name
+ role = each.key
+ members = each.value
+}
+
+resource "google_dataplex_zone" "zone" {
for_each = var.zones
+ provider = google-beta
+ project = var.project_id
name = each.key
location = var.region
- provider = google-beta
- lake = google_dataplex_lake.basic_lake.name
+ lake = google_dataplex_lake.lake.name
type = each.value.type
discovery_spec {
@@ -52,11 +78,21 @@ resource "google_dataplex_zone" "basic_zone" {
resource_spec {
location_type = var.location_type
}
+}
- project = var.project_id
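+# zone-level IAM bindings, one binding per zone and role combination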
+resource "google_dataplex_zone_iam_binding" "binding" {
+ for_each = {
+ for zone_role in local.zone_iam : "${zone_role.zone}-${zone_role.role}" => zone_role
+ }
+ project = var.project_id
+ location = var.region
+ lake = google_dataplex_lake.lake.name
+ dataplex_zone = google_dataplex_zone.zone[each.value.zone].name
+ role = each.value.role
+ members = each.value.members
}
-resource "google_dataplex_asset" "primary" {
+resource "google_dataplex_asset" "asset" {
for_each = {
for tm in local.zone_assets : "${tm.zone_name}-${tm.asset_name}" => tm
}
@@ -64,8 +100,8 @@ resource "google_dataplex_asset" "primary" {
location = var.region
provider = google-beta
- lake = google_dataplex_lake.basic_lake.name
- dataplex_zone = google_dataplex_zone.basic_zone[each.value.zone_name].name
+ lake = google_dataplex_lake.lake.name
+ dataplex_zone = google_dataplex_zone.zone[each.value.zone_name].name
discovery_spec {
enabled = each.value.discovery_spec_enabled
@@ -73,7 +109,11 @@ resource "google_dataplex_asset" "primary" {
}
resource_spec {
- name = "projects/${var.project_id}/buckets/${each.value.bucket_name}"
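+    # e.g. projects/<project>/buckets/<bucket> or projects/<project>/datasets/<dataset>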
+ name = format("projects/%s/%s/%s",
+ each.value.resource_project,
+ local.resource_type_mapping[each.value.resource_spec_type],
+ each.value.resource_name
+ )
type = each.value.resource_spec_type
}
project = var.project_id
diff --git a/modules/cloud-dataplex/outputs.tf b/modules/cloud-dataplex/outputs.tf
index 7a68ff289b..0da4fcc242 100644
--- a/modules/cloud-dataplex/outputs.tf
+++ b/modules/cloud-dataplex/outputs.tf
@@ -16,21 +16,21 @@
output "assets" {
-  description = "Assets attached to the lake of Dataplex Lake."
+  description = "Assets attached to the zones of the Dataplex Lake."
- value = local.zone_assets[*]["asset_name"]
+ value = local.zone_assets[*]
}
output "id" {
description = "Fully qualified Dataplex Lake id."
- value = google_dataplex_lake.basic_lake.id
+ value = google_dataplex_lake.lake.id
}
output "lake" {
-  description = "The lake name of Dataplex Lake."
+  description = "The name of the Dataplex Lake."
- value = google_dataplex_lake.basic_lake.name
+ value = google_dataplex_lake.lake.name
}
output "zones" {
-  description = "The zone name of Dataplex Lake."
+  description = "The zone names of the Dataplex Lake."
- value = local.zone_assets[*]["zone_name"]
+ value = distinct(local.zone_assets[*]["zone_name"])
}
diff --git a/modules/cloud-dataplex/variables.tf b/modules/cloud-dataplex/variables.tf
index bbbba1c795..fa4e65211a 100644
--- a/modules/cloud-dataplex/variables.tf
+++ b/modules/cloud-dataplex/variables.tf
@@ -14,6 +14,13 @@
* limitations under the License.
*/
+variable "iam" {
+ description = "Dataplex lake IAM bindings in {ROLE => [MEMBERS]} format."
+ type = map(list(string))
+ default = {}
+ nullable = false
+}
+
variable "location_type" {
-  description = "The location type of the Dataplax Lake."
+  description = "The location type of the Dataplex Lake."
type = string
@@ -28,6 +35,7 @@ variable "name" {
variable "prefix" {
description = "Optional prefix used to generate Dataplex Lake."
type = string
+ default = null
}
variable "project_id" {
@@ -45,11 +53,21 @@ variable "zones" {
type = map(object({
type = string
discovery = optional(bool, true)
+ iam = optional(map(list(string)), null)
assets = map(object({
- bucket_name = string
+ resource_name = string
+ resource_project = optional(string)
cron_schedule = optional(string, "15 15 * * *")
discovery_spec_enabled = optional(bool, true)
resource_spec_type = optional(string, "STORAGE_BUCKET")
}))
}))
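+  # note: assets are limited to GCS buckets and BigQuery datasets (see validation below)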
+ validation {
+ condition = alltrue(flatten([
+ for k, v in var.zones : [
+ for kk, vv in v.assets : contains(["BIGQUERY_DATASET", "STORAGE_BUCKET"], vv.resource_spec_type)
+ ]
+ ]))
+    error_message = "Asset spec type must be one of 'BIGQUERY_DATASET' or 'STORAGE_BUCKET'."
+ }
}
diff --git a/modules/project/README.md b/modules/project/README.md
index d44aca5756..127f215f11 100644
--- a/modules/project/README.md
+++ b/modules/project/README.md
@@ -209,6 +209,7 @@ This table lists all affected services and roles that you need to grant to servi
| artifactregistry.googleapis.com | artifactregistry | roles/artifactregistry.serviceAgent |
| cloudasset.googleapis.com | cloudasset | roles/cloudasset.serviceAgent |
| cloudbuild.googleapis.com | cloudbuild | roles/cloudbuild.builds.builder |
+| dataplex.googleapis.com | dataplex | roles/dataplex.serviceAgent |
| gkehub.googleapis.com | fleet | roles/gkehub.serviceAgent |
| meshconfig.googleapis.com | servicemesh | roles/anthosservicemesh.serviceAgent |
| multiclusteringress.googleapis.com | multicluster-ingress | roles/multiclusteringress.serviceAgent |
diff --git a/modules/project/service-agents.yaml b/modules/project/service-agents.yaml
index 856c650b13..4ef3cafdb9 100644
--- a/modules/project/service-agents.yaml
+++ b/modules/project/service-agents.yaml
@@ -155,6 +155,7 @@
service_agent: "service-%s@gcp-sa-datapipelines.iam.gserviceaccount.com"
- name: "dataplex"
service_agent: "service-%s@gcp-sa-dataplex.iam.gserviceaccount.com"
+ jit: true # roles/dataplex.serviceAgent
- name: "dataproc"
service_agent: "service-%s@dataproc-accounts.iam.gserviceaccount.com"
- name: "datastream"