diff --git a/blueprints/data-solutions/data-platform-minimal/01-landing.tf b/blueprints/data-solutions/data-platform-minimal/01-landing.tf
index c6edddd070..48eb9969c0 100644
--- a/blueprints/data-solutions/data-platform-minimal/01-landing.tf
+++ b/blueprints/data-solutions/data-platform-minimal/01-landing.tf
@@ -18,7 +18,7 @@ locals {
iam_lnd = {
"roles/storage.objectCreator" = [module.land-sa-cs-0.iam_email]
"roles/storage.objectViewer" = [module.processing-sa-cmp-0.iam_email]
- "roles/storage.objectAdmin" = [module.processing-sa-dp-0.iam_email]
+ "roles/storage.objectAdmin" = [module.processing-sa-0.iam_email]
}
}
diff --git a/blueprints/data-solutions/data-platform-minimal/02-composer.tf b/blueprints/data-solutions/data-platform-minimal/02-composer.tf
index 616d80ad00..de7d1738bc 100644
--- a/blueprints/data-solutions/data-platform-minimal/02-composer.tf
+++ b/blueprints/data-solutions/data-platform-minimal/02-composer.tf
@@ -25,10 +25,10 @@ locals {
GCP_REGION = var.region
LAND_PRJ = module.land-project.project_id
LAND_GCS = module.land-cs-0.name
- PHS_CLUSTER_NAME = module.processing-dp-historyserver.name
+ PHS_CLUSTER_NAME = try(module.processing-dp-historyserver[0].name, null)
PROCESSING_GCS = module.processing-cs-0.name
PROCESSING_PRJ = module.processing-project.project_id
- PROCESSING_SA_DP = module.processing-sa-dp-0.email
+ PROCESSING_SA = module.processing-sa-0.email
PROCESSING_SUBNET = local.processing_subnet
PROCESSING_VPC = local.processing_vpc
}
@@ -47,7 +47,7 @@ module "processing-sa-cmp-0" {
}
resource "google_composer_environment" "processing-cmp-0" {
- count = var.composer_config.disable_deployment == true ? 0 : 1
+ count = var.enable_services.composer == true ? 1 : 0
project = module.processing-project.project_id
name = "${var.prefix}-prc-cmp-0"
region = var.region
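
The two changes above follow the standard optional-deployment pattern: a boolean in `enable_services` drives `count`, and `try()` turns the indexed reference into `null` when the instance is absent. A minimal, self-contained sketch of the pattern (illustrative names, using the `null` provider instead of Composer):

```hcl
variable "deploy_history_server" {
  type    = bool
  default = true
}

# Zero or one instances depending on the flag, mirroring the count
# expressions introduced in this diff.
resource "null_resource" "history_server" {
  count = var.deploy_history_server ? 1 : 0
}

locals {
  # A bare indexed reference fails at plan time when count is 0;
  # try() yields null instead, as with PHS_CLUSTER_NAME above.
  history_server_id = try(null_resource.history_server[0].id, null)
}
```
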
diff --git a/blueprints/data-solutions/data-platform-minimal/02-dataproc.tf b/blueprints/data-solutions/data-platform-minimal/02-dataproc.tf
index 1161abf018..4275c559a4 100644
--- a/blueprints/data-solutions/data-platform-minimal/02-dataproc.tf
+++ b/blueprints/data-solutions/data-platform-minimal/02-dataproc.tf
@@ -14,7 +14,8 @@
# tfdoc:file:description Cloud Dataproc resources.
-module "processing-cs-dp-history" {
+module "processing-dp-history" {
+ count = var.enable_services.dataproc_history_server == true ? 1 : 0
source = "../../../modules/gcs"
project_id = module.processing-project.project_id
prefix = var.prefix
@@ -24,12 +25,12 @@ module "processing-cs-dp-history" {
encryption_key = var.service_encryption_keys.storage
}
-module "processing-sa-dp-0" {
+module "processing-sa-0" {
source = "../../../modules/iam-service-account"
project_id = module.processing-project.project_id
prefix = var.prefix
- name = "prc-dp-0"
- display_name = "Dataproc service account"
+ name = "prc-0"
+ display_name = "Processing service account"
iam = {
"roles/iam.serviceAccountTokenCreator" = [
local.groups_iam.data-engineers,
@@ -41,7 +42,7 @@ module "processing-sa-dp-0" {
}
}
-module "processing-dp-staging-0" {
+module "processing-staging-0" {
source = "../../../modules/gcs"
project_id = module.processing-project.project_id
prefix = var.prefix
@@ -51,7 +52,7 @@ module "processing-dp-staging-0" {
encryption_key = var.service_encryption_keys.storage
}
-module "processing-dp-temp-0" {
+module "processing-temp-0" {
source = "../../../modules/gcs"
project_id = module.processing-project.project_id
prefix = var.prefix
@@ -61,7 +62,7 @@ module "processing-dp-temp-0" {
encryption_key = var.service_encryption_keys.storage
}
-module "processing-dp-log-0" {
+module "processing-log-0" {
source = "../../../modules/gcs"
project_id = module.processing-project.project_id
prefix = var.prefix
@@ -72,19 +73,20 @@ module "processing-dp-log-0" {
}
module "processing-dp-historyserver" {
+ count = var.enable_services.dataproc_history_server == true ? 1 : 0
source = "../../../modules/dataproc"
project_id = module.processing-project.project_id
- name = "hystory-server"
+ name = "history-server"
prefix = var.prefix
region = var.region
dataproc_config = {
cluster_config = {
- staging_bucket = module.processing-dp-staging-0.name
- temp_bucket = module.processing-dp-temp-0.name
+ staging_bucket = module.processing-staging-0.name
+ temp_bucket = module.processing-temp-0.name
gce_cluster_config = {
subnetwork = module.processing-vpc[0].subnets["${var.region}/${var.prefix}-processing"].self_link
zone = "${var.region}-b"
- service_account = module.processing-sa-dp-0.email
+ service_account = module.processing-sa-0.email
service_account_scopes = ["cloud-platform"]
internal_ip_only = true
}
@@ -99,10 +101,10 @@ module "processing-dp-historyserver" {
"dataproc:dataproc.allow.zero.workers" = "true"
"dataproc:job.history.to-gcs.enabled" = "true"
"spark:spark.history.fs.logDirectory" = (
- "gs://${module.processing-dp-staging-0.name}/*/spark-job-history"
+ "gs://${module.processing-staging-0.name}/*/spark-job-history"
)
"spark:spark.eventLog.dir" = (
- "gs://${module.processing-dp-staging-0.name}/*/spark-job-history"
+ "gs://${module.processing-staging-0.name}/*/spark-job-history"
)
"spark:spark.history.custom.executor.log.url.applyIncompleteApplication" = "false"
"spark:spark.history.custom.executor.log.url" = (
diff --git a/blueprints/data-solutions/data-platform-minimal/02-processing.tf b/blueprints/data-solutions/data-platform-minimal/02-processing.tf
index 9bbb623430..6dbd13331b 100644
--- a/blueprints/data-solutions/data-platform-minimal/02-processing.tf
+++ b/blueprints/data-solutions/data-platform-minimal/02-processing.tf
@@ -28,7 +28,7 @@ locals {
module.processing-sa-cmp-0.iam_email
]
"roles/dataproc.worker" = [
- module.processing-sa-dp-0.iam_email
+ module.processing-sa-0.iam_email
]
"roles/iam.serviceAccountUser" = [
module.processing-sa-cmp-0.iam_email, local.groups_iam.data-engineers
diff --git a/blueprints/data-solutions/data-platform-minimal/03-curated.tf b/blueprints/data-solutions/data-platform-minimal/03-curated.tf
index 5b044b51ef..730e8d6cb7 100644
--- a/blueprints/data-solutions/data-platform-minimal/03-curated.tf
+++ b/blueprints/data-solutions/data-platform-minimal/03-curated.tf
@@ -16,13 +16,13 @@
locals {
cur_iam = {
- "roles/bigquery.dataOwner" = [module.processing-sa-dp-0.iam_email]
+ "roles/bigquery.dataOwner" = [module.processing-sa-0.iam_email]
"roles/bigquery.dataViewer" = [
local.groups_iam.data-analysts,
local.groups_iam.data-engineers
]
"roles/bigquery.jobUser" = [
- module.processing-sa-dp-0.iam_email,
+ module.processing-sa-0.iam_email,
local.groups_iam.data-analysts,
local.groups_iam.data-engineers
]
@@ -35,7 +35,7 @@ locals {
"roles/storage.objectViewer" = [
local.groups_iam.data-analysts, local.groups_iam.data-engineers
]
- "roles/storage.objectAdmin" = [module.processing-sa-dp-0.iam_email]
+ "roles/storage.objectAdmin" = [module.processing-sa-0.iam_email]
}
cur_services = [
"iam.googleapis.com",
diff --git a/blueprints/data-solutions/data-platform-minimal/04-common.tf b/blueprints/data-solutions/data-platform-minimal/04-common.tf
index 3a2d01bdf0..52f6e84f07 100644
--- a/blueprints/data-solutions/data-platform-minimal/04-common.tf
+++ b/blueprints/data-solutions/data-platform-minimal/04-common.tf
@@ -20,16 +20,16 @@ locals {
"roles/dlp.estimatesAdmin" = [local.groups_iam.data-engineers]
"roles/dlp.reader" = [local.groups_iam.data-engineers]
"roles/dlp.user" = [
- module.processing-sa-dp-0.iam_email,
+ module.processing-sa-0.iam_email,
local.groups_iam.data-engineers
]
"roles/datacatalog.admin" = [local.groups_iam.data-security]
"roles/datacatalog.viewer" = [
- module.processing-sa-dp-0.iam_email,
+ module.processing-sa-0.iam_email,
local.groups_iam.data-analysts
]
"roles/datacatalog.categoryFineGrainedReader" = [
- module.processing-sa-dp-0.iam_email
+ module.processing-sa-0.iam_email
]
}
}
diff --git a/blueprints/data-solutions/data-platform-minimal/README.md b/blueprints/data-solutions/data-platform-minimal/README.md
index a1d60f1c60..f468911310 100644
--- a/blueprints/data-solutions/data-platform-minimal/README.md
+++ b/blueprints/data-solutions/data-platform-minimal/README.md
@@ -230,8 +230,8 @@ network_config = {
host_project = "PROJECT_ID"
network_self_link = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/global/networks/NAME"
subnet_self_links = {
- processing_dataproc = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/regions/REGION/subnetworks/NAME"
- processing_composer = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/regions/REGION/subnetworks/NAME"
+ processing_transformation = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/regions/REGION/subnetworks/NAME"
+ processing_composer = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/regions/REGION/subnetworks/NAME"
}
composer_ip_ranges = {
cloudsql = "192.168.XXX.XXX/24"
@@ -280,29 +280,30 @@ The application layer is out of scope of this script. As a demo purpose only, on
| name | description | type | required | default |
|---|---|:---:|:---:|:---:|
-| [organization_domain](variables.tf#L114) | Organization domain. | string | ✓ |  |
-| [prefix](variables.tf#L119) | Prefix used for resource names. | string | ✓ |  |
-| [project_config](variables.tf#L128) | Provide 'billing_account_id' value if project creation is needed, uses existing 'project_ids' if null. Parent is in 'folders/nnn' or 'organizations/nnn' format. | object({…}) | ✓ |  |
-| [composer_config](variables.tf#L17) | Cloud Composer config. | object({…}) |  | {} |
-| [data_catalog_tags](variables.tf#L55) | List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format. | map(map(list(string))) |  | {…} |
-| [data_force_destroy](variables.tf#L66) | Flag to set 'force_destroy' on data services like BiguQery or Cloud Storage. | bool |  | false |
-| [groups](variables.tf#L72) | User groups. | map(string) |  | {…} |
-| [location](variables.tf#L82) | Location used for multi-regional resources. | string |  | "eu" |
-| [network_config](variables.tf#L88) | Shared VPC network configurations to use. If null networks will be created in projects. | object({…}) |  | {} |
-| [project_suffix](variables.tf#L152) | Suffix used only for project ids. | string |  | null |
-| [region](variables.tf#L158) | Region used for regional resources. | string |  | "europe-west1" |
-| [service_encryption_keys](variables.tf#L164) | Cloud KMS to use to encrypt different services. Key location should match service region. | object({…}) |  | {} |
+| [organization_domain](variables.tf#L122) | Organization domain. | string | ✓ |  |
+| [prefix](variables.tf#L127) | Prefix used for resource names. | string | ✓ |  |
+| [project_config](variables.tf#L136) | Provide 'billing_account_id' value if project creation is needed, uses existing 'project_ids' if null. Parent is in 'folders/nnn' or 'organizations/nnn' format. | object({…}) | ✓ |  |
+| [composer_config](variables.tf#L17) | Cloud Composer config. | object({…}) |  | {} |
+| [data_catalog_tags](variables.tf#L54) | List of Data Catalog Policy tags to be created with optional IAM binding configuration in {tag => {ROLE => [MEMBERS]}} format. | map(map(list(string))) |  | {…} |
+| [data_force_destroy](variables.tf#L65) | Flag to set 'force_destroy' on data services like BigQuery or Cloud Storage. | bool |  | false |
+| [enable_services](variables.tf#L71) | Flag to enable or disable services in the Data Platform. | object({…}) |  | {} |
+| [groups](variables.tf#L80) | User groups. | map(string) |  | {…} |
+| [location](variables.tf#L90) | Location used for multi-regional resources. | string |  | "eu" |
+| [network_config](variables.tf#L96) | Shared VPC network configurations to use. If null, networks will be created in projects. | object({…}) |  | {} |
+| [project_suffix](variables.tf#L160) | Suffix used only for project ids. | string |  | null |
+| [region](variables.tf#L166) | Region used for regional resources. | string |  | "europe-west1" |
+| [service_encryption_keys](variables.tf#L172) | Cloud KMS to use to encrypt different services. Key location should match service region. | object({…}) |  | {} |
## Outputs
| name | description | sensitive |
|---|---|:---:|
| [bigquery-datasets](outputs.tf#L17) | BigQuery datasets. | |
-| [dataproc-hystory-server](outputs.tf#L24) | List of bucket names which have been assigned to the cluster. | |
-| [gcs-buckets](outputs.tf#L34) | GCS buckets. | ✓ |
-| [kms_keys](outputs.tf#L44) | Cloud MKS keys. | |
-| [projects](outputs.tf#L49) | GCP Projects informations. | |
-| [vpc_network](outputs.tf#L67) | VPC network. | |
-| [vpc_subnet](outputs.tf#L75) | VPC subnetworks. | |
+| [dataproc-history-server](outputs.tf#L24) | Dataproc history server details, if deployed. | |
+| [gcs-buckets](outputs.tf#L29) | GCS buckets. | ✓ |
+| [kms_keys](outputs.tf#L39) | Cloud KMS keys. | |
+| [projects](outputs.tf#L44) | GCP projects information. | |
+| [vpc_network](outputs.tf#L62) | VPC network. | |
+| [vpc_subnet](outputs.tf#L70) | VPC subnetworks. | |
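
For reference, a minimal `terraform.tfvars` fragment exercising the new variable might look like this (illustrative values; both flags default to `true`):

```hcl
enable_services = {
  composer                = false # skip the Composer environment
  dataproc_history_server = true  # keep the Dataproc history server
}
```
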
diff --git a/blueprints/data-solutions/data-platform-minimal/demo/orchestrate_pyspark.py b/blueprints/data-solutions/data-platform-minimal/demo/orchestrate_pyspark.py
index ef5084ffe2..295fdd62fc 100644
--- a/blueprints/data-solutions/data-platform-minimal/demo/orchestrate_pyspark.py
+++ b/blueprints/data-solutions/data-platform-minimal/demo/orchestrate_pyspark.py
@@ -41,9 +41,9 @@
PHS_CLUSTER_NAME = os.environ.get("PHS_CLUSTER_NAME")
PROCESSING_GCS = os.environ.get("PROCESSING_GCS")
PROCESSING_PRJ = os.environ.get("PROCESSING_PRJ")
-PROCESSING_SA_DP = os.environ.get("PROCESSING_SA_DP")
-PROCESSING_SA_SUBNET = os.environ.get("PROCESSING_SUBNET")
-PROCESSING_SA_VPC = os.environ.get("PROCESSING_VPC")
+PROCESSING_SA = os.environ.get("PROCESSING_SA")
+PROCESSING_SUBNET = os.environ.get("PROCESSING_SUBNET")
+PROCESSING_VPC = os.environ.get("PROCESSING_VPC")
PYTHON_FILE_LOCATION = "gs://"+PROCESSING_GCS+"/pyspark_sort.py"
PHS_CLUSTER_PATH = "projects/"+PROCESSING_PRJ+"/regions/"+DP_REGION+"/clusters/"+PHS_CLUSTER_NAME
@@ -65,8 +65,8 @@
batch={
"environment_config": {
"execution_config": {
- "service_account": PROCESSING_SA_DP,
- "subnetwork_uri": PROCESSING_SA_SUBNET
+ "service_account": PROCESSING_SA,
+ "subnetwork_uri": PROCESSING_SUBNET
},
"peripherals_config": {
"spark_history_server_config":{
diff --git a/blueprints/data-solutions/data-platform-minimal/outputs.tf b/blueprints/data-solutions/data-platform-minimal/outputs.tf
index 97eda2a3f5..22e641a0a1 100644
--- a/blueprints/data-solutions/data-platform-minimal/outputs.tf
+++ b/blueprints/data-solutions/data-platform-minimal/outputs.tf
@@ -21,14 +21,9 @@ output "bigquery-datasets" {
}
}
-output "dataproc-hystory-server" {
+output "dataproc-history-server" {
description = "List of bucket names which have been assigned to the cluster."
- value = {
- bucket_names = module.processing-dp-historyserver.bucket_names
- http_ports = module.processing-dp-historyserver.http_ports
- instance_names = module.processing-dp-historyserver.instance_names
- name = module.processing-dp-historyserver.name
- }
+ value = one(module.processing-dp-historyserver)
}
output "gcs-buckets" {
@@ -67,15 +62,15 @@ output "projects" {
output "vpc_network" {
description = "VPC network."
value = {
- processing_dataproc = local.processing_vpc
- processing_composer = local.processing_vpc
+ processing_transformation = local.processing_vpc
+ processing_composer = local.processing_vpc
}
}
output "vpc_subnet" {
description = "VPC subnetworks."
value = {
- processing_dataproc = local.processing_subnet
- processing_composer = local.processing_subnet
+ processing_transformation = local.processing_subnet
+ processing_composer = local.processing_subnet
}
}
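
The `one()` simplification works because a module with `count` is a list: with one instance `one()` returns that instance's outputs object, with zero it returns `null`, and with more than one it raises an error. A sketch of consuming the new output shape downstream (hypothetical output name, not part of the blueprint):

```hcl
output "history_server_name" {
  # null when the history server is disabled, its name otherwise.
  value = try(one(module.processing-dp-historyserver).name, null)
}
```
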
diff --git a/blueprints/data-solutions/data-platform-minimal/variables.tf b/blueprints/data-solutions/data-platform-minimal/variables.tf
index a63f07c38f..e6b62df6f3 100644
--- a/blueprints/data-solutions/data-platform-minimal/variables.tf
+++ b/blueprints/data-solutions/data-platform-minimal/variables.tf
@@ -17,8 +17,7 @@
variable "composer_config" {
description = "Cloud Composer config."
type = object({
- disable_deployment = optional(bool, false)
- environment_size = optional(string, "ENVIRONMENT_SIZE_SMALL")
+ environment_size = optional(string, "ENVIRONMENT_SIZE_SMALL")
software_config = optional(object({
airflow_config_overrides = optional(map(string), {})
pypi_packages = optional(map(string), {})
@@ -69,6 +68,15 @@ variable "data_force_destroy" {
default = false
}
+variable "enable_services" {
+ description = "Flag to enable or disable services in the Data Platform."
+ type = object({
+ composer = optional(bool, true)
+ dataproc_history_server = optional(bool, true)
+ })
+ default = {}
+}
+
variable "groups" {
description = "User groups."
type = map(string)
@@ -91,8 +99,8 @@ variable "network_config" {
host_project = optional(string)
network_self_link = optional(string)
subnet_self_links = optional(object({
- processing_dataproc = string
- processing_composer = string
+ processing_transformation = string
+ processing_composer = string
}), null)
composer_ip_ranges = optional(object({
connection_subnetwork = optional(string)
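
Because every attribute of `enable_services` carries an `optional()` default and the variable itself defaults to `{}`, callers that omit it get both services enabled. An illustrative output to confirm the resolved values (not part of the blueprint):

```hcl
output "resolved_enable_services" {
  # With the default {}, this renders:
  # { composer = true, dataproc_history_server = true }
  value = var.enable_services
}
```
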