Skip to content

Commit

Permalink
Minimal Data Platform - Make components optional (#1380)
Browse files Browse the repository at this point in the history
Make some components optional: Composer and the Dataproc history server.
  • Loading branch information
lcaggio authored May 16, 2023
1 parent 6a89d71 commit 40656a2
Show file tree
Hide file tree
Showing 10 changed files with 70 additions and 64 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ locals {
iam_lnd = {
"roles/storage.objectCreator" = [module.land-sa-cs-0.iam_email]
"roles/storage.objectViewer" = [module.processing-sa-cmp-0.iam_email]
"roles/storage.objectAdmin" = [module.processing-sa-dp-0.iam_email]
"roles/storage.objectAdmin" = [module.processing-sa-0.iam_email]
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ locals {
GCP_REGION = var.region
LAND_PRJ = module.land-project.project_id
LAND_GCS = module.land-cs-0.name
PHS_CLUSTER_NAME = module.processing-dp-historyserver.name
PHS_CLUSTER_NAME = try(module.processing-dp-historyserver[0].name, null)
PROCESSING_GCS = module.processing-cs-0.name
PROCESSING_PRJ = module.processing-project.project_id
PROCESSING_SA_DP = module.processing-sa-dp-0.email
PROCESSING_SA = module.processing-sa-0.email
PROCESSING_SUBNET = local.processing_subnet
PROCESSING_VPC = local.processing_vpc
}
Expand All @@ -47,7 +47,7 @@ module "processing-sa-cmp-0" {
}

resource "google_composer_environment" "processing-cmp-0" {
count = var.composer_config.disable_deployment == true ? 0 : 1
count = var.enable_services.composer == true ? 1 : 0
project = module.processing-project.project_id
name = "${var.prefix}-prc-cmp-0"
region = var.region
Expand Down
28 changes: 15 additions & 13 deletions blueprints/data-solutions/data-platform-minimal/02-dataproc.tf
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@

# tfdoc:file:description Cloud Dataproc resources.

module "processing-cs-dp-history" {
module "processing-dp-history" {
count = var.enable_services.dataproc_history_server == true ? 1 : 0
source = "../../../modules/gcs"
project_id = module.processing-project.project_id
prefix = var.prefix
Expand All @@ -24,12 +25,12 @@ module "processing-cs-dp-history" {
encryption_key = var.service_encryption_keys.storage
}

module "processing-sa-dp-0" {
module "processing-sa-0" {
source = "../../../modules/iam-service-account"
project_id = module.processing-project.project_id
prefix = var.prefix
name = "prc-dp-0"
display_name = "Dataproc service account"
name = "prc-0"
display_name = "Processing service account"
iam = {
"roles/iam.serviceAccountTokenCreator" = [
local.groups_iam.data-engineers,
Expand All @@ -41,7 +42,7 @@ module "processing-sa-dp-0" {
}
}

module "processing-dp-staging-0" {
module "processing-staging-0" {
source = "../../../modules/gcs"
project_id = module.processing-project.project_id
prefix = var.prefix
Expand All @@ -51,7 +52,7 @@ module "processing-dp-staging-0" {
encryption_key = var.service_encryption_keys.storage
}

module "processing-dp-temp-0" {
module "processing-temp-0" {
source = "../../../modules/gcs"
project_id = module.processing-project.project_id
prefix = var.prefix
Expand All @@ -61,7 +62,7 @@ module "processing-dp-temp-0" {
encryption_key = var.service_encryption_keys.storage
}

module "processing-dp-log-0" {
module "processing-log-0" {
source = "../../../modules/gcs"
project_id = module.processing-project.project_id
prefix = var.prefix
Expand All @@ -72,19 +73,20 @@ module "processing-dp-log-0" {
}

module "processing-dp-historyserver" {
count = var.enable_services.dataproc_history_server == true ? 1 : 0
source = "../../../modules/dataproc"
project_id = module.processing-project.project_id
name = "hystory-server"
name = "history-server"
prefix = var.prefix
region = var.region
dataproc_config = {
cluster_config = {
staging_bucket = module.processing-dp-staging-0.name
temp_bucket = module.processing-dp-temp-0.name
staging_bucket = module.processing-staging-0.name
temp_bucket = module.processing-temp-0.name
gce_cluster_config = {
subnetwork = module.processing-vpc[0].subnets["${var.region}/${var.prefix}-processing"].self_link
zone = "${var.region}-b"
service_account = module.processing-sa-dp-0.email
service_account = module.processing-sa-0.email
service_account_scopes = ["cloud-platform"]
internal_ip_only = true
}
Expand All @@ -99,10 +101,10 @@ module "processing-dp-historyserver" {
"dataproc:dataproc.allow.zero.workers" = "true"
"dataproc:job.history.to-gcs.enabled" = "true"
"spark:spark.history.fs.logDirectory" = (
"gs://${module.processing-dp-staging-0.name}/*/spark-job-history"
"gs://${module.processing-staging-0.name}/*/spark-job-history"
)
"spark:spark.eventLog.dir" = (
"gs://${module.processing-dp-staging-0.name}/*/spark-job-history"
"gs://${module.processing-staging-0.name}/*/spark-job-history"
)
"spark:spark.history.custom.executor.log.url.applyIncompleteApplication" = "false"
"spark:spark.history.custom.executor.log.url" = (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ locals {
module.processing-sa-cmp-0.iam_email
]
"roles/dataproc.worker" = [
module.processing-sa-dp-0.iam_email
module.processing-sa-0.iam_email
]
"roles/iam.serviceAccountUser" = [
module.processing-sa-cmp-0.iam_email, local.groups_iam.data-engineers
Expand Down
6 changes: 3 additions & 3 deletions blueprints/data-solutions/data-platform-minimal/03-curated.tf
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@

locals {
cur_iam = {
"roles/bigquery.dataOwner" = [module.processing-sa-dp-0.iam_email]
"roles/bigquery.dataOwner" = [module.processing-sa-0.iam_email]
"roles/bigquery.dataViewer" = [
local.groups_iam.data-analysts,
local.groups_iam.data-engineers
]
"roles/bigquery.jobUser" = [
module.processing-sa-dp-0.iam_email,
module.processing-sa-0.iam_email,
local.groups_iam.data-analysts,
local.groups_iam.data-engineers
]
Expand All @@ -35,7 +35,7 @@ locals {
"roles/storage.objectViewer" = [
local.groups_iam.data-analysts, local.groups_iam.data-engineers
]
"roles/storage.objectAdmin" = [module.processing-sa-dp-0.iam_email]
"roles/storage.objectAdmin" = [module.processing-sa-0.iam_email]
}
cur_services = [
"iam.googleapis.com",
Expand Down
6 changes: 3 additions & 3 deletions blueprints/data-solutions/data-platform-minimal/04-common.tf
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,16 @@ locals {
"roles/dlp.estimatesAdmin" = [local.groups_iam.data-engineers]
"roles/dlp.reader" = [local.groups_iam.data-engineers]
"roles/dlp.user" = [
module.processing-sa-dp-0.iam_email,
module.processing-sa-0.iam_email,
local.groups_iam.data-engineers
]
"roles/datacatalog.admin" = [local.groups_iam.data-security]
"roles/datacatalog.viewer" = [
module.processing-sa-dp-0.iam_email,
module.processing-sa-0.iam_email,
local.groups_iam.data-analysts
]
"roles/datacatalog.categoryFineGrainedReader" = [
module.processing-sa-dp-0.iam_email
module.processing-sa-0.iam_email
]
}
}
Expand Down
41 changes: 21 additions & 20 deletions blueprints/data-solutions/data-platform-minimal/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -230,8 +230,8 @@ network_config = {
host_project = "PROJECT_ID"
network_self_link = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/global/networks/NAME"
subnet_self_links = {
processing_dataproc = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/regions/REGION/subnetworks/NAME"
processing_composer = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/regions/REGION/subnetworks/NAME"
processing_transformation = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/regions/REGION/subnetworks/NAME"
processing_composer = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/regions/REGION/subnetworks/NAME"
}
composer_ip_ranges = {
cloudsql = "192.168.XXX.XXX/24"
Expand Down Expand Up @@ -280,29 +280,30 @@ The application layer is out of scope of this script. As a demo purpuse only, on

| name | description | type | required | default |
|---|---|:---:|:---:|:---:|
| [organization_domain](variables.tf#L114) | Organization domain. | <code>string</code> || |
| [prefix](variables.tf#L119) | Prefix used for resource names. | <code>string</code> || |
| [project_config](variables.tf#L128) | Provide 'billing_account_id' value if project creation is needed, uses existing 'project_ids' if null. Parent is in 'folders/nnn' or 'organizations/nnn' format. | <code title="object&#40;&#123;&#10; billing_account_id &#61; optional&#40;string, null&#41;&#10; parent &#61; string&#10; project_ids &#61; optional&#40;object&#40;&#123;&#10; landing &#61; string&#10; processing &#61; string&#10; curated &#61; string&#10; common &#61; string&#10; &#125;&#41;, &#123;&#10; landing &#61; &#34;lnd&#34;&#10; processing &#61; &#34;prc&#34;&#10; curated &#61; &#34;cur&#34;&#10; common &#61; &#34;cmn&#34;&#10; &#125;&#10; &#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> || |
| [composer_config](variables.tf#L17) | Cloud Composer config. | <code title="object&#40;&#123;&#10; disable_deployment &#61; optional&#40;bool, false&#41;&#10; environment_size &#61; optional&#40;string, &#34;ENVIRONMENT_SIZE_SMALL&#34;&#41;&#10; software_config &#61; optional&#40;object&#40;&#123;&#10; airflow_config_overrides &#61; optional&#40;map&#40;string&#41;, &#123;&#125;&#41;&#10; pypi_packages &#61; optional&#40;map&#40;string&#41;, &#123;&#125;&#41;&#10; env_variables &#61; optional&#40;map&#40;string&#41;, &#123;&#125;&#41;&#10; image_version &#61; optional&#40;string, &#34;composer-2-airflow-2&#34;&#41;&#10; &#125;&#41;, &#123;&#125;&#41;&#10; workloads_config &#61; optional&#40;object&#40;&#123;&#10; scheduler &#61; optional&#40;object&#40;&#123;&#10; cpu &#61; optional&#40;number, 0.5&#41;&#10; memory_gb &#61; optional&#40;number, 1.875&#41;&#10; storage_gb &#61; optional&#40;number, 1&#41;&#10; count &#61; optional&#40;number, 1&#41;&#10; &#125;&#10; &#41;, &#123;&#125;&#41;&#10; web_server &#61; optional&#40;object&#40;&#123;&#10; cpu &#61; optional&#40;number, 0.5&#41;&#10; memory_gb &#61; optional&#40;number, 1.875&#41;&#10; storage_gb &#61; optional&#40;number, 1&#41;&#10; &#125;&#41;, &#123;&#125;&#41;&#10; worker &#61; optional&#40;object&#40;&#123;&#10; cpu &#61; optional&#40;number, 0.5&#41;&#10; memory_gb &#61; optional&#40;number, 1.875&#41;&#10; storage_gb &#61; optional&#40;number, 1&#41;&#10; min_count &#61; optional&#40;number, 1&#41;&#10; max_count &#61; optional&#40;number, 3&#41;&#10; &#125;&#10; &#41;, &#123;&#125;&#41;&#10; &#125;&#41;, &#123;&#125;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>&#123;&#125;</code> |
| [data_catalog_tags](variables.tf#L55) | List of Data Catalog Policy tags to be created with optional IAM binding configuration in {tag => {ROLE => [MEMBERS]}} format. | <code>map&#40;map&#40;list&#40;string&#41;&#41;&#41;</code> | | <code title="&#123;&#10; &#34;3_Confidential&#34; &#61; null&#10; &#34;2_Private&#34; &#61; null&#10; &#34;1_Sensitive&#34; &#61; null&#10;&#125;">&#123;&#8230;&#125;</code> |
| [data_force_destroy](variables.tf#L66) | Flag to set 'force_destroy' on data services like BigQuery or Cloud Storage. | <code>bool</code> | | <code>false</code> |
| [groups](variables.tf#L72) | User groups. | <code>map&#40;string&#41;</code> | | <code title="&#123;&#10; data-analysts &#61; &#34;gcp-data-analysts&#34;&#10; data-engineers &#61; &#34;gcp-data-engineers&#34;&#10; data-security &#61; &#34;gcp-data-security&#34;&#10;&#125;">&#123;&#8230;&#125;</code> |
| [location](variables.tf#L82) | Location used for multi-regional resources. | <code>string</code> | | <code>&#34;eu&#34;</code> |
| [network_config](variables.tf#L88) | Shared VPC network configurations to use. If null networks will be created in projects. | <code title="object&#40;&#123;&#10; host_project &#61; optional&#40;string&#41;&#10; network_self_link &#61; optional&#40;string&#41;&#10; subnet_self_links &#61; optional&#40;object&#40;&#123;&#10; processing_dataproc &#61; string&#10; processing_composer &#61; string&#10; &#125;&#41;, null&#41;&#10; composer_ip_ranges &#61; optional&#40;object&#40;&#123;&#10; connection_subnetwork &#61; optional&#40;string&#41;&#10; cloud_sql &#61; optional&#40;string, &#34;10.20.10.0&#47;24&#34;&#41;&#10; gke_master &#61; optional&#40;string, &#34;10.20.11.0&#47;28&#34;&#41;&#10; pods_range_name &#61; optional&#40;string, &#34;pods&#34;&#41;&#10; services_range_name &#61; optional&#40;string, &#34;services&#34;&#41;&#10; &#125;&#41;, &#123;&#125;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>&#123;&#125;</code> |
| [project_suffix](variables.tf#L152) | Suffix used only for project ids. | <code>string</code> | | <code>null</code> |
| [region](variables.tf#L158) | Region used for regional resources. | <code>string</code> | | <code>&#34;europe-west1&#34;</code> |
| [service_encryption_keys](variables.tf#L164) | Cloud KMS to use to encrypt different services. Key location should match service region. | <code title="object&#40;&#123;&#10; bq &#61; optional&#40;string&#41;&#10; composer &#61; optional&#40;string&#41;&#10; compute &#61; optional&#40;string&#41;&#10; storage &#61; optional&#40;string&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>&#123;&#125;</code> |
| [organization_domain](variables.tf#L122) | Organization domain. | <code>string</code> || |
| [prefix](variables.tf#L127) | Prefix used for resource names. | <code>string</code> || |
| [project_config](variables.tf#L136) | Provide 'billing_account_id' value if project creation is needed, uses existing 'project_ids' if null. Parent is in 'folders/nnn' or 'organizations/nnn' format. | <code title="object&#40;&#123;&#10; billing_account_id &#61; optional&#40;string, null&#41;&#10; parent &#61; string&#10; project_ids &#61; optional&#40;object&#40;&#123;&#10; landing &#61; string&#10; processing &#61; string&#10; curated &#61; string&#10; common &#61; string&#10; &#125;&#41;, &#123;&#10; landing &#61; &#34;lnd&#34;&#10; processing &#61; &#34;prc&#34;&#10; curated &#61; &#34;cur&#34;&#10; common &#61; &#34;cmn&#34;&#10; &#125;&#10; &#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> || |
| [composer_config](variables.tf#L17) | Cloud Composer config. | <code title="object&#40;&#123;&#10; environment_size &#61; optional&#40;string, &#34;ENVIRONMENT_SIZE_SMALL&#34;&#41;&#10; software_config &#61; optional&#40;object&#40;&#123;&#10; airflow_config_overrides &#61; optional&#40;map&#40;string&#41;, &#123;&#125;&#41;&#10; pypi_packages &#61; optional&#40;map&#40;string&#41;, &#123;&#125;&#41;&#10; env_variables &#61; optional&#40;map&#40;string&#41;, &#123;&#125;&#41;&#10; image_version &#61; optional&#40;string, &#34;composer-2-airflow-2&#34;&#41;&#10; &#125;&#41;, &#123;&#125;&#41;&#10; workloads_config &#61; optional&#40;object&#40;&#123;&#10; scheduler &#61; optional&#40;object&#40;&#123;&#10; cpu &#61; optional&#40;number, 0.5&#41;&#10; memory_gb &#61; optional&#40;number, 1.875&#41;&#10; storage_gb &#61; optional&#40;number, 1&#41;&#10; count &#61; optional&#40;number, 1&#41;&#10; &#125;&#10; &#41;, &#123;&#125;&#41;&#10; web_server &#61; optional&#40;object&#40;&#123;&#10; cpu &#61; optional&#40;number, 0.5&#41;&#10; memory_gb &#61; optional&#40;number, 1.875&#41;&#10; storage_gb &#61; optional&#40;number, 1&#41;&#10; &#125;&#41;, &#123;&#125;&#41;&#10; worker &#61; optional&#40;object&#40;&#123;&#10; cpu &#61; optional&#40;number, 0.5&#41;&#10; memory_gb &#61; optional&#40;number, 1.875&#41;&#10; storage_gb &#61; optional&#40;number, 1&#41;&#10; min_count &#61; optional&#40;number, 1&#41;&#10; max_count &#61; optional&#40;number, 3&#41;&#10; &#125;&#10; &#41;, &#123;&#125;&#41;&#10; &#125;&#41;, &#123;&#125;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>&#123;&#125;</code> |
| [data_catalog_tags](variables.tf#L54) | List of Data Catalog Policy tags to be created with optional IAM binding configuration in {tag => {ROLE => [MEMBERS]}} format. | <code>map&#40;map&#40;list&#40;string&#41;&#41;&#41;</code> | | <code title="&#123;&#10; &#34;3_Confidential&#34; &#61; null&#10; &#34;2_Private&#34; &#61; null&#10; &#34;1_Sensitive&#34; &#61; null&#10;&#125;">&#123;&#8230;&#125;</code> |
| [data_force_destroy](variables.tf#L65) | Flag to set 'force_destroy' on data services like BigQuery or Cloud Storage. | <code>bool</code> | | <code>false</code> |
| [enable_services](variables.tf#L71) | Flag to enable or disable services in the Data Platform. | <code title="object&#40;&#123;&#10; composer &#61; optional&#40;bool, true&#41;&#10; dataproc_history_server &#61; optional&#40;bool, true&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>&#123;&#125;</code> |
| [groups](variables.tf#L80) | User groups. | <code>map&#40;string&#41;</code> | | <code title="&#123;&#10; data-analysts &#61; &#34;gcp-data-analysts&#34;&#10; data-engineers &#61; &#34;gcp-data-engineers&#34;&#10; data-security &#61; &#34;gcp-data-security&#34;&#10;&#125;">&#123;&#8230;&#125;</code> |
| [location](variables.tf#L90) | Location used for multi-regional resources. | <code>string</code> | | <code>&#34;eu&#34;</code> |
| [network_config](variables.tf#L96) | Shared VPC network configurations to use. If null networks will be created in projects. | <code title="object&#40;&#123;&#10; host_project &#61; optional&#40;string&#41;&#10; network_self_link &#61; optional&#40;string&#41;&#10; subnet_self_links &#61; optional&#40;object&#40;&#123;&#10; processing_transformation &#61; string&#10; processing_composer &#61; string&#10; &#125;&#41;, null&#41;&#10; composer_ip_ranges &#61; optional&#40;object&#40;&#123;&#10; connection_subnetwork &#61; optional&#40;string&#41;&#10; cloud_sql &#61; optional&#40;string, &#34;10.20.10.0&#47;24&#34;&#41;&#10; gke_master &#61; optional&#40;string, &#34;10.20.11.0&#47;28&#34;&#41;&#10; pods_range_name &#61; optional&#40;string, &#34;pods&#34;&#41;&#10; services_range_name &#61; optional&#40;string, &#34;services&#34;&#41;&#10; &#125;&#41;, &#123;&#125;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>&#123;&#125;</code> |
| [project_suffix](variables.tf#L160) | Suffix used only for project ids. | <code>string</code> | | <code>null</code> |
| [region](variables.tf#L166) | Region used for regional resources. | <code>string</code> | | <code>&#34;europe-west1&#34;</code> |
| [service_encryption_keys](variables.tf#L172) | Cloud KMS to use to encrypt different services. Key location should match service region. | <code title="object&#40;&#123;&#10; bq &#61; optional&#40;string&#41;&#10; composer &#61; optional&#40;string&#41;&#10; compute &#61; optional&#40;string&#41;&#10; storage &#61; optional&#40;string&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>&#123;&#125;</code> |

## Outputs

| name | description | sensitive |
|---|---|:---:|
| [bigquery-datasets](outputs.tf#L17) | BigQuery datasets. | |
| [dataproc-hystory-server](outputs.tf#L24) | List of bucket names which have been assigned to the cluster. | |
| [gcs-buckets](outputs.tf#L34) | GCS buckets. ||
| [kms_keys](outputs.tf#L44) | Cloud KMS keys. | |
| [projects](outputs.tf#L49) | GCP Projects information. | |
| [vpc_network](outputs.tf#L67) | VPC network. | |
| [vpc_subnet](outputs.tf#L75) | VPC subnetworks. | |
| [dataproc-history-server](outputs.tf#L24) | List of bucket names which have been assigned to the cluster. | |
| [gcs-buckets](outputs.tf#L29) | GCS buckets. ||
| [kms_keys](outputs.tf#L39) | Cloud KMS keys. | |
| [projects](outputs.tf#L44) | GCP Projects information. | |
| [vpc_network](outputs.tf#L62) | VPC network. | |
| [vpc_subnet](outputs.tf#L70) | VPC subnetworks. | |

<!-- END TFDOC -->
Loading

0 comments on commit 40656a2

Please sign in to comment.