Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Minimal Data Platform - Make components optional #1380

Merged
merged 3 commits into from
May 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ locals {
iam_lnd = {
"roles/storage.objectCreator" = [module.land-sa-cs-0.iam_email]
"roles/storage.objectViewer" = [module.processing-sa-cmp-0.iam_email]
"roles/storage.objectAdmin" = [module.processing-sa-dp-0.iam_email]
"roles/storage.objectAdmin" = [module.processing-sa-0.iam_email]
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ locals {
GCP_REGION = var.region
LAND_PRJ = module.land-project.project_id
LAND_GCS = module.land-cs-0.name
PHS_CLUSTER_NAME = module.processing-dp-historyserver.name
PHS_CLUSTER_NAME = try(module.processing-dp-historyserver[0].name, null)
PROCESSING_GCS = module.processing-cs-0.name
PROCESSING_PRJ = module.processing-project.project_id
PROCESSING_SA_DP = module.processing-sa-dp-0.email
PROCESSING_SA = module.processing-sa-0.email
PROCESSING_SUBNET = local.processing_subnet
PROCESSING_VPC = local.processing_vpc
}
Expand All @@ -47,7 +47,7 @@ module "processing-sa-cmp-0" {
}

resource "google_composer_environment" "processing-cmp-0" {
count = var.composer_config.disable_deployment == true ? 0 : 1
count = var.enable_services.composer == true ? 1 : 0
project = module.processing-project.project_id
name = "${var.prefix}-prc-cmp-0"
region = var.region
Expand Down
28 changes: 15 additions & 13 deletions blueprints/data-solutions/data-platform-minimal/02-dataproc.tf
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@

# tfdoc:file:description Cloud Dataproc resources.

module "processing-cs-dp-history" {
module "processing-dp-history" {
count = var.enable_services.dataproc_history_server == true ? 1 : 0
source = "../../../modules/gcs"
project_id = module.processing-project.project_id
prefix = var.prefix
Expand All @@ -24,12 +25,12 @@ module "processing-cs-dp-history" {
encryption_key = var.service_encryption_keys.storage
}

module "processing-sa-dp-0" {
module "processing-sa-0" {
source = "../../../modules/iam-service-account"
project_id = module.processing-project.project_id
prefix = var.prefix
name = "prc-dp-0"
display_name = "Dataproc service account"
name = "prc-0"
display_name = "Processing service account"
iam = {
"roles/iam.serviceAccountTokenCreator" = [
local.groups_iam.data-engineers,
Expand All @@ -41,7 +42,7 @@ module "processing-sa-dp-0" {
}
}

module "processing-dp-staging-0" {
module "processing-staging-0" {
source = "../../../modules/gcs"
project_id = module.processing-project.project_id
prefix = var.prefix
Expand All @@ -51,7 +52,7 @@ module "processing-dp-staging-0" {
encryption_key = var.service_encryption_keys.storage
}

module "processing-dp-temp-0" {
module "processing-temp-0" {
source = "../../../modules/gcs"
project_id = module.processing-project.project_id
prefix = var.prefix
Expand All @@ -61,7 +62,7 @@ module "processing-dp-temp-0" {
encryption_key = var.service_encryption_keys.storage
}

module "processing-dp-log-0" {
module "processing-log-0" {
source = "../../../modules/gcs"
project_id = module.processing-project.project_id
prefix = var.prefix
Expand All @@ -72,19 +73,20 @@ module "processing-dp-log-0" {
}

module "processing-dp-historyserver" {
count = var.enable_services.dataproc_history_server == true ? 1 : 0
source = "../../../modules/dataproc"
project_id = module.processing-project.project_id
name = "hystory-server"
name = "history-server"
prefix = var.prefix
region = var.region
dataproc_config = {
cluster_config = {
staging_bucket = module.processing-dp-staging-0.name
temp_bucket = module.processing-dp-temp-0.name
staging_bucket = module.processing-staging-0.name
temp_bucket = module.processing-temp-0.name
gce_cluster_config = {
subnetwork = module.processing-vpc[0].subnets["${var.region}/${var.prefix}-processing"].self_link
zone = "${var.region}-b"
service_account = module.processing-sa-dp-0.email
service_account = module.processing-sa-0.email
service_account_scopes = ["cloud-platform"]
internal_ip_only = true
}
Expand All @@ -99,10 +101,10 @@ module "processing-dp-historyserver" {
"dataproc:dataproc.allow.zero.workers" = "true"
"dataproc:job.history.to-gcs.enabled" = "true"
"spark:spark.history.fs.logDirectory" = (
"gs://${module.processing-dp-staging-0.name}/*/spark-job-history"
"gs://${module.processing-staging-0.name}/*/spark-job-history"
)
"spark:spark.eventLog.dir" = (
"gs://${module.processing-dp-staging-0.name}/*/spark-job-history"
"gs://${module.processing-staging-0.name}/*/spark-job-history"
)
"spark:spark.history.custom.executor.log.url.applyIncompleteApplication" = "false"
"spark:spark.history.custom.executor.log.url" = (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ locals {
module.processing-sa-cmp-0.iam_email
]
"roles/dataproc.worker" = [
module.processing-sa-dp-0.iam_email
module.processing-sa-0.iam_email
]
"roles/iam.serviceAccountUser" = [
module.processing-sa-cmp-0.iam_email, local.groups_iam.data-engineers
Expand Down
6 changes: 3 additions & 3 deletions blueprints/data-solutions/data-platform-minimal/03-curated.tf
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@

locals {
cur_iam = {
"roles/bigquery.dataOwner" = [module.processing-sa-dp-0.iam_email]
"roles/bigquery.dataOwner" = [module.processing-sa-0.iam_email]
"roles/bigquery.dataViewer" = [
local.groups_iam.data-analysts,
local.groups_iam.data-engineers
]
"roles/bigquery.jobUser" = [
module.processing-sa-dp-0.iam_email,
module.processing-sa-0.iam_email,
local.groups_iam.data-analysts,
local.groups_iam.data-engineers
]
Expand All @@ -35,7 +35,7 @@ locals {
"roles/storage.objectViewer" = [
local.groups_iam.data-analysts, local.groups_iam.data-engineers
]
"roles/storage.objectAdmin" = [module.processing-sa-dp-0.iam_email]
"roles/storage.objectAdmin" = [module.processing-sa-0.iam_email]
}
cur_services = [
"iam.googleapis.com",
Expand Down
6 changes: 3 additions & 3 deletions blueprints/data-solutions/data-platform-minimal/04-common.tf
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,16 @@ locals {
"roles/dlp.estimatesAdmin" = [local.groups_iam.data-engineers]
"roles/dlp.reader" = [local.groups_iam.data-engineers]
"roles/dlp.user" = [
module.processing-sa-dp-0.iam_email,
module.processing-sa-0.iam_email,
local.groups_iam.data-engineers
]
"roles/datacatalog.admin" = [local.groups_iam.data-security]
"roles/datacatalog.viewer" = [
module.processing-sa-dp-0.iam_email,
module.processing-sa-0.iam_email,
local.groups_iam.data-analysts
]
"roles/datacatalog.categoryFineGrainedReader" = [
module.processing-sa-dp-0.iam_email
module.processing-sa-0.iam_email
]
}
}
Expand Down
41 changes: 21 additions & 20 deletions blueprints/data-solutions/data-platform-minimal/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -230,8 +230,8 @@ network_config = {
host_project = "PROJECT_ID"
network_self_link = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/global/networks/NAME"
subnet_self_links = {
processing_dataproc = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/regions/REGION/subnetworks/NAME"
processing_composer = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/regions/REGION/subnetworks/NAME"
processing_transformation = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/regions/REGION/subnetworks/NAME"
processing_composer = "https://www.googleapis.com/compute/v1/projects/PROJECT_ID/regions/REGION/subnetworks/NAME"
}
composer_ip_ranges = {
cloudsql = "192.168.XXX.XXX/24"
Expand Down Expand Up @@ -280,29 +280,30 @@ The application layer is out of scope of this script. As a demo purpuse only, on

| name | description | type | required | default |
|---|---|:---:|:---:|:---:|
| [organization_domain](variables.tf#L114) | Organization domain. | <code>string</code> | ✓ | |
| [prefix](variables.tf#L119) | Prefix used for resource names. | <code>string</code> | ✓ | |
| [project_config](variables.tf#L128) | Provide 'billing_account_id' value if project creation is needed, uses existing 'project_ids' if null. Parent is in 'folders/nnn' or 'organizations/nnn' format. | <code title="object&#40;&#123;&#10; billing_account_id &#61; optional&#40;string, null&#41;&#10; parent &#61; string&#10; project_ids &#61; optional&#40;object&#40;&#123;&#10; landing &#61; string&#10; processing &#61; string&#10; curated &#61; string&#10; common &#61; string&#10; &#125;&#41;, &#123;&#10; landing &#61; &#34;lnd&#34;&#10; processing &#61; &#34;prc&#34;&#10; curated &#61; &#34;cur&#34;&#10; common &#61; &#34;cmn&#34;&#10; &#125;&#10; &#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | ✓ | |
| [composer_config](variables.tf#L17) | Cloud Composer config. | <code title="object&#40;&#123;&#10; disable_deployment &#61; optional&#40;bool, false&#41;&#10; environment_size &#61; optional&#40;string, &#34;ENVIRONMENT_SIZE_SMALL&#34;&#41;&#10; software_config &#61; optional&#40;object&#40;&#123;&#10; airflow_config_overrides &#61; optional&#40;map&#40;string&#41;, &#123;&#125;&#41;&#10; pypi_packages &#61; optional&#40;map&#40;string&#41;, &#123;&#125;&#41;&#10; env_variables &#61; optional&#40;map&#40;string&#41;, &#123;&#125;&#41;&#10; image_version &#61; optional&#40;string, &#34;composer-2-airflow-2&#34;&#41;&#10; &#125;&#41;, &#123;&#125;&#41;&#10; workloads_config &#61; optional&#40;object&#40;&#123;&#10; scheduler &#61; optional&#40;object&#40;&#123;&#10; cpu &#61; optional&#40;number, 0.5&#41;&#10; memory_gb &#61; optional&#40;number, 1.875&#41;&#10; storage_gb &#61; optional&#40;number, 1&#41;&#10; count &#61; optional&#40;number, 1&#41;&#10; &#125;&#10; &#41;, &#123;&#125;&#41;&#10; web_server &#61; optional&#40;object&#40;&#123;&#10; cpu &#61; optional&#40;number, 0.5&#41;&#10; memory_gb &#61; optional&#40;number, 1.875&#41;&#10; storage_gb &#61; optional&#40;number, 1&#41;&#10; &#125;&#41;, &#123;&#125;&#41;&#10; worker &#61; optional&#40;object&#40;&#123;&#10; cpu &#61; optional&#40;number, 0.5&#41;&#10; memory_gb &#61; optional&#40;number, 1.875&#41;&#10; storage_gb &#61; optional&#40;number, 1&#41;&#10; min_count &#61; optional&#40;number, 1&#41;&#10; max_count &#61; optional&#40;number, 3&#41;&#10; &#125;&#10; &#41;, &#123;&#125;&#41;&#10; &#125;&#41;, &#123;&#125;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>&#123;&#125;</code> |
| [data_catalog_tags](variables.tf#L55) | List of Data Catalog Policy tags to be created with optional IAM binging configuration in {tag => {ROLE => [MEMBERS]}} format. | <code>map&#40;map&#40;list&#40;string&#41;&#41;&#41;</code> | | <code title="&#123;&#10; &#34;3_Confidential&#34; &#61; null&#10; &#34;2_Private&#34; &#61; null&#10; &#34;1_Sensitive&#34; &#61; null&#10;&#125;">&#123;&#8230;&#125;</code> |
| [data_force_destroy](variables.tf#L66) | Flag to set 'force_destroy' on data services like BiguQery or Cloud Storage. | <code>bool</code> | | <code>false</code> |
| [groups](variables.tf#L72) | User groups. | <code>map&#40;string&#41;</code> | | <code title="&#123;&#10; data-analysts &#61; &#34;gcp-data-analysts&#34;&#10; data-engineers &#61; &#34;gcp-data-engineers&#34;&#10; data-security &#61; &#34;gcp-data-security&#34;&#10;&#125;">&#123;&#8230;&#125;</code> |
| [location](variables.tf#L82) | Location used for multi-regional resources. | <code>string</code> | | <code>&#34;eu&#34;</code> |
| [network_config](variables.tf#L88) | Shared VPC network configurations to use. If null networks will be created in projects. | <code title="object&#40;&#123;&#10; host_project &#61; optional&#40;string&#41;&#10; network_self_link &#61; optional&#40;string&#41;&#10; subnet_self_links &#61; optional&#40;object&#40;&#123;&#10; processing_dataproc &#61; string&#10; processing_composer &#61; string&#10; &#125;&#41;, null&#41;&#10; composer_ip_ranges &#61; optional&#40;object&#40;&#123;&#10; connection_subnetwork &#61; optional&#40;string&#41;&#10; cloud_sql &#61; optional&#40;string, &#34;10.20.10.0&#47;24&#34;&#41;&#10; gke_master &#61; optional&#40;string, &#34;10.20.11.0&#47;28&#34;&#41;&#10; pods_range_name &#61; optional&#40;string, &#34;pods&#34;&#41;&#10; services_range_name &#61; optional&#40;string, &#34;services&#34;&#41;&#10; &#125;&#41;, &#123;&#125;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>&#123;&#125;</code> |
| [project_suffix](variables.tf#L152) | Suffix used only for project ids. | <code>string</code> | | <code>null</code> |
| [region](variables.tf#L158) | Region used for regional resources. | <code>string</code> | | <code>&#34;europe-west1&#34;</code> |
| [service_encryption_keys](variables.tf#L164) | Cloud KMS to use to encrypt different services. Key location should match service region. | <code title="object&#40;&#123;&#10; bq &#61; optional&#40;string&#41;&#10; composer &#61; optional&#40;string&#41;&#10; compute &#61; optional&#40;string&#41;&#10; storage &#61; optional&#40;string&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>&#123;&#125;</code> |
| [organization_domain](variables.tf#L122) | Organization domain. | <code>string</code> | ✓ | |
| [prefix](variables.tf#L127) | Prefix used for resource names. | <code>string</code> | ✓ | |
| [project_config](variables.tf#L136) | Provide 'billing_account_id' value if project creation is needed, uses existing 'project_ids' if null. Parent is in 'folders/nnn' or 'organizations/nnn' format. | <code title="object&#40;&#123;&#10; billing_account_id &#61; optional&#40;string, null&#41;&#10; parent &#61; string&#10; project_ids &#61; optional&#40;object&#40;&#123;&#10; landing &#61; string&#10; processing &#61; string&#10; curated &#61; string&#10; common &#61; string&#10; &#125;&#41;, &#123;&#10; landing &#61; &#34;lnd&#34;&#10; processing &#61; &#34;prc&#34;&#10; curated &#61; &#34;cur&#34;&#10; common &#61; &#34;cmn&#34;&#10; &#125;&#10; &#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | ✓ | |
| [composer_config](variables.tf#L17) | Cloud Composer config. | <code title="object&#40;&#123;&#10; environment_size &#61; optional&#40;string, &#34;ENVIRONMENT_SIZE_SMALL&#34;&#41;&#10; software_config &#61; optional&#40;object&#40;&#123;&#10; airflow_config_overrides &#61; optional&#40;map&#40;string&#41;, &#123;&#125;&#41;&#10; pypi_packages &#61; optional&#40;map&#40;string&#41;, &#123;&#125;&#41;&#10; env_variables &#61; optional&#40;map&#40;string&#41;, &#123;&#125;&#41;&#10; image_version &#61; optional&#40;string, &#34;composer-2-airflow-2&#34;&#41;&#10; &#125;&#41;, &#123;&#125;&#41;&#10; workloads_config &#61; optional&#40;object&#40;&#123;&#10; scheduler &#61; optional&#40;object&#40;&#123;&#10; cpu &#61; optional&#40;number, 0.5&#41;&#10; memory_gb &#61; optional&#40;number, 1.875&#41;&#10; storage_gb &#61; optional&#40;number, 1&#41;&#10; count &#61; optional&#40;number, 1&#41;&#10; &#125;&#10; &#41;, &#123;&#125;&#41;&#10; web_server &#61; optional&#40;object&#40;&#123;&#10; cpu &#61; optional&#40;number, 0.5&#41;&#10; memory_gb &#61; optional&#40;number, 1.875&#41;&#10; storage_gb &#61; optional&#40;number, 1&#41;&#10; &#125;&#41;, &#123;&#125;&#41;&#10; worker &#61; optional&#40;object&#40;&#123;&#10; cpu &#61; optional&#40;number, 0.5&#41;&#10; memory_gb &#61; optional&#40;number, 1.875&#41;&#10; storage_gb &#61; optional&#40;number, 1&#41;&#10; min_count &#61; optional&#40;number, 1&#41;&#10; max_count &#61; optional&#40;number, 3&#41;&#10; &#125;&#10; &#41;, &#123;&#125;&#41;&#10; &#125;&#41;, &#123;&#125;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>&#123;&#125;</code> |
| [data_catalog_tags](variables.tf#L54) | List of Data Catalog Policy tags to be created with optional IAM binding configuration in {tag => {ROLE => [MEMBERS]}} format. | <code>map&#40;map&#40;list&#40;string&#41;&#41;&#41;</code> | | <code title="&#123;&#10; &#34;3_Confidential&#34; &#61; null&#10; &#34;2_Private&#34; &#61; null&#10; &#34;1_Sensitive&#34; &#61; null&#10;&#125;">&#123;&#8230;&#125;</code> |
| [data_force_destroy](variables.tf#L65) | Flag to set 'force_destroy' on data services like BigQuery or Cloud Storage. | <code>bool</code> | | <code>false</code> |
| [enable_services](variables.tf#L71) | Flag to enable or disable services in the Data Platform. | <code title="object&#40;&#123;&#10; composer &#61; optional&#40;bool, true&#41;&#10; dataproc_history_server &#61; optional&#40;bool, true&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>&#123;&#125;</code> |
| [groups](variables.tf#L80) | User groups. | <code>map&#40;string&#41;</code> | | <code title="&#123;&#10; data-analysts &#61; &#34;gcp-data-analysts&#34;&#10; data-engineers &#61; &#34;gcp-data-engineers&#34;&#10; data-security &#61; &#34;gcp-data-security&#34;&#10;&#125;">&#123;&#8230;&#125;</code> |
| [location](variables.tf#L90) | Location used for multi-regional resources. | <code>string</code> | | <code>&#34;eu&#34;</code> |
| [network_config](variables.tf#L96) | Shared VPC network configurations to use. If null networks will be created in projects. | <code title="object&#40;&#123;&#10; host_project &#61; optional&#40;string&#41;&#10; network_self_link &#61; optional&#40;string&#41;&#10; subnet_self_links &#61; optional&#40;object&#40;&#123;&#10; processing_transformation &#61; string&#10; processing_composer &#61; string&#10; &#125;&#41;, null&#41;&#10; composer_ip_ranges &#61; optional&#40;object&#40;&#123;&#10; connection_subnetwork &#61; optional&#40;string&#41;&#10; cloud_sql &#61; optional&#40;string, &#34;10.20.10.0&#47;24&#34;&#41;&#10; gke_master &#61; optional&#40;string, &#34;10.20.11.0&#47;28&#34;&#41;&#10; pods_range_name &#61; optional&#40;string, &#34;pods&#34;&#41;&#10; services_range_name &#61; optional&#40;string, &#34;services&#34;&#41;&#10; &#125;&#41;, &#123;&#125;&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>&#123;&#125;</code> |
| [project_suffix](variables.tf#L160) | Suffix used only for project ids. | <code>string</code> | | <code>null</code> |
| [region](variables.tf#L166) | Region used for regional resources. | <code>string</code> | | <code>&#34;europe-west1&#34;</code> |
| [service_encryption_keys](variables.tf#L172) | Cloud KMS to use to encrypt different services. Key location should match service region. | <code title="object&#40;&#123;&#10; bq &#61; optional&#40;string&#41;&#10; composer &#61; optional&#40;string&#41;&#10; compute &#61; optional&#40;string&#41;&#10; storage &#61; optional&#40;string&#41;&#10;&#125;&#41;">object&#40;&#123;&#8230;&#125;&#41;</code> | | <code>&#123;&#125;</code> |

## Outputs

| name | description | sensitive |
|---|---|:---:|
| [bigquery-datasets](outputs.tf#L17) | BigQuery datasets. | |
| [dataproc-hystory-server](outputs.tf#L24) | List of bucket names which have been assigned to the cluster. | |
| [gcs-buckets](outputs.tf#L34) | GCS buckets. | ✓ |
| [kms_keys](outputs.tf#L44) | Cloud MKS keys. | |
| [projects](outputs.tf#L49) | GCP Projects informations. | |
| [vpc_network](outputs.tf#L67) | VPC network. | |
| [vpc_subnet](outputs.tf#L75) | VPC subnetworks. | |
| [dataproc-history-server](outputs.tf#L24) | List of bucket names which have been assigned to the cluster. | |
| [gcs-buckets](outputs.tf#L29) | GCS buckets. | ✓ |
| [kms_keys](outputs.tf#L39) | Cloud KMS keys. | |
| [projects](outputs.tf#L44) | GCP Projects information. | |
| [vpc_network](outputs.tf#L62) | VPC network. | |
| [vpc_subnet](outputs.tf#L70) | VPC subnetworks. | |

<!-- END TFDOC -->
Loading