From 5e2e47da5f337b0e4d716495e64d71855a3c60d1 Mon Sep 17 00:00:00 2001 From: Ludovico Magnocavallo Date: Mon, 26 Feb 2024 11:16:52 +0100 Subject: [PATCH] Factories refactor (#1843) * factories refactor doc * Adds file schema and filesystem organization * Update 20231106-factories.md * move factories out of blueprints and create new factories README * align factory in billing-account module * align factory in dataplex-datascan module * align factory in billing-account module * align factory in net-firewall-policy module * align factory in dns-response-policy module * align factory in net-vpc-firewall module * align factory in net-vpc module * align factory variable names in FAST * remove decentralized firewall blueprint * bump terraform version * bump module versions * update top-level READMEs * move project factory to modules * fix variable names and tests * tfdoc * remove changelog link * add project factory to top-level README * fix cloudrun eventarc diff * fix README * fix cloudrun eventarc diff --------- Co-authored-by: Simone Ruffilli --- README.md | 26 ++++---- factory.tf | 150 +++++++++++++++++++++++++++++++++++++++++++++++ main.tf | 121 ++++++++++++++++++++++++++++---------- rules_parsing.tf | 54 ----------------- variables.tf | 17 +++--- versions.tf | 6 +- 6 files changed, 265 insertions(+), 109 deletions(-) create mode 100644 factory.tf delete mode 100644 rules_parsing.tf diff --git a/README.md b/README.md index 4053b60699..4b9ecbccc3 100644 --- a/README.md +++ b/README.md @@ -161,8 +161,8 @@ module "dataplex-datascan" { resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations" } incremental_field = "modified_date" - data_quality_spec_file = { - path = "config/data_quality_spec.yaml" + factories_config = { + data_quality_spec = "config/data_quality_spec.yaml" } } # tftest modules=1 resources=1 files=data_quality_spec inventory=datascan_dq.yaml @@ -244,8 +244,8 @@ module "dataplex-datascan" { 
resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations" } incremental_field = "modified_date" - data_quality_spec_file = { - path = "config/data_quality_spec_camel_case.yaml" + factories_config = { + data_quality_spec = "config/data_quality_spec_camel_case.yaml" } } # tftest modules=1 resources=1 files=data_quality_spec_camel_case inventory=datascan_dq.yaml @@ -431,21 +431,21 @@ module "dataplex-datascan" { | name | description | type | required | default | |---|---|:---:|:---:|:---:| | [data](variables.tf#L17) | The data source for DataScan. The source can be either a Dataplex `entity` or a BigQuery `resource`. | object({…}) | ✓ | | -| [name](variables.tf#L118) | Name of Dataplex Scan. | string | ✓ | | -| [project_id](variables.tf#L129) | The ID of the project where the Dataplex DataScan will be created. | string | ✓ | | -| [region](variables.tf#L134) | Region for the Dataplex DataScan. | string | ✓ | | +| [name](variables.tf#L119) | Name of Dataplex Scan. | string | ✓ | | +| [project_id](variables.tf#L130) | The ID of the project where the Dataplex DataScan will be created. | string | ✓ | | +| [region](variables.tf#L135) | Region for the Dataplex DataScan. | string | ✓ | | | [data_profile_spec](variables.tf#L29) | DataProfileScan related setting. Variable descriptions are provided in https://cloud.google.com/dataplex/docs/reference/rest/v1/DataProfileSpec. | object({…}) | | null | | [data_quality_spec](variables.tf#L38) | DataQualityScan related setting. Variable descriptions are provided in https://cloud.google.com/dataplex/docs/reference/rest/v1/DataQualitySpec. | object({…}) | | null | -| [data_quality_spec_file](variables.tf#L85) | Path to a YAML file containing DataQualityScan related setting. Input content can use either camelCase or snake_case. Variables description are provided in https://cloud.google.com/dataplex/docs/reference/rest/v1/DataQualitySpec. 
| object({…}) | | null | -| [description](variables.tf#L93) | Custom description for DataScan. | string | | null | -| [execution_schedule](variables.tf#L99) | Schedule DataScan to run periodically based on a cron schedule expression. If not specified, the DataScan is created with `on_demand` schedule, which means it will not run until the user calls `dataScans.run` API. | string | | null | +| [description](variables.tf#L85) | Custom description for DataScan. | string | | null | +| [execution_schedule](variables.tf#L91) | Schedule DataScan to run periodically based on a cron schedule expression. If not specified, the DataScan is created with `on_demand` schedule, which means it will not run until the user calls `dataScans.run` API. | string | | null | +| [factories_config](variables.tf#L97) | Paths to data files and folders that enable factory functionality. | object({…}) | | {} | | [iam](variables-iam.tf#L24) | Dataplex DataScan IAM bindings in {ROLE => [MEMBERS]} format. | map(list(string)) | | {} | | [iam_bindings](variables-iam.tf#L31) | Authoritative IAM bindings in {KEY => {role = ROLE, members = [], condition = {}}}. Keys are arbitrary. | map(object({…})) | | {} | | [iam_bindings_additive](variables-iam.tf#L46) | Individual additive IAM bindings. Keys are arbitrary. | map(object({…})) | | {} | | [iam_by_principals](variables-iam.tf#L17) | Authoritative IAM binding in {PRINCIPAL => [ROLES]} format. Principals need to be statically defined to avoid cycle errors. Merged internally with the `iam` variable. | map(list(string)) | | {} | -| [incremental_field](variables.tf#L105) | The unnested field (of type Date or Timestamp) that contains values which monotonically increase over time. If not specified, a data scan will run for all data in the table. | string | | null | -| [labels](variables.tf#L111) | Resource labels. | map(string) | | {} | -| [prefix](variables.tf#L123) | Optional prefix used to generate Dataplex DataScan ID. 
| string | | null | +| [incremental_field](variables.tf#L106) | The unnested field (of type Date or Timestamp) that contains values which monotonically increase over time. If not specified, a data scan will run for all data in the table. | string | | null | +| [labels](variables.tf#L112) | Resource labels. | map(string) | | {} | +| [prefix](variables.tf#L124) | Optional prefix used to generate Dataplex DataScan ID. | string | | null | ## Outputs diff --git a/factory.tf b/factory.tf new file mode 100644 index 0000000000..964e232f12 --- /dev/null +++ b/factory.tf @@ -0,0 +1,150 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +locals { + _factory_data = ( + var.factories_config.data_quality_spec == null + ? 
null + : yamldecode(file(pathexpand(var.factories_config.data_quality_spec))) + ) + factory_data = { + post_scan_actions = try( + local._factory_data.postScanActions, + local._factory_data.post_scan_actions, + null + ) + row_filter = try( + local._factory_data.rowFilter, + local._factory_data.row_filter, + null + ) + rules = [ + for rule in try(local._factory_data.rules, []) : { + column = try(rule.column, null) + ignore_null = try(rule.ignoreNull, rule.ignore_null, null) + dimension = rule.dimension + threshold = try(rule.threshold, null) + non_null_expectation = try( + rule.nonNullExpectation, rule.non_null_expectation, null + ) + range_expectation = ( + can(rule.rangeExpectation) || can(rule.range_expectation) + ? { + min_value = try( + rule.rangeExpectation.minValue, + rule.range_expectation.min_value, + null + ) + max_value = try( + rule.rangeExpectation.maxValue, + rule.range_expectation.max_value, + null + ) + strict_min_enabled = try( + rule.rangeExpectation.strictMinEnabled, + rule.range_expectation.strict_min_enabled, + null + ) + strict_max_enabled = try( + rule.rangeExpectation.strictMaxEnabled, + rule.range_expectation.strict_max_enabled, + null + ) + } + : null + ) + regex_expectation = ( + can(rule.regexExpectation) || can(rule.regex_expectation) + ? { + regex = try( + rule.regexExpectation.regex, rule.regex_expectation.regex, null + ) + } + : null + ) + set_expectation = ( + can(rule.setExpectation) || can(rule.set_expectation) + ? { + values = try( + rule.setExpectation.values, rule.set_expectation.values, null + ) + } + : null + ) + uniqueness_expectation = try( + rule.uniquenessExpectation, rule.uniqueness_expectation, null + ) + statistic_range_expectation = ( + can(rule.statisticRangeExpectation) || can(rule.statistic_range_expectation) + ? 
{ + statistic = try( + rule.statisticRangeExpectation.statistic, + rule.statistic_range_expectation.statistic + ) + min_value = try( + rule.statisticRangeExpectation.minValue, + rule.statistic_range_expectation.min_value, + null + ) + max_value = try( + rule.statisticRangeExpectation.maxValue, + rule.statistic_range_expectation.max_value, + null + ) + strict_min_enabled = try( + rule.statisticRangeExpectation.strictMinEnabled, + rule.statistic_range_expectation.strict_min_enabled, + null + ) + strict_max_enabled = try( + rule.statisticRangeExpectation.strictMaxEnabled, + rule.statistic_range_expectation.strict_max_enabled, + null + ) + } + : null + ) + row_condition_expectation = ( + can(rule.rowConditionExpectation) || can(rule.row_condition_expectation) + ? { + sql_expression = try( + rule.rowConditionExpectation.sqlExpression, + rule.row_condition_expectation.sql_expression, + null + ) + } + : null + ) + table_condition_expectation = ( + can(rule.tableConditionExpectation) || can(rule.table_condition_expectation) + ? { + sql_expression = try( + rule.tableConditionExpectation.sqlExpression, + rule.table_condition_expectation.sql_expression, + null + ) + } + : null + ) + } + ] + sampling_percent = try( + local._factory_data.samplingPercent, + local._factory_data.sampling_percent, + null + ) + } +} diff --git a/main.tf b/main.tf index e1b6634d5d..0d9ad82ed6 100644 --- a/main.tf +++ b/main.tf @@ -15,17 +15,31 @@ */ locals { - prefix = var.prefix == null || var.prefix == "" ? "" : "${var.prefix}-" - _file_data_quality_spec = var.data_quality_spec_file == null ? 
null : { - sampling_percent = try(local._file_data_quality_spec_raw.samplingPercent, local._file_data_quality_spec_raw.sampling_percent, null) - row_filter = try(local._file_data_quality_spec_raw.rowFilter, local._file_data_quality_spec_raw.row_filter, null) - rules = local._parsed_rules - post_scan_actions = try(local._file_data_quality_spec_raw.postScanActions, local._file_data_quality_spec_raw.post_scan_actions, null) + data_quality_spec = { + post_scan_actions = try( + var.data_quality_spec.post_scan_actions, + local.factory_data.post_scan_actions, + null + ) + row_filter = try( + var.data_quality_spec.row_filter, + local.factory_data.row_filter, + null + ) + rules = concat( + try(var.data_quality_spec.rules, []), + try(local.factory_data.rules, []) + ) + sampling_percent = try( + var.data_quality_spec.sampling_percent, + local.factory_data.sampling_percent, + null + ) } - data_quality_spec = ( - var.data_quality_spec != null || var.data_quality_spec_file != null ? - merge(var.data_quality_spec, local._file_data_quality_spec) : - null + prefix = var.prefix == null || var.prefix == "" ? "" : "${var.prefix}-" + use_data_quality = ( + var.data_quality_spec != null || + var.factories_config.data_quality_spec != null ) } @@ -68,7 +82,7 @@ resource "google_dataplex_datascan" "datascan" { } dynamic "data_quality_spec" { - for_each = local.data_quality_spec != null ? [""] : [] + for_each = local.use_data_quality ? [""] : [] content { sampling_percent = try(local.data_quality_spec.sampling_percent, null) row_filter = try(local.data_quality_spec.row_filter, null) @@ -76,9 +90,16 @@ resource "google_dataplex_datascan" "datascan" { for_each = local.data_quality_spec.post_scan_actions != null ? [""] : [] content { dynamic "bigquery_export" { - for_each = local.data_quality_spec.post_scan_actions.bigquery_export != null ? [""] : [] + for_each = ( + local.data_quality_spec.post_scan_actions.bigquery_export != null + ? 
[""] + : [] + ) content { - results_table = try(local.data_quality_spec.post_scan_actions.bigquery_export.results_table, null) + results_table = try( + local.data_quality_spec.post_scan_actions.bigquery_export.results_table, + null + ) } } } @@ -98,55 +119,85 @@ resource "google_dataplex_datascan" "datascan" { } dynamic "range_expectation" { - for_each = try(rules.value.range_expectation, null) != null ? [""] : [] + for_each = ( + try(rules.value.range_expectation, null) != null ? [""] : [] + ) content { - min_value = try(rules.value.range_expectation.min_value, null) - max_value = try(rules.value.range_expectation.max_value, null) - strict_min_enabled = try(rules.value.range_expectation.strict_min_enabled, null) - strict_max_enabled = try(rules.value.range_expectation.strict_max_enabled, null) + min_value = try( + rules.value.range_expectation.min_value, null + ) + max_value = try( + rules.value.range_expectation.max_value, null + ) + strict_min_enabled = try( + rules.value.range_expectation.strict_min_enabled, null + ) + strict_max_enabled = try( + rules.value.range_expectation.strict_max_enabled, null + ) } } dynamic "set_expectation" { - for_each = try(rules.value.set_expectation, null) != null ? [""] : [] + for_each = ( + try(rules.value.set_expectation, null) != null ? [""] : [] + ) content { values = rules.value.set_expectation.values } } dynamic "uniqueness_expectation" { - for_each = try(rules.value.uniqueness_expectation, null) != null ? [""] : [] + for_each = ( + try(rules.value.uniqueness_expectation, null) != null ? [""] : [] + ) content { } } dynamic "regex_expectation" { - for_each = try(rules.value.regex_expectation, null) != null ? [""] : [] + for_each = ( + try(rules.value.regex_expectation, null) != null ? [""] : [] + ) content { regex = rules.value.regex_expectation.regex } } dynamic "statistic_range_expectation" { - for_each = try(rules.value.statistic_range_expectation, null) != null ? 
[""] : [] + for_each = ( + try(rules.value.statistic_range_expectation, null) != null ? [""] : [] + ) content { - min_value = try(rules.value.statistic_range_expectation.min_value, null) - max_value = try(rules.value.statistic_range_expectation.max_value, null) - strict_min_enabled = try(rules.value.statistic_range_expectation.strict_min_enabled, null) - strict_max_enabled = try(rules.value.statistic_range_expectation.strict_max_enabled, null) - statistic = rules.value.statistic_range_expectation.statistic + min_value = try( + rules.value.statistic_range_expectation.min_value, null + ) + max_value = try( + rules.value.statistic_range_expectation.max_value, null + ) + strict_min_enabled = try( + rules.value.statistic_range_expectation.strict_min_enabled, null + ) + strict_max_enabled = try( + rules.value.statistic_range_expectation.strict_max_enabled, null + ) + statistic = rules.value.statistic_range_expectation.statistic } } dynamic "row_condition_expectation" { - for_each = try(rules.value.row_condition_expectation, null) != null ? [""] : [] + for_each = ( + try(rules.value.row_condition_expectation, null) != null ? [""] : [] + ) content { sql_expression = rules.value.row_condition_expectation.sql_expression } } dynamic "table_condition_expectation" { - for_each = try(rules.value.table_condition_expectation, null) != null ? [""] : [] + for_each = ( + try(rules.value.table_condition_expectation, null) != null ? [""] : [] + ) content { sql_expression = rules.value.table_condition_expectation.sql_expression } @@ -159,8 +210,16 @@ resource "google_dataplex_datascan" "datascan" { lifecycle { precondition { - condition = length([for spec in [var.data_profile_spec, var.data_quality_spec, var.data_quality_spec_file] : spec if spec != null]) == 1 - error_message = "DataScan can only contain one of 'data_profile_spec', 'data_quality_spec', 'data_quality_spec_file'." 
+ condition = ( + length([ + for spec in [ + var.data_profile_spec, + var.data_quality_spec, + var.factories_config.data_quality_spec + ] : spec if spec != null + ]) == 1 + ) + error_message = "DataScan can only contain one of 'data_profile_spec', 'data_quality_spec', 'factories_config.data_quality_spec'." } precondition { condition = alltrue([ diff --git a/rules_parsing.tf b/rules_parsing.tf deleted file mode 100644 index bbdc82206b..0000000000 --- a/rules_parsing.tf +++ /dev/null @@ -1,54 +0,0 @@ -/** - * Copyright 2023 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -locals { - _file_data_quality_spec_raw = var.data_quality_spec_file != null ? yamldecode(file(var.data_quality_spec_file.path)) : tomap({}) - _parsed_rules = [ - for rule in try(local._file_data_quality_spec_raw.rules, []) : { - column = try(rule.column, null) - ignore_null = try(rule.ignoreNull, rule.ignore_null, null) - dimension = rule.dimension - threshold = try(rule.threshold, null) - non_null_expectation = try(rule.nonNullExpectation, rule.non_null_expectation, null) - range_expectation = can(rule.rangeExpectation) || can(rule.range_expectation) ? 
{ - min_value = try(rule.rangeExpectation.minValue, rule.range_expectation.min_value, null) - max_value = try(rule.rangeExpectation.maxValue, rule.range_expectation.max_value, null) - strict_min_enabled = try(rule.rangeExpectation.strictMinEnabled, rule.range_expectation.strict_min_enabled, null) - strict_max_enabled = try(rule.rangeExpectation.strictMaxEnabled, rule.range_expectation.strict_max_enabled, null) - } : null - regex_expectation = can(rule.regexExpectation) || can(rule.regex_expectation) ? { - regex = try(rule.regexExpectation.regex, rule.regex_expectation.regex, null) - } : null - set_expectation = can(rule.setExpectation) || can(rule.set_expectation) ? { - values = try(rule.setExpectation.values, rule.set_expectation.values, null) - } : null - uniqueness_expectation = try(rule.uniquenessExpectation, rule.uniqueness_expectation, null) - statistic_range_expectation = can(rule.statisticRangeExpectation) || can(rule.statistic_range_expectation) ? { - statistic = try(rule.statisticRangeExpectation.statistic, rule.statistic_range_expectation.statistic) - min_value = try(rule.statisticRangeExpectation.minValue, rule.statistic_range_expectation.min_value, null) - max_value = try(rule.statisticRangeExpectation.maxValue, rule.statistic_range_expectation.max_value, null) - strict_min_enabled = try(rule.statisticRangeExpectation.strictMinEnabled, rule.statistic_range_expectation.strict_min_enabled, null) - strict_max_enabled = try(rule.statisticRangeExpectation.strictMaxEnabled, rule.statistic_range_expectation.strict_max_enabled, null) - } : null - row_condition_expectation = can(rule.rowConditionExpectation) || can(rule.row_condition_expectation) ? { - sql_expression = try(rule.rowConditionExpectation.sqlExpression, rule.row_condition_expectation.sql_expression, null) - } : null - table_condition_expectation = can(rule.tableConditionExpectation) || can(rule.table_condition_expectation) ? 
{ - sql_expression = try(rule.tableConditionExpectation.sqlExpression, rule.table_condition_expectation.sql_expression, null) - } : null - } - ] -} \ No newline at end of file diff --git a/variables.tf b/variables.tf index cab105bfed..c01774f75d 100644 --- a/variables.tf +++ b/variables.tf @@ -82,14 +82,6 @@ variable "data_quality_spec" { }) } -variable "data_quality_spec_file" { - description = "Path to a YAML file containing DataQualityScan related setting. Input content can use either camelCase or snake_case. Variables description are provided in https://cloud.google.com/dataplex/docs/reference/rest/v1/DataQualitySpec." - default = null - type = object({ - path = string - }) -} - variable "description" { description = "Custom description for DataScan." default = null @@ -102,6 +94,15 @@ variable "execution_schedule" { default = null } +variable "factories_config" { + description = "Paths to data files and folders that enable factory functionality." + type = object({ + data_quality_spec = optional(string) + }) + nullable = false + default = {} +} + variable "incremental_field" { description = "The unnested field (of type Date or Timestamp) that contains values which monotonically increase over time. If not specified, a data scan will run for all data in the table." type = string diff --git a/versions.tf b/versions.tf index 3db0e2076e..f43fef270d 100644 --- a/versions.tf +++ b/versions.tf @@ -13,15 +13,15 @@ # limitations under the License. terraform { - required_version = ">= 1.7.0" + required_version = ">= 1.7.4" required_providers { google = { source = "hashicorp/google" - version = ">= 5.11.0, < 6.0.0" # tftest + version = ">= 5.12.0, < 6.0.0" # tftest } google-beta = { source = "hashicorp/google-beta" - version = ">= 5.11.0, < 6.0.0" # tftest + version = ">= 5.12.0, < 6.0.0" # tftest } } }