diff --git a/README.md b/README.md
index 4053b60699..4b9ecbccc3 100644
--- a/README.md
+++ b/README.md
@@ -161,8 +161,8 @@ module "dataplex-datascan" {
resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations"
}
incremental_field = "modified_date"
- data_quality_spec_file = {
- path = "config/data_quality_spec.yaml"
+ factories_config = {
+ data_quality_spec = "config/data_quality_spec.yaml"
}
}
# tftest modules=1 resources=1 files=data_quality_spec inventory=datascan_dq.yaml
@@ -244,8 +244,8 @@ module "dataplex-datascan" {
resource = "//bigquery.googleapis.com/projects/bigquery-public-data/datasets/austin_bikeshare/tables/bikeshare_stations"
}
incremental_field = "modified_date"
- data_quality_spec_file = {
- path = "config/data_quality_spec_camel_case.yaml"
+ factories_config = {
+ data_quality_spec = "config/data_quality_spec_camel_case.yaml"
}
}
# tftest modules=1 resources=1 files=data_quality_spec_camel_case inventory=datascan_dq.yaml
@@ -431,21 +431,21 @@ module "dataplex-datascan" {
| name | description | type | required | default |
|---|---|:---:|:---:|:---:|
| [data](variables.tf#L17) | The data source for DataScan. The source can be either a Dataplex `entity` or a BigQuery `resource`. | object({…}) | ✓ |  |
-| [name](variables.tf#L118) | Name of Dataplex Scan. | string | ✓ |  |
-| [project_id](variables.tf#L129) | The ID of the project where the Dataplex DataScan will be created. | string | ✓ |  |
-| [region](variables.tf#L134) | Region for the Dataplex DataScan. | string | ✓ |  |
+| [name](variables.tf#L119) | Name of Dataplex Scan. | string | ✓ |  |
+| [project_id](variables.tf#L130) | The ID of the project where the Dataplex DataScan will be created. | string | ✓ |  |
+| [region](variables.tf#L135) | Region for the Dataplex DataScan. | string | ✓ |  |
| [data_profile_spec](variables.tf#L29) | DataProfileScan related setting. Variable descriptions are provided in https://cloud.google.com/dataplex/docs/reference/rest/v1/DataProfileSpec. | object({…}) |  | null |
| [data_quality_spec](variables.tf#L38) | DataQualityScan related setting. Variable descriptions are provided in https://cloud.google.com/dataplex/docs/reference/rest/v1/DataQualitySpec. | object({…}) |  | null |
-| [data_quality_spec_file](variables.tf#L85) | Path to a YAML file containing DataQualityScan related setting. Input content can use either camelCase or snake_case. Variables description are provided in https://cloud.google.com/dataplex/docs/reference/rest/v1/DataQualitySpec. | object({…}) |  | null |
-| [description](variables.tf#L93) | Custom description for DataScan. | string |  | null |
-| [execution_schedule](variables.tf#L99) | Schedule DataScan to run periodically based on a cron schedule expression. If not specified, the DataScan is created with `on_demand` schedule, which means it will not run until the user calls `dataScans.run` API. | string |  | null |
+| [description](variables.tf#L85) | Custom description for DataScan. | string |  | null |
+| [execution_schedule](variables.tf#L91) | Schedule DataScan to run periodically based on a cron schedule expression. If not specified, the DataScan is created with `on_demand` schedule, which means it will not run until the user calls `dataScans.run` API. | string |  | null |
+| [factories_config](variables.tf#L97) | Paths to data files and folders that enable factory functionality. | object({…}) |  | {} |
| [iam](variables-iam.tf#L24) | Dataplex DataScan IAM bindings in {ROLE => [MEMBERS]} format. | map(list(string)) |  | {} |
| [iam_bindings](variables-iam.tf#L31) | Authoritative IAM bindings in {KEY => {role = ROLE, members = [], condition = {}}}. Keys are arbitrary. | map(object({…})) |  | {} |
| [iam_bindings_additive](variables-iam.tf#L46) | Individual additive IAM bindings. Keys are arbitrary. | map(object({…})) |  | {} |
| [iam_by_principals](variables-iam.tf#L17) | Authoritative IAM binding in {PRINCIPAL => [ROLES]} format. Principals need to be statically defined to avoid cycle errors. Merged internally with the `iam` variable. | map(list(string)) |  | {} |
-| [incremental_field](variables.tf#L105) | The unnested field (of type Date or Timestamp) that contains values which monotonically increase over time. If not specified, a data scan will run for all data in the table. | string |  | null |
-| [labels](variables.tf#L111) | Resource labels. | map(string) |  | {} |
-| [prefix](variables.tf#L123) | Optional prefix used to generate Dataplex DataScan ID. | string |  | null |
+| [incremental_field](variables.tf#L106) | The unnested field (of type Date or Timestamp) that contains values which monotonically increase over time. If not specified, a data scan will run for all data in the table. | string |  | null |
+| [labels](variables.tf#L112) | Resource labels. | map(string) |  | {} |
+| [prefix](variables.tf#L124) | Optional prefix used to generate Dataplex DataScan ID. | string |  | null |
## Outputs
diff --git a/factory.tf b/factory.tf
new file mode 100644
index 0000000000..964e232f12
--- /dev/null
+++ b/factory.tf
@@ -0,0 +1,150 @@
+/**
+ * Copyright 2023 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+locals {
+ _factory_data = (
+ var.factories_config.data_quality_spec == null
+ ? null
+ : yamldecode(file(pathexpand(var.factories_config.data_quality_spec)))
+ )
+ factory_data = {
+ post_scan_actions = try(
+ local._factory_data.postScanActions,
+ local._factory_data.post_scan_actions,
+ null
+ )
+ row_filter = try(
+ local._factory_data.rowFilter,
+ local._factory_data.row_filter,
+ null
+ )
+ rules = [
+ for rule in try(local._factory_data.rules, []) : {
+ column = try(rule.column, null)
+ ignore_null = try(rule.ignoreNull, rule.ignore_null, null)
+ dimension = rule.dimension
+ threshold = try(rule.threshold, null)
+ non_null_expectation = try(
+ rule.nonNullExpectation, rule.non_null_expectation, null
+ )
+ range_expectation = (
+ can(rule.rangeExpectation) || can(rule.range_expectation)
+ ? {
+ min_value = try(
+ rule.rangeExpectation.minValue,
+ rule.range_expectation.min_value,
+ null
+ )
+ max_value = try(
+ rule.rangeExpectation.maxValue,
+ rule.range_expectation.max_value,
+ null
+ )
+ strict_min_enabled = try(
+ rule.rangeExpectation.strictMinEnabled,
+ rule.range_expectation.strict_min_enabled,
+ null
+ )
+ strict_max_enabled = try(
+ rule.rangeExpectation.strictMaxEnabled,
+ rule.range_expectation.strict_max_enabled,
+ null
+ )
+ }
+ : null
+ )
+ regex_expectation = (
+ can(rule.regexExpectation) || can(rule.regex_expectation)
+ ? {
+ regex = try(
+ rule.regexExpectation.regex, rule.regex_expectation.regex, null
+ )
+ }
+ : null
+ )
+ set_expectation = (
+ can(rule.setExpectation) || can(rule.set_expectation)
+ ? {
+ values = try(
+ rule.setExpectation.values, rule.set_expectation.values, null
+ )
+ }
+ : null
+ )
+ uniqueness_expectation = try(
+ rule.uniquenessExpectation, rule.uniqueness_expectation, null
+ )
+ statistic_range_expectation = (
+ can(rule.statisticRangeExpectation) || can(rule.statistic_range_expectation)
+ ? {
+ statistic = try(
+ rule.statisticRangeExpectation.statistic,
+ rule.statistic_range_expectation.statistic
+ )
+ min_value = try(
+ rule.statisticRangeExpectation.minValue,
+ rule.statistic_range_expectation.min_value,
+ null
+ )
+ max_value = try(
+ rule.statisticRangeExpectation.maxValue,
+ rule.statistic_range_expectation.max_value,
+ null
+ )
+ strict_min_enabled = try(
+ rule.statisticRangeExpectation.strictMinEnabled,
+ rule.statistic_range_expectation.strict_min_enabled,
+ null
+ )
+ strict_max_enabled = try(
+ rule.statisticRangeExpectation.strictMaxEnabled,
+ rule.statistic_range_expectation.strict_max_enabled,
+ null
+ )
+ }
+ : null
+ )
+ row_condition_expectation = (
+ can(rule.rowConditionExpectation) || can(rule.row_condition_expectation)
+ ? {
+ sql_expression = try(
+ rule.rowConditionExpectation.sqlExpression,
+ rule.row_condition_expectation.sql_expression,
+ null
+ )
+ }
+ : null
+ )
+ table_condition_expectation = (
+ can(rule.tableConditionExpectation) || can(rule.table_condition_expectation)
+ ? {
+ sql_expression = try(
+ rule.tableConditionExpectation.sqlExpression,
+ rule.table_condition_expectation.sql_expression,
+ null
+ )
+ }
+ : null
+ )
+ }
+ ]
+ sampling_percent = try(
+ local._factory_data.samplingPercent,
+ local._factory_data.sampling_percent,
+ null
+ )
+ }
+}
diff --git a/main.tf b/main.tf
index e1b6634d5d..0d9ad82ed6 100644
--- a/main.tf
+++ b/main.tf
@@ -15,17 +15,31 @@
*/
locals {
- prefix = var.prefix == null || var.prefix == "" ? "" : "${var.prefix}-"
- _file_data_quality_spec = var.data_quality_spec_file == null ? null : {
- sampling_percent = try(local._file_data_quality_spec_raw.samplingPercent, local._file_data_quality_spec_raw.sampling_percent, null)
- row_filter = try(local._file_data_quality_spec_raw.rowFilter, local._file_data_quality_spec_raw.row_filter, null)
- rules = local._parsed_rules
- post_scan_actions = try(local._file_data_quality_spec_raw.postScanActions, local._file_data_quality_spec_raw.post_scan_actions, null)
+ data_quality_spec = {
+ post_scan_actions = try(
+ var.data_quality_spec.post_scan_actions,
+ local.factory_data.post_scan_actions,
+ null
+ )
+ row_filter = try(
+ var.data_quality_spec.row_filter,
+ local.factory_data.row_filter,
+ null
+ )
+ rules = concat(
+ try(var.data_quality_spec.rules, []),
+ try(local.factory_data.rules, [])
+ )
+ sampling_percent = try(
+ var.data_quality_spec.sampling_percent,
+ local.factory_data.sampling_percent,
+ null
+ )
}
- data_quality_spec = (
- var.data_quality_spec != null || var.data_quality_spec_file != null ?
- merge(var.data_quality_spec, local._file_data_quality_spec) :
- null
+ prefix = var.prefix == null || var.prefix == "" ? "" : "${var.prefix}-"
+ use_data_quality = (
+ var.data_quality_spec != null ||
+ var.factories_config.data_quality_spec != null
)
}
@@ -68,7 +82,7 @@ resource "google_dataplex_datascan" "datascan" {
}
dynamic "data_quality_spec" {
- for_each = local.data_quality_spec != null ? [""] : []
+ for_each = local.use_data_quality ? [""] : []
content {
sampling_percent = try(local.data_quality_spec.sampling_percent, null)
row_filter = try(local.data_quality_spec.row_filter, null)
@@ -76,9 +90,16 @@ resource "google_dataplex_datascan" "datascan" {
for_each = local.data_quality_spec.post_scan_actions != null ? [""] : []
content {
dynamic "bigquery_export" {
- for_each = local.data_quality_spec.post_scan_actions.bigquery_export != null ? [""] : []
+ for_each = (
+ local.data_quality_spec.post_scan_actions.bigquery_export != null
+ ? [""]
+ : []
+ )
content {
- results_table = try(local.data_quality_spec.post_scan_actions.bigquery_export.results_table, null)
+ results_table = try(
+ local.data_quality_spec.post_scan_actions.bigquery_export.results_table,
+ null
+ )
}
}
}
@@ -98,55 +119,85 @@ resource "google_dataplex_datascan" "datascan" {
}
dynamic "range_expectation" {
- for_each = try(rules.value.range_expectation, null) != null ? [""] : []
+ for_each = (
+ try(rules.value.range_expectation, null) != null ? [""] : []
+ )
content {
- min_value = try(rules.value.range_expectation.min_value, null)
- max_value = try(rules.value.range_expectation.max_value, null)
- strict_min_enabled = try(rules.value.range_expectation.strict_min_enabled, null)
- strict_max_enabled = try(rules.value.range_expectation.strict_max_enabled, null)
+ min_value = try(
+ rules.value.range_expectation.min_value, null
+ )
+ max_value = try(
+ rules.value.range_expectation.max_value, null
+ )
+ strict_min_enabled = try(
+ rules.value.range_expectation.strict_min_enabled, null
+ )
+ strict_max_enabled = try(
+ rules.value.range_expectation.strict_max_enabled, null
+ )
}
}
dynamic "set_expectation" {
- for_each = try(rules.value.set_expectation, null) != null ? [""] : []
+ for_each = (
+ try(rules.value.set_expectation, null) != null ? [""] : []
+ )
content {
values = rules.value.set_expectation.values
}
}
dynamic "uniqueness_expectation" {
- for_each = try(rules.value.uniqueness_expectation, null) != null ? [""] : []
+ for_each = (
+ try(rules.value.uniqueness_expectation, null) != null ? [""] : []
+ )
content {
}
}
dynamic "regex_expectation" {
- for_each = try(rules.value.regex_expectation, null) != null ? [""] : []
+ for_each = (
+ try(rules.value.regex_expectation, null) != null ? [""] : []
+ )
content {
regex = rules.value.regex_expectation.regex
}
}
dynamic "statistic_range_expectation" {
- for_each = try(rules.value.statistic_range_expectation, null) != null ? [""] : []
+ for_each = (
+ try(rules.value.statistic_range_expectation, null) != null ? [""] : []
+ )
content {
- min_value = try(rules.value.statistic_range_expectation.min_value, null)
- max_value = try(rules.value.statistic_range_expectation.max_value, null)
- strict_min_enabled = try(rules.value.statistic_range_expectation.strict_min_enabled, null)
- strict_max_enabled = try(rules.value.statistic_range_expectation.strict_max_enabled, null)
- statistic = rules.value.statistic_range_expectation.statistic
+ min_value = try(
+ rules.value.statistic_range_expectation.min_value, null
+ )
+ max_value = try(
+ rules.value.statistic_range_expectation.max_value, null
+ )
+ strict_min_enabled = try(
+ rules.value.statistic_range_expectation.strict_min_enabled, null
+ )
+ strict_max_enabled = try(
+ rules.value.statistic_range_expectation.strict_max_enabled, null
+ )
+ statistic = rules.value.statistic_range_expectation.statistic
}
}
dynamic "row_condition_expectation" {
- for_each = try(rules.value.row_condition_expectation, null) != null ? [""] : []
+ for_each = (
+ try(rules.value.row_condition_expectation, null) != null ? [""] : []
+ )
content {
sql_expression = rules.value.row_condition_expectation.sql_expression
}
}
dynamic "table_condition_expectation" {
- for_each = try(rules.value.table_condition_expectation, null) != null ? [""] : []
+ for_each = (
+ try(rules.value.table_condition_expectation, null) != null ? [""] : []
+ )
content {
sql_expression = rules.value.table_condition_expectation.sql_expression
}
@@ -159,8 +210,16 @@ resource "google_dataplex_datascan" "datascan" {
lifecycle {
precondition {
- condition = length([for spec in [var.data_profile_spec, var.data_quality_spec, var.data_quality_spec_file] : spec if spec != null]) == 1
- error_message = "DataScan can only contain one of 'data_profile_spec', 'data_quality_spec', 'data_quality_spec_file'."
+ condition = (
+ length([
+ for spec in [
+ var.data_profile_spec,
+ var.data_quality_spec,
+ var.factories_config.data_quality_spec
+ ] : spec if spec != null
+ ]) == 1
+ )
+ error_message = "DataScan can only contain one of 'data_profile_spec', 'data_quality_spec', 'factories_config.data_quality_spec'."
}
precondition {
condition = alltrue([
diff --git a/rules_parsing.tf b/rules_parsing.tf
deleted file mode 100644
index bbdc82206b..0000000000
--- a/rules_parsing.tf
+++ /dev/null
@@ -1,54 +0,0 @@
-/**
- * Copyright 2023 Google LLC
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-locals {
- _file_data_quality_spec_raw = var.data_quality_spec_file != null ? yamldecode(file(var.data_quality_spec_file.path)) : tomap({})
- _parsed_rules = [
- for rule in try(local._file_data_quality_spec_raw.rules, []) : {
- column = try(rule.column, null)
- ignore_null = try(rule.ignoreNull, rule.ignore_null, null)
- dimension = rule.dimension
- threshold = try(rule.threshold, null)
- non_null_expectation = try(rule.nonNullExpectation, rule.non_null_expectation, null)
- range_expectation = can(rule.rangeExpectation) || can(rule.range_expectation) ? {
- min_value = try(rule.rangeExpectation.minValue, rule.range_expectation.min_value, null)
- max_value = try(rule.rangeExpectation.maxValue, rule.range_expectation.max_value, null)
- strict_min_enabled = try(rule.rangeExpectation.strictMinEnabled, rule.range_expectation.strict_min_enabled, null)
- strict_max_enabled = try(rule.rangeExpectation.strictMaxEnabled, rule.range_expectation.strict_max_enabled, null)
- } : null
- regex_expectation = can(rule.regexExpectation) || can(rule.regex_expectation) ? {
- regex = try(rule.regexExpectation.regex, rule.regex_expectation.regex, null)
- } : null
- set_expectation = can(rule.setExpectation) || can(rule.set_expectation) ? {
- values = try(rule.setExpectation.values, rule.set_expectation.values, null)
- } : null
- uniqueness_expectation = try(rule.uniquenessExpectation, rule.uniqueness_expectation, null)
- statistic_range_expectation = can(rule.statisticRangeExpectation) || can(rule.statistic_range_expectation) ? {
- statistic = try(rule.statisticRangeExpectation.statistic, rule.statistic_range_expectation.statistic)
- min_value = try(rule.statisticRangeExpectation.minValue, rule.statistic_range_expectation.min_value, null)
- max_value = try(rule.statisticRangeExpectation.maxValue, rule.statistic_range_expectation.max_value, null)
- strict_min_enabled = try(rule.statisticRangeExpectation.strictMinEnabled, rule.statistic_range_expectation.strict_min_enabled, null)
- strict_max_enabled = try(rule.statisticRangeExpectation.strictMaxEnabled, rule.statistic_range_expectation.strict_max_enabled, null)
- } : null
- row_condition_expectation = can(rule.rowConditionExpectation) || can(rule.row_condition_expectation) ? {
- sql_expression = try(rule.rowConditionExpectation.sqlExpression, rule.row_condition_expectation.sql_expression, null)
- } : null
- table_condition_expectation = can(rule.tableConditionExpectation) || can(rule.table_condition_expectation) ? {
- sql_expression = try(rule.tableConditionExpectation.sqlExpression, rule.table_condition_expectation.sql_expression, null)
- } : null
- }
- ]
-}
\ No newline at end of file
diff --git a/variables.tf b/variables.tf
index cab105bfed..c01774f75d 100644
--- a/variables.tf
+++ b/variables.tf
@@ -82,14 +82,6 @@ variable "data_quality_spec" {
})
}
-variable "data_quality_spec_file" {
- description = "Path to a YAML file containing DataQualityScan related setting. Input content can use either camelCase or snake_case. Variables description are provided in https://cloud.google.com/dataplex/docs/reference/rest/v1/DataQualitySpec."
- default = null
- type = object({
- path = string
- })
-}
-
variable "description" {
description = "Custom description for DataScan."
default = null
@@ -102,6 +94,15 @@ variable "execution_schedule" {
default = null
}
+variable "factories_config" {
+ description = "Paths to data files and folders that enable factory functionality."
+ type = object({
+ data_quality_spec = optional(string)
+ })
+ nullable = false
+ default = {}
+}
+
variable "incremental_field" {
description = "The unnested field (of type Date or Timestamp) that contains values which monotonically increase over time. If not specified, a data scan will run for all data in the table."
type = string
diff --git a/versions.tf b/versions.tf
index 3db0e2076e..f43fef270d 100644
--- a/versions.tf
+++ b/versions.tf
@@ -13,15 +13,15 @@
# limitations under the License.
terraform {
- required_version = ">= 1.7.0"
+ required_version = ">= 1.7.4"
required_providers {
google = {
source = "hashicorp/google"
- version = ">= 5.11.0, < 6.0.0" # tftest
+ version = ">= 5.12.0, < 6.0.0" # tftest
}
google-beta = {
source = "hashicorp/google-beta"
- version = ">= 5.11.0, < 6.0.0" # tftest
+ version = ">= 5.12.0, < 6.0.0" # tftest
}
}
}