From 88ea0ca6bcd240c1d687926d5a2a0770199f3db3 Mon Sep 17 00:00:00 2001 From: Bohdan Yurov Date: Mon, 10 Jun 2019 19:23:40 +0300 Subject: [PATCH] Fixes #105: DM/bigquery: refactoring https://github.com/GoogleCloudPlatform/cloud-foundation-toolkit/issues/105 - Added version, links to docs - Switched to using type provider - Added support for cross-project resource creation - Added missing fields to datasets: "friendlyName", "defaultPartitionExpirationMs", "labels", "access" - Added missing fields to tables: "description", "labels", "clustering", "requirePartitionFilter", "externalDataConfiguration", "encryptionConfiguration" - Fixed resource names - Added uniqueItems: true and additionalProperties: false --- dm/templates/bigquery/bigquery_dataset.py | 22 +- .../bigquery/bigquery_dataset.py.schema | 82 +++-- dm/templates/bigquery/bigquery_table.py | 32 +- .../bigquery/bigquery_table.py.schema | 344 +++++++++++++++++- 4 files changed, 431 insertions(+), 49 deletions(-) diff --git a/dm/templates/bigquery/bigquery_dataset.py b/dm/templates/bigquery/bigquery_dataset.py index 11d3849e1f2b..e2725c7d18e0 100644 --- a/dm/templates/bigquery/bigquery_dataset.py +++ b/dm/templates/bigquery/bigquery_dataset.py @@ -20,15 +20,18 @@ def generate_config(context): # You can modify the roles you wish to whitelist. whitelisted_roles = ['READER', 'WRITER', 'OWNER'] - name = context.properties['name'] + properties = context.properties + name = properties.get('name', context.env['name']) + project_id = properties.get('project', context.env['project']) properties = { 'datasetReference': { 'datasetId': name, - 'projectId': context.env['project'] + 'projectId': project_id }, - 'location': context.properties['location'] + 'location': context.properties['location'], + 'projectId': project_id, } optional_properties = ['description', 'defaultTableExpirationMs'] @@ -68,8 +71,9 @@ def generate_config(context): resources = [ { - 'type': 'bigquery.v2.dataset', - 'name': name, + # https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets + 'type': 'gcp-types/bigquery-v2:datasets', + 'name': context.env['name'], 'properties': properties } ] @@ -77,7 +81,7 @@ def generate_config(context): outputs = [ { 'name': 'selfLink', - 'value': '$(ref.{}.selfLink)'.format(name) + 'value': '$(ref.{}.selfLink)'.format(context.env['name']) }, { 'name': 'datasetId', @@ -85,15 +89,15 @@ def generate_config(context): }, { 'name': 'etag', - 'value': '$(ref.{}.etag)'.format(name) + 'value': '$(ref.{}.etag)'.format(context.env['name']) }, { 'name': 'creationTime', - 'value': '$(ref.{}.creationTime)'.format(name) + 'value': '$(ref.{}.creationTime)'.format(context.env['name']) }, { 'name': 'lastModifiedTime', - 'value': '$(ref.{}.lastModifiedTime)'.format(name) + 'value': '$(ref.{}.lastModifiedTime)'.format(context.env['name']) } ] diff --git a/dm/templates/bigquery/bigquery_dataset.py.schema b/dm/templates/bigquery/bigquery_dataset.py.schema index 1e07c2fcd37a..d53678ffdd55 100644 --- a/dm/templates/bigquery/bigquery_dataset.py.schema +++ b/dm/templates/bigquery/bigquery_dataset.py.schema @@ -15,11 +15,17 @@ info: title: BigQuery Dataset author: Sourced Group Inc. + version: 1.0.0 description: | Creates a BigQuery dataset. + For information on this resource: https://cloud.google.com/bigquery/docs/. 
+  API endpoints used by this template:
+  - gcp-types/bigquery-v2:datasets =>
+    https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets
+

 imports:
   - path: bigquery_dataset.py

@@ -31,7 +37,21 @@ required:

 properties:
   name:
     type: string
-    description: The resource name.
+    description: |
+      The dataset name. If omitted, the resource name is used.
+  project:
+    type: string
+    description: |
+      The project ID of the project containing the dataset. The
+      Google apps domain is prefixed if applicable.
+  friendlyName:
+    type: string
+    description: |
+      A descriptive name for the dataset.
+  description:
+    type: string
+    description: |
+      A user-friendly description of the dataset.
   location:
     type: string
     description: |
@@ -45,6 +65,7 @@ properties:
       - US
   access:
     type: array
+    uniqueItems: true
     description: |
       An array of objects that define dataset access for one or more
       entities. You can set this property when inserting or updating
       a dataset in order to control who is allowed to access the data.
       If unspecified at dataset creation time, BigQuery adds default
       dataset access for the following entities:
       access.specialGroup: projectReaders; access.role: READER
       access.specialGroup: projectWriters; access.role: WRITER
       access.specialGroup: projectOwners; access.role: OWNER
       access.userByEmail: [dataset creator email]; access.role: OWNER
     items:
-      role:
-        type: string
-        description: |
-          The role (rights) granted to the user specified by the other
-          member of the access object. The following string values are
-          supported: READER, WRITER, OWNER. See details at
-          https://cloud.google.com/bigquery/docs/access-control.
-        enum:
-          - READER
-          - WRITER
-          - OWNER
-      oneOf:
-        - domain:
+      type: object
+      additionalProperties: false
+      required:
+        - role
+      properties:
+        role:
+          type: string
+          description: |
+            An IAM role ID to grant to the user, group, or domain specified in this access entry.
+            The following legacy mappings are applied:
+            OWNER <=> roles/bigquery.dataOwner
+            WRITER <=> roles/bigquery.dataEditor
+            READER <=> roles/bigquery.dataViewer
+            This field accepts any of the above formats but returns only the legacy format. For example,
+            if you set this field to "roles/bigquery.dataOwner", it is returned back as "OWNER".
+        domain:
           type: string
           description: |
             The domain to grant access to. All users signed in with the
             specified domain are granted the corresponding access.
             Example: "example.com".
-        - userByEmail:
+        userByEmail:
           type: string
           description: |
             The email address of a user to grant access to. For example:
             fred@example.com.
-        - groupByEmail:
+        groupByEmail:
           type: string
           description: The email address of a Google Group to grant access to.
-        - specialGroup:
+        specialGroup:
           type: string
           description: |
             The special group to grant access to. Possible values include:
             projectOwners: owners of the enclosing project
             projectReaders: readers of the enclosing project
             projectWriters: writers of the enclosing project
             allAuthenticatedUsers: all authenticated BigQuery users
-        - view:
+        view:
           type: object
+          additionalProperties: false
           description: |
             A view from a different dataset to grant access to. Queries
             executed against that view have the Read access to tables in that
             dataset. The role field is not required when this field is
             set. If that view is updated by any user, access to the view
             needs to be granted again via an update operation.
           properties:
             projectId:
               type: string
               description: The ID of the project containing this table.
             datasetId:
               type: string
               description: The ID of the dataset containing this table.
             tableId:
               type: string
               description: |
                 The table ID. The ID must contain only letters (a-z, A-Z),
                 numbers (0-9), or underscores (_). The maximum length is
                 1,024 characters.
-  description:
-    type: string
-    description: A user-friendly description of the dataset.
   setDefaultOwner:
     type: boolean
     default: False
@@ -136,6 +156,26 @@ properties:
       expirationTime while creating the table, that value takes
       precedence over the default expiration time indicated by this
       property.
    minimum: 3600000
+  defaultPartitionExpirationMs:
+    type: string
+    format: int64
+    description: |
+      The default partition expiration for all partitioned tables in the dataset, in milliseconds.
+      Once this property is set, all newly-created partitioned tables in the dataset will have an expirationMs
+      property in the timePartitioning settings set to this value, and changing the value will only affect new tables,
+      not existing ones. The storage in a partition will have an expiration time of its partition time plus this value.
+      Setting this property overrides the use of defaultTableExpirationMs for partitioned tables: only one of
+      defaultTableExpirationMs and defaultPartitionExpirationMs will be used for any new partitioned table.
+      If you provide an explicit timePartitioning.expirationMs when creating or updating a partitioned table,
+      that value takes precedence over the default partition expiration time indicated by this property.
+  labels:
+    type: object
+    description: |
+      A map of labels associated with this dataset.
+      Example:
+        name: wrench
+        mass: 1.3kg
+        count: 3

 outputs:
   properties:
diff --git a/dm/templates/bigquery/bigquery_table.py b/dm/templates/bigquery/bigquery_table.py
index ea527d3366fd..d4ec420e3021 100644
--- a/dm/templates/bigquery/bigquery_table.py
+++ b/dm/templates/bigquery/bigquery_table.py
@@ -18,16 +18,19 @@ def generate_config(context):
     """ Entry point for the deployment resources. """

-    name = context.properties['name']
+    properties = context.properties
+    name = properties.get('name', context.env['name'])
+    project_id = properties.get('project', context.env['project'])

     properties = {
         'tableReference': {
             'tableId': name,
             'datasetId': context.properties['datasetId'],
-            'projectId': context.env['project']
+            'projectId': project_id
         },
-        'datasetId': context.properties['datasetId']
+        'datasetId': context.properties['datasetId'],
+        'projectId': project_id,
     }

     optional_properties = [
@@ -48,8 +51,9 @@
     resources = [
         {
-            'type': 'bigquery.v2.table',
-            'name': name,
+            # https://cloud.google.com/bigquery/docs/reference/rest/v2/tables
+            'type': 'gcp-types/bigquery-v2:tables',
+            'name': context.env['name'],
             'properties': properties,
             'metadata': {
                 'dependsOn': [context.properties['datasetId']]
@@ -60,39 +64,39 @@
     outputs = [
         {
             'name': 'selfLink',
-            'value': '$(ref.{}.selfLink)'.format(name)
+            'value': '$(ref.{}.selfLink)'.format(context.env['name'])
         },
         {
             'name': 'etag',
-            'value': '$(ref.{}.etag)'.format(name)
+            'value': '$(ref.{}.etag)'.format(context.env['name'])
         },
         {
             'name': 'creationTime',
-            'value': '$(ref.{}.creationTime)'.format(name)
+            'value': '$(ref.{}.creationTime)'.format(context.env['name'])
         },
         {
             'name': 'lastModifiedTime',
-            'value': '$(ref.{}.lastModifiedTime)'.format(name)
+            'value': '$(ref.{}.lastModifiedTime)'.format(context.env['name'])
         },
         {
             'name': 'location',
-            'value': '$(ref.{}.location)'.format(name)
+            'value': '$(ref.{}.location)'.format(context.env['name'])
         },
         {
             'name': 'numBytes',
-            'value': '$(ref.{}.numBytes)'.format(name)
+            'value': '$(ref.{}.numBytes)'.format(context.env['name'])
         },
         {
             'name': 'numLongTermBytes',
-            'value': '$(ref.{}.numLongTermBytes)'.format(name)
+            'value': '$(ref.{}.numLongTermBytes)'.format(context.env['name'])
         },
         {
             'name': 'numRows',
-            'value': '$(ref.{}.numRows)'.format(name)
+            'value': '$(ref.{}.numRows)'.format(context.env['name'])
         },
         {
             'name': 'type',
-            'value': '$(ref.{}.type)'.format(name)
+            'value': '$(ref.{}.type)'.format(context.env['name'])
         }
     ]

diff --git a/dm/templates/bigquery/bigquery_table.py.schema b/dm/templates/bigquery/bigquery_table.py.schema
index ad0cbb8865ce..e36cf31a9216 100644
--- a/dm/templates/bigquery/bigquery_table.py.schema
+++ b/dm/templates/bigquery/bigquery_table.py.schema
@@ -15,11 +15,17 @@
 info:
   title: BigQuery Table
   author: Sourced Group Inc.
+  version: 1.0.0
   description: |
     Creates a BigQuery table.
-    For more information on this resource:
+
+    For information on this resource:
     https://cloud.google.com/bigquery/docs/.

+  API endpoints used by this template:
+  - gcp-types/bigquery-v2:tables =>
+    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables
+
 imports:
   - path: bigquery_table.py

@@ -31,14 +37,25 @@ required:

 properties:
   name:
     type: string
-    description: The resource name.
+    description: |
+      The table name. If omitted, the resource name is used.
+  project:
+    type: string
+    description: |
+      The project ID of the project containing the table. The
+      Google apps domain is prefixed if applicable.
   datasetId:
     type: string
     description: |
       The ID of the dataset the table belongs to.
   friendlyName:
     type: string
-    description: A descriptive name for the table.
+    description: |
+      A descriptive name for the table.
+  description:
+    type: string
+    description: |
+      A user-friendly description of the table.
   expirationTime:
     type: string
     description: |
@@ -47,9 +64,314 @@ properties:
       deleted, and their storage is reclaimed. The
       defaultTableExpirationMs property of the encapsulating dataset
       can be used to set a default expirationTime on newly created
       tables. For example, 1535739430.
+  encryptionConfiguration:
+    type: object
+    additionalProperties: false
+    description: |
+      Custom encryption configuration (e.g., Cloud KMS keys).
+    properties:
+      kmsKeyName:
+        type: string
+        description: |
+          Describes the Cloud KMS encryption key that will be used to protect the destination BigQuery table.
+          The BigQuery Service Account associated with your project requires access to this encryption key.
+  externalDataConfiguration:
+    type: object
+    additionalProperties: false
+    description: |
+      Describes the data format, location, and other properties of a table stored outside of BigQuery.
+      By defining these properties, the data source can then be queried as if it were a standard BigQuery table.
+    required:
+      - sourceUris
+      - sourceFormat
+    properties:
+      sourceUris:
+        type: array
+        minItems: 1
+        uniqueItems: true
+        description: |
+          The fully-qualified URIs that point to your data in Google Cloud. For Google Cloud Storage URIs:
+          Each URI can contain one '*' wildcard character and it must come after the 'bucket' name.
+          Size limits related to load jobs apply to external data sources. For Google Cloud Bigtable URIs:
+          Exactly one URI can be specified and it has to be a fully specified and valid HTTPS URL for a
+          Google Cloud Bigtable table. For Google Cloud Datastore backups, exactly one URI can be specified.
+          Also, the '*' wildcard character is not allowed.
+        items:
+          type: string
+      schema:
+        type: object
+        description: |
+          The schema for the data. Schema is required for CSV and JSON formats. Schema is disallowed for
+          Google Cloud Bigtable, Cloud Datastore backups, and Avro formats.
+      sourceFormat:
+        type: string
+        description: |
+          The data format. For CSV files, specify "CSV". For Google sheets, specify "GOOGLE_SHEETS".
+          For newline-delimited JSON, specify "NEWLINE_DELIMITED_JSON". For Avro files, specify "AVRO".
+          For Google Cloud Datastore backups, specify "DATASTORE_BACKUP".
+          [Beta] For Google Cloud Bigtable, specify "BIGTABLE".
+        enum:
+          - CSV
+          - GOOGLE_SHEETS
+          - NEWLINE_DELIMITED_JSON
+          - AVRO
+          - DATASTORE_BACKUP
+          - BIGTABLE
+      maxBadRecords:
+        type: number
+        description: |
+          The maximum number of bad records that BigQuery can ignore when reading data. If the number of
+          bad records exceeds this value, an invalid error is returned in the job result.
+          The default value is 0, which requires that all records are valid. This setting is ignored
+          for Google Cloud Bigtable, Google Cloud Datastore backups and Avro formats.
+      autodetect:
+        type: boolean
+        description: |
+          Try to detect schema and format options automatically.
+          Any option specified explicitly will be honored.
+      compression:
+        type: string
+        description: |
+          The compression type of the data source. Possible values include GZIP and NONE. The default value is NONE.
+          This setting is ignored for Google Cloud Bigtable, Google Cloud Datastore backups and Avro formats.
+          An empty string is an invalid value.
+        enum:
+          - NONE
+          - GZIP
+      csvOptions:
+        type: object
+        additionalProperties: false
+        description: |
+          Additional properties to set if sourceFormat is set to CSV.
+        properties:
+          fieldDelimiter:
+            type: string
+            description: |
+              The separator for fields in a CSV file. BigQuery converts the string to ISO-8859-1 encoding,
+              and then uses the first byte of the encoded string to split the data in its raw, binary state.
+              BigQuery also supports the escape sequence "\t" to specify a tab separator.
+              The default value is a comma (',').
+          skipLeadingRows:
+            type: number
+            description: |
+              The number of rows at the top of a CSV file that BigQuery will skip when reading the data.
+              The default value is 0. This property is useful if you have header rows in the file that should be skipped.
+          quote:
+            type: string
+            description: |
+              The value that is used to quote data sections in a CSV file. BigQuery converts the string to
+              ISO-8859-1 encoding, and then uses the first byte of the encoded string to split the data in its raw,
+              binary state. The default value is a double-quote ('"'). If your data does not contain quoted sections,
+              set the property value to an empty string. If your data contains quoted newline characters,
+              you must also set the allowQuotedNewlines property to true.
+          allowQuotedNewlines:
+            type: boolean
+            description: |
+              Indicates if BigQuery should allow quoted data sections that contain newline characters in a CSV file.
+              The default value is false.
+          allowJaggedRows:
+            type: boolean
+            description: |
+              Indicates if BigQuery should accept rows that are missing trailing optional columns.
+              If true, BigQuery treats missing trailing columns as null values.
+              If false, records with missing trailing columns are treated as bad records, and if there are
+              too many bad records, an invalid error is returned in the job result. The default value is false.
+          encoding:
+            type: string
+            description: |
+              The character encoding of the data. The supported values are UTF-8 or ISO-8859-1.
+              The default value is UTF-8.
BigQuery decodes the data after the raw, binary data has
+              been split using the values of the quote and fieldDelimiter properties.
+            enum:
+              - UTF-8
+              - ISO-8859-1
+      bigtableOptions:
+        type: object
+        additionalProperties: false
+        description: |
+          Additional options if sourceFormat is set to BIGTABLE.
+        properties:
+          columnFamilies:
+            type: array
+            uniqueItems: true
+            description: |
+              List of column families to expose in the table schema along with their types.
+              This list restricts the column families that can be referenced in queries and specifies their value types.
+              You can use this list to do type conversions - see the 'type' field for more details.
+              If you leave this list empty, all column families are present in the table schema and their values
+              are read as BYTES. During a query only the column families referenced in that query are read from Bigtable.
+            items:
+              type: object
+              additionalProperties: false
+              properties:
+                familyId:
+                  type: string
+                  description: |
+                    Identifier of the column family.
+                type:
+                  type: string
+                  description: |
+                    The type to convert the value in cells of this column family. The values are expected to be
+                    encoded using the HBase Bytes.toBytes function when using the BINARY encoding value.
+                    The following BigQuery types are allowed (case-sensitive): BYTES, STRING, INTEGER, FLOAT, BOOLEAN.
+                    The default type is BYTES. This can be overridden for a specific column by listing that
+                    column in 'columns' and specifying a type for it.
+                  enum:
+                    - BYTES
+                    - STRING
+                    - INTEGER
+                    - FLOAT
+                    - BOOLEAN
+                encoding:
+                  type: string
+                  description: |
+                    The encoding of the values when the type is not STRING. Acceptable encoding values are:
+                    - TEXT - indicates values are alphanumeric text strings.
+                    - BINARY - indicates values are encoded using the HBase Bytes.toBytes family of functions.
+                    This can be overridden for a specific column by listing that column in
+                    'columns' and specifying an encoding for it.
+                  enum:
+                    - TEXT
+                    - BINARY
+                columns:
+                  type: array
+                  uniqueItems: true
+                  description: |
+                    A list of columns that should be exposed as individual fields as opposed to a list of
+                    (column name, value) pairs. All columns whose qualifier matches a qualifier in this list
+                    can be accessed as [family field name].[column field name]. Other columns can be accessed
+                    as a list through the [family field name].Column field.
+                  items:
+                    type: object
+                    additionalProperties: false
+                    required:
+                      - qualifierEncoded
+                    properties:
+                      qualifierEncoded:
+                        type: string
+                        description: |
+                          Qualifier of the column. Columns in the parent column family that have this exact
+                          qualifier are exposed as the [family field name].[column field name] field.
+                          If the qualifier is a valid UTF-8 string, it can be specified in the qualifierString
+                          field. Otherwise, a base-64 encoded value must be set to qualifierEncoded.
+                          The column field name is the same as the column qualifier. However, if the qualifier
+                          is not a valid BigQuery field identifier, i.e. does not match [a-zA-Z][a-zA-Z0-9_]*,
+                          a valid identifier must be provided as fieldName.
+                      qualifierString:
+                        type: string
+                      fieldName:
+                        type: string
+                        description: |
+                          If the qualifier is not a valid BigQuery field identifier, i.e. does not match
+                          [a-zA-Z][a-zA-Z0-9_]*, a valid identifier must be provided as the column field name
+                          and is used as the field name in queries.
+                      type:
+                        type: string
+                        description: |
+                          The type to convert the value in cells of this column. The values are expected to be
+                          encoded using the HBase Bytes.toBytes function when using the BINARY encoding value.
+                          The following BigQuery types are allowed (case-sensitive): BYTES, STRING, INTEGER, FLOAT, BOOLEAN.
+                          The default type is BYTES. 'type' can also be set at the column family level.
+                          However, the setting at this level takes precedence if 'type' is set at both levels.
+                        enum:
+                          - BYTES
+                          - STRING
+                          - INTEGER
+                          - FLOAT
+                          - BOOLEAN
+                      encoding:
+                        type: string
+                        description: |
+                          The encoding of the values when the type is not STRING. Acceptable encoding values are:
+                          - TEXT - indicates values are alphanumeric text strings.
+                          - BINARY - indicates values are encoded using the HBase Bytes.toBytes family of functions.
+                          'encoding' can also be set at the column family level. However, the setting at this level
+                          takes precedence if 'encoding' is set at both levels.
+                        enum:
+                          - TEXT
+                          - BINARY
+                      onlyReadLatest:
+                        type: boolean
+                        description: |
+                          If this is set, only the latest version of the value in this column is exposed.
+                          'onlyReadLatest' can also be set at the column family level. However, the setting at
+                          this level takes precedence if 'onlyReadLatest' is set at both levels.
+          ignoreUnspecifiedColumnFamilies:
+            type: boolean
+            description: |
+              If true, the column families that are not specified in the columnFamilies list are not
+              exposed in the table schema. Otherwise, they are read with BYTES type values. The default value is false.
+          readRowkeyAsString:
+            type: boolean
+            description: |
+              If true, the rowkey column families will be read and converted to string. Otherwise, they are
+              read with BYTES type values, and users need to manually cast them with CAST if necessary.
+              The default value is false.
+      googleSheetsOptions:
+        type: object
+        additionalProperties: false
+        description: |
+          Additional options if sourceFormat is set to GOOGLE_SHEETS.
+        properties:
+          skipLeadingRows:
+            type: number
+            description: |
+              The number of rows at the top of a sheet that BigQuery will skip when reading the data.
+              The default value is 0. This property is useful if you have header rows that should be skipped.
+              When autodetect is on, the behavior is the following:
+              - skipLeadingRows unspecified - Autodetect tries to detect headers in the first row. If they
+                are not detected, the row is read as data. Otherwise, data is read starting from the second row.
+              - skipLeadingRows is 0 - Instructs autodetect that there are no headers, and data should be
+                read starting from the first row.
+              - skipLeadingRows = N > 0 - Autodetect skips N-1 rows and tries to detect headers in row N.
+                If headers are not detected, row N is just skipped. Otherwise, row N is used to extract
+                column names for the detected schema.
+          range:
+            type: string
+            description: |
+              [Beta] Range of a sheet to query from. Only used when non-empty.
+      hivePartitioningMode:
+        type: string
+        description: |
+          [Experimental] When set, what mode of hive partitioning to use when reading data.
+          Two modes are supported:
+          - AUTO: automatically infer partition key name(s) and type(s).
+          - STRINGS: automatically infer partition key name(s). All types are strings.
+          Not all storage formats support hive partitioning -- requesting hive partitioning
+          on an unsupported format will lead to an error.
+        enum:
+          - AUTO
+          - STRINGS
+  clustering:
+    type: object
+    additionalProperties: false
+    description: |
+      Clustering specification for the table. Must be used together with time-based partitioning:
+      data in the table is first partitioned and subsequently clustered.
+    required:
+      - fields
+    properties:
+      fields:
+        type: array
+        minItems: 1
+        uniqueItems: true
+        description: |
+          One or more fields on which data should be clustered. Only top-level, non-repeated, simple-type fields
+          are supported. The order of the fields determines how clusters are generated, so it is important.
+        items:
+          type: string
+  requirePartitionFilter:
+    type: boolean
+    description: |
+      [Beta] If set to true, queries over this table must specify a partition filter that can be used
+      for partition elimination.
   timePartitioning:
     type: object
-    description: The time-based partitioning specification for this table.
+    additionalProperties: false
+    description: |
+      The time-based partitioning specification for this table.
     properties:
       expirationMs:
         type: string
@@ -69,7 +391,7 @@ properties:
       requirePartitionFilter:
         type: boolean
         description: |
-          If True, queries over the table require a partition filter
+          [Beta] If True, queries over the table require a partition filter
           (that can be used for partition elimination) to be specified.
       type:
         type: string
@@ -78,6 +400,7 @@ properties:
           per day.
   view:
     type: object
+    additionalProperties: false
     description: The view definition.
     properties:
       query:
@@ -94,6 +417,7 @@ properties:
           value.
       userDefinedFunctionResources:
         type: array
+        uniqueItems: true
         description: |
           User-defined function resources used in the query.
         items:
@@ -111,12 +435,14 @@ properties:
           (gs://bucket/path).
   schema:
     type: array
+    uniqueItems: true
     description: |
       The schema for the data. Required for the CSV and JSON formats.
       Disallowed for the Google Cloud Bigtable, Cloud Datastore
       backups, and Avro formats.
     items:
       type: object
+      additionalProperties: false
       description: Defines the table fields.
       required:
         - name
@@ -167,6 +493,14 @@ properties:
         type: string
         description: |
           The field description. The maximum length is 1,024 characters.
+  labels:
+    type: object
+    description: |
+      A map of labels associated with this table.
+      Example:
+        name: wrench
+        mass: 1.3kg
+        count: 3

 outputs:
   properties:
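
Reviewer note: to exercise the refactored templates end to end, here is a minimal
deployment-config sketch. It assumes the templates forward the new optional fields
exactly as the schemas above describe; the project ID, resource names, and field
values are illustrative only and are not part of this patch.

    imports:
      - path: dm/templates/bigquery/bigquery_dataset.py
        name: bigquery_dataset.py
      - path: dm/templates/bigquery/bigquery_table.py
        name: bigquery_table.py

    resources:
      # Dataset: `name` is omitted, so the template falls back to the
      # deployment resource name ("analytics") via properties.get().
      - name: analytics
        type: bigquery_dataset.py
        properties:
          project: some-other-project  # hypothetical; exercises cross-project creation
          location: US
          friendlyName: Analytics data
          labels:
            env: dev
          access:
            - role: READER
              domain: example.com

      # Table: clustering must be combined with time-based partitioning,
      # per the schema above.
      - name: events
        type: bigquery_table.py
        properties:
          project: some-other-project
          datasetId: analytics
          timePartitioning:
            type: DAY
          requirePartitionFilter: true
          clustering:
            fields:
              - user_id

One caveat worth flagging in review: bigquery_table.py uses
context.properties['datasetId'] as its dependsOn target, so this sketch only works
when the dataset's deployment resource name equals its dataset ID, as it does here.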