Fixes GoogleCloudPlatform#105: DM/bigquery: refactoring

GoogleCloudPlatform#105
- Added version, links to docs
- Switched to using type provider
- Added support for cross-project resource creation
- Added missing fields to datasets: "friendlyName", "defaultPartitionExpirationMs", "labels", "access"
- Added missing fields to tables: "description", "labels", "clustering", "requirePartitionFilter", "externalDataConfiguration", "encryptionConfiguration"
- Fixed resource names
- Added uniqueItems: true and additionalProperties: false
bohdanyurov-gl · Jun 10, 2019 · 88ea0ca · 88ea0ca
1 parent 8007e64
commit 88ea0ca
Show file tree

Hide file tree

Showing 4 changed files with 431 additions and 49 deletions.
diff --git a/dm/templates/bigquery/bigquery_dataset.py b/dm/templates/bigquery/bigquery_dataset.py
@@ -20,15 +20,18 @@ def generate_config(context):
     # You can modify the roles you wish to whitelist.
     whitelisted_roles = ['READER', 'WRITER', 'OWNER']
 
-    name = context.properties['name']
+    properties = context.properties
+    name = properties.get('name', context.env['name'])
+    project_id = properties.get('project', context.env['project'])
 
     properties = {
         'datasetReference':
             {
                 'datasetId': name,
-                'projectId': context.env['project']
+                'projectId': project_id
             },
-        'location': context.properties['location']
+        'location': context.properties['location'],
+        'projectId': project_id,
     }
 
     optional_properties = ['description', 'defaultTableExpirationMs']
@@ -68,32 +71,33 @@ def generate_config(context):
 
     resources = [
         {
-            'type': 'bigquery.v2.dataset',
-            'name': name,
+            # https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets
+            'type': 'gcp-types/bigquery-v2:datasets',
+            'name': context.env['name'],
             'properties': properties
         }
     ]
 
     outputs = [
         {
             'name': 'selfLink',
-            'value': '$(ref.{}.selfLink)'.format(name)
+            'value': '$(ref.{}.selfLink)'.format(context.env['name'])
         },
         {
             'name': 'datasetId',
             'value': name
         },
         {
             'name': 'etag',
-            'value': '$(ref.{}.etag)'.format(name)
+            'value': '$(ref.{}.etag)'.format(context.env['name'])
         },
         {
             'name': 'creationTime',
-            'value': '$(ref.{}.creationTime)'.format(name)
+            'value': '$(ref.{}.creationTime)'.format(context.env['name'])
         },
         {
             'name': 'lastModifiedTime',
-            'value': '$(ref.{}.lastModifiedTime)'.format(name)
+            'value': '$(ref.{}.lastModifiedTime)'.format(context.env['name'])
         }
     ]
 

diff --git a/dm/templates/bigquery/bigquery_dataset.py.schema b/dm/templates/bigquery/bigquery_dataset.py.schema
@@ -15,11 +15,17 @@
 info:
   title: BigQuery Dataset
   author: Sourced Group Inc.
+  version: 1.0.0
   description: |
     Creates a BigQuery dataset.
+
     For information on this resource:
     https://cloud.google.com/bigquery/docs/.
 
+    API endpoints used by this template:
+    - gcp-types/bigquery-v2:datasets =>
+        https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets
+
 imports:
   - path: bigquery_dataset.py
 
@@ -31,7 +37,21 @@ required:
 properties:
   name:
     type: string
-    description: The resource name.
+    description: |
+      The dataset name. If omitted, the resource name is used.
+  project:
+    type: string
+    description: |
+      The project ID of the project containing the dataset. The
+      Google apps domain is prefixed if applicable.
+  friendlyName:
+    type: string
+    description: |
+      A descriptive name for the dataset.
+  description:
+    type: string
+    description: |
+      A user-friendly description of the dataset.
   location:
     type: string
     description: |
@@ -45,6 +65,7 @@ properties:
       - US
   access:
     type: array
+    uniqueItems: true
     description: |
       An array of objects that define dataset access for one or more
       entities. You can set this property when inserting or updating
@@ -56,42 +77,44 @@ properties:
         access.specialGroup: projectOwners; access.role: OWNER
         access.userByEmail: [dataset creator email]; access.role: OWNER
     items:
-      role:
-        type: string
-        description: |
-          The role (rights) granted to the user specified by the other
-          member of the access object. The following string values are
-          supported: READER, WRITER, OWNER. See details at 
-          https://cloud.google.com/bigquery/docs/access-control.
-        enum:
-          - READER
-          - WRITER
-          - OWNER
-      oneOf:
-        - domain:
+      type: object
+      additionalProperties: false
+      required:
+        - role
+      properties:
+        role:
+          type: string
+          description: |
+            An IAM role ID that should be granted to the user, group, or domain specified in this access entry.
+            The following legacy mappings will be applied: OWNER <=> roles/bigquery.dataOwner,
+            WRITER <=> roles/bigquery.dataEditor, READER <=> roles/bigquery.dataViewer. This field will accept any of
+            the above formats, but will return only the legacy format. For example, if you set this field to
+            "roles/bigquery.dataOwner", it will be returned as "OWNER". @mutable bigquery.datasets.update
+        domain:
           type: string
           description: |
             The domain to grant access to. All users signed in with the 
             specified domain are granted the corresponding access.
             Example: "example.com".
-        - userByEmail:
+        userByEmail:
           type: string
           description: |
             The email address of a user to grant access to. For example:
             user@example.com.
-        - groupByEmail:
+        groupByEmail:
           type: string
           description: The email address of a Google Group to grant access to.
-        - specialGroup:
+        specialGroup:
           type: string
           description: |
             The special group to grant access to. Possible values include:
               projectOwners: owners of the enclosing project
               projectReaders: readers of the enclosing project
               projectWriters: writers of the enclosing project
               allAuthenticatedUsers: all authenticated BigQuery users
-        - view:
+        view:
           type: object
+          additionalProperties: false
           description: |
             A view from a different dataset to grant access to. Queries
             executed against that view have the Read access to tables in that
@@ -112,9 +135,6 @@ properties:
                 The table ID. The ID must contain only letters
                 (a-z, A-Z), numbers (0-9), or underscores (_). The maximum
                 length is 1,024 characters.
-  description:
-    type: string
-    description: A user-friendly description of the dataset.
   setDefaultOwner:
     type: boolean
     default: False
@@ -136,6 +156,26 @@ properties:
       expirationTime while creating the table, that value takes precedence over
       the default expiration time indicated by this property.
     minimum: 3600000
+  defaultPartitionExpirationMs:
+    type: string
+    format: int64
+    description: |
+      The default partition expiration for all partitioned tables in the dataset, in milliseconds.
+      Once this property is set, all newly-created partitioned tables in the dataset will have an expirationMs
+      property in the timePartitioning settings set to this value, and changing the value will only affect new tables,
+      not existing ones. The storage in a partition will have an expiration time of its partition time plus this value.
+      Setting this property overrides the use of defaultTableExpirationMs for partitioned tables: only one of
+      defaultTableExpirationMs and defaultPartitionExpirationMs will be used for any new partitioned table.
+      If you provide an explicit timePartitioning.expirationMs when creating or updating a partitioned table,
+      that value takes precedence over the default partition expiration time indicated by this property.
+  labels:
+    type: object
+    description: |
+      A map of labels associated with this dataset.
+      Example:
+        name: wrench
+        mass: 1.3kg
+        count: 3
 
 outputs:
   properties:

diff --git a/dm/templates/bigquery/bigquery_table.py b/dm/templates/bigquery/bigquery_table.py
@@ -18,16 +18,19 @@
 def generate_config(context):
     """ Entry point for the deployment resources. """
 
-    name = context.properties['name']
+    properties = context.properties
+    name = properties.get('name', context.env['name'])
+    project_id = properties.get('project', context.env['project'])
 
     properties = {
         'tableReference':
             {
                 'tableId': name,
                 'datasetId': context.properties['datasetId'],
-                'projectId': context.env['project']
+                'projectId': project_id
             },
-        'datasetId': context.properties['datasetId']
+        'datasetId': context.properties['datasetId'],
+        'projectId': project_id,
     }
 
     optional_properties = [
@@ -48,8 +51,9 @@ def generate_config(context):
 
     resources = [
         {
-            'type': 'bigquery.v2.table',
-            'name': name,
+            # https://cloud.google.com/bigquery/docs/reference/rest/v2/tables
+            'type': 'gcp-types/bigquery-v2:tables',
+            'name': context.env['name'],
             'properties': properties,
             'metadata': {
                 'dependsOn': [context.properties['datasetId']]
@@ -60,39 +64,39 @@ def generate_config(context):
     outputs = [
         {
             'name': 'selfLink',
-            'value': '$(ref.{}.selfLink)'.format(name)
+            'value': '$(ref.{}.selfLink)'.format(context.env['name'])
         },
         {
             'name': 'etag',
-            'value': '$(ref.{}.etag)'.format(name)
+            'value': '$(ref.{}.etag)'.format(context.env['name'])
         },
         {
             'name': 'creationTime',
-            'value': '$(ref.{}.creationTime)'.format(name)
+            'value': '$(ref.{}.creationTime)'.format(context.env['name'])
         },
         {
             'name': 'lastModifiedTime',
-            'value': '$(ref.{}.lastModifiedTime)'.format(name)
+            'value': '$(ref.{}.lastModifiedTime)'.format(context.env['name'])
         },
         {
             'name': 'location',
-            'value': '$(ref.{}.location)'.format(name)
+            'value': '$(ref.{}.location)'.format(context.env['name'])
         },
         {
             'name': 'numBytes',
-            'value': '$(ref.{}.numBytes)'.format(name)
+            'value': '$(ref.{}.numBytes)'.format(context.env['name'])
         },
         {
             'name': 'numLongTermBytes',
-            'value': '$(ref.{}.numLongTermBytes)'.format(name)
+            'value': '$(ref.{}.numLongTermBytes)'.format(context.env['name'])
         },
         {
             'name': 'numRows',
-            'value': '$(ref.{}.numRows)'.format(name)
+            'value': '$(ref.{}.numRows)'.format(context.env['name'])
         },
         {
             'name': 'type',
-            'value': '$(ref.{}.type)'.format(name)
+            'value': '$(ref.{}.type)'.format(context.env['name'])
         }
     ]