Skip to content

Commit

Permalink
Fixes GoogleCloudPlatform#105: DM/bigquery: refactoring
Browse files Browse the repository at this point in the history
GoogleCloudPlatform#105

- Added version, links to docs
- Switched to using type provider
- Added support for cross-project resource creation
- Added missing fields to datasets: "friendlyName",
"defaultPartitionExpirationMs", "labels", "access"
- Added missing fields to tables: "description", "labels", "clustering",
"requirePartitionFilter", "externalDataConfiguration",
"encryptionConfiguration"
- Fixed resource names
- Added uniqueItems: true and additionalProperties: false
  • Loading branch information
bohdanyurov-gl committed Jun 10, 2019
1 parent 8007e64 commit 88ea0ca
Show file tree
Hide file tree
Showing 4 changed files with 431 additions and 49 deletions.
22 changes: 13 additions & 9 deletions dm/templates/bigquery/bigquery_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,18 @@ def generate_config(context):
# You can modify the roles you wish to whitelist.
whitelisted_roles = ['READER', 'WRITER', 'OWNER']

name = context.properties['name']
properties = context.properties
name = properties.get('name', context.env['name'])
project_id = properties.get('project', context.env['project'])

properties = {
'datasetReference':
{
'datasetId': name,
'projectId': context.env['project']
'projectId': project_id
},
'location': context.properties['location']
'location': context.properties['location'],
'projectId': project_id,
}

optional_properties = ['description', 'defaultTableExpirationMs']
Expand Down Expand Up @@ -68,32 +71,33 @@ def generate_config(context):

resources = [
{
'type': 'bigquery.v2.dataset',
'name': name,
# https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets
'type': 'gcp-types/bigquery-v2:datasets',
'name': context.env['name'],
'properties': properties
}
]

outputs = [
{
'name': 'selfLink',
'value': '$(ref.{}.selfLink)'.format(name)
'value': '$(ref.{}.selfLink)'.format(context.env['name'])
},
{
'name': 'datasetId',
'value': name
},
{
'name': 'etag',
'value': '$(ref.{}.etag)'.format(name)
'value': '$(ref.{}.etag)'.format(context.env['name'])
},
{
'name': 'creationTime',
'value': '$(ref.{}.creationTime)'.format(name)
'value': '$(ref.{}.creationTime)'.format(context.env['name'])
},
{
'name': 'lastModifiedTime',
'value': '$(ref.{}.lastModifiedTime)'.format(name)
'value': '$(ref.{}.lastModifiedTime)'.format(context.env['name'])
}
]

Expand Down
82 changes: 61 additions & 21 deletions dm/templates/bigquery/bigquery_dataset.py.schema
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,17 @@
info:
title: BigQuery Dataset
author: Sourced Group Inc.
version: 1.0.0
description: |
Creates a BigQuery dataset.

For information on this resource:
https://cloud.google.com/bigquery/docs/.

API endpoints used by this template:
- gcp-types/bigquery-v2:datasets =>
https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets

imports:
- path: bigquery_dataset.py

Expand All @@ -31,7 +37,21 @@ required:
properties:
name:
type: string
description: The resource name.
description: |
The dataset name. The resource name is used if omitted.
project:
type: string
description: |
The project ID of the project containing the dataset. The
Google apps domain is prefixed if applicable.
friendlyName:
type: string
description: |
A descriptive name for the dataset.
description:
type: string
description: |
A user-friendly description of the dataset.
location:
type: string
description: |
Expand All @@ -45,6 +65,7 @@ properties:
- US
access:
type: array
uniqueItems: true
description: |
An array of objects that define dataset access for one or more
entities. You can set this property when inserting or updating
Expand All @@ -56,42 +77,44 @@ properties:
access.specialGroup: projectOwners; access.role: OWNER
access.userByEmail: [dataset creator email]; access.role: OWNER
items:
role:
type: string
description: |
The role (rights) granted to the user specified by the other
member of the access object. The following string values are
supported: READER, WRITER, OWNER. See details at
https://cloud.google.com/bigquery/docs/access-control.
enum:
- READER
- WRITER
- OWNER
oneOf:
- domain:
type: object
additionalProperties: false
required:
- role
properties:
role:
type: string
description: |
An IAM role ID that should be granted to the user, group, or domain specified in this access entry.
The following legacy mappings will be applied:
OWNER <=> roles/bigquery.dataOwner
WRITER <=> roles/bigquery.dataEditor
READER <=> roles/bigquery.dataViewer
This field will accept any of the above formats, but will return only the legacy format.
For example, if you set this field to "roles/bigquery.dataOwner", it will be returned back as "OWNER".
@mutable bigquery.datasets.update
domain:
type: string
description: |
The domain to grant access to. All users signed in with the
specified domain are granted the corresponding access.
Example: "example.com".
- userByEmail:
userByEmail:
type: string
description: |
The email address of a user to grant access to. For example:
fred@example.com.
- groupByEmail:
groupByEmail:
type: string
description: The email address of a Google Group to grant access to.
- specialGroup:
specialGroup:
type: string
description: |
The special group to grant access to. Possible values include:
projectOwners: owners of the enclosing project
projectReaders: readers of the enclosing project
projectWriters: writers of the enclosing project
allAuthenticatedUsers: all authenticated BigQuery users
- view:
view:
type: object
additionalProperties: false
description: |
A view from a different dataset to grant access to. Queries
executed against that view have the Read access to tables in that
Expand All @@ -112,9 +135,6 @@ properties:
The table ID. The ID must contain only letters
(a-z, A-Z), numbers (0-9), or underscores (_). The maximum
length is 1,024 characters.
description:
type: string
description: A user-friendly description of the dataset.
setDefaultOwner:
type: boolean
default: False
Expand All @@ -136,6 +156,26 @@ properties:
expirationTime while creating the table, that value takes precedence over
the default expiration time indicated by this property.
minimum: 3600000
defaultPartitionExpirationMs:
type: string
format: int64
description: |
The default partition expiration for all partitioned tables in the dataset, in milliseconds.
Once this property is set, all newly-created partitioned tables in the dataset will have an expirationMs
property in the timePartitioning settings set to this value, and changing the value will only affect new tables,
not existing ones. The storage in a partition will have an expiration time of its partition time plus this value.
Setting this property overrides the use of defaultTableExpirationMs for partitioned tables: only one of
defaultTableExpirationMs and defaultPartitionExpirationMs will be used for any new partitioned table.
If you provide an explicit timePartitioning.expirationMs when creating or updating a partitioned table,
that value takes precedence over the default partition expiration time indicated by this property.
labels:
type: object
description: |
A map of labels associated with this dataset.
Example:
name: wrench
mass: 1.3kg
count: 3

outputs:
properties:
Expand Down
32 changes: 18 additions & 14 deletions dm/templates/bigquery/bigquery_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,19 @@
def generate_config(context):
""" Entry point for the deployment resources. """

name = context.properties['name']
properties = context.properties
name = properties.get('name', context.env['name'])
project_id = properties.get('project', context.env['project'])

properties = {
'tableReference':
{
'tableId': name,
'datasetId': context.properties['datasetId'],
'projectId': context.env['project']
'projectId': project_id
},
'datasetId': context.properties['datasetId']
'datasetId': context.properties['datasetId'],
'projectId': project_id,
}

optional_properties = [
Expand All @@ -48,8 +51,9 @@ def generate_config(context):

resources = [
{
'type': 'bigquery.v2.table',
'name': name,
# https://cloud.google.com/bigquery/docs/reference/rest/v2/tables
'type': 'gcp-types/bigquery-v2:tables',
'name': context.env['name'],
'properties': properties,
'metadata': {
'dependsOn': [context.properties['datasetId']]
Expand All @@ -60,39 +64,39 @@ def generate_config(context):
outputs = [
{
'name': 'selfLink',
'value': '$(ref.{}.selfLink)'.format(name)
'value': '$(ref.{}.selfLink)'.format(context.env['name'])
},
{
'name': 'etag',
'value': '$(ref.{}.etag)'.format(name)
'value': '$(ref.{}.etag)'.format(context.env['name'])
},
{
'name': 'creationTime',
'value': '$(ref.{}.creationTime)'.format(name)
'value': '$(ref.{}.creationTime)'.format(context.env['name'])
},
{
'name': 'lastModifiedTime',
'value': '$(ref.{}.lastModifiedTime)'.format(name)
'value': '$(ref.{}.lastModifiedTime)'.format(context.env['name'])
},
{
'name': 'location',
'value': '$(ref.{}.location)'.format(name)
'value': '$(ref.{}.location)'.format(context.env['name'])
},
{
'name': 'numBytes',
'value': '$(ref.{}.numBytes)'.format(name)
'value': '$(ref.{}.numBytes)'.format(context.env['name'])
},
{
'name': 'numLongTermBytes',
'value': '$(ref.{}.numLongTermBytes)'.format(name)
'value': '$(ref.{}.numLongTermBytes)'.format(context.env['name'])
},
{
'name': 'numRows',
'value': '$(ref.{}.numRows)'.format(name)
'value': '$(ref.{}.numRows)'.format(context.env['name'])
},
{
'name': 'type',
'value': '$(ref.{}.type)'.format(name)
'value': '$(ref.{}.type)'.format(context.env['name'])
}
]

Expand Down
Loading

0 comments on commit 88ea0ca

Please sign in to comment.