Skip to content

Commit

Permalink
feat(bigquery): add Dataset.default_partition_expiration_ms and Table…
Browse files Browse the repository at this point in the history
….require_partition_filter properties (#9464)

* feat(bigquery): add Dataset.default_partition_expiration_ms and Table.require_partition_filter properties

Samples double as system tests. I aim to use them here:

* https://cloud.google.com/bigquery/docs/managing-partitioned-tables#require-filter
* https://cloud.google.com/bigquery/docs/updating-datasets#partition-expiration

Note: this also (silently) deprecates
TimePartitioning.require_partition_filter, as that's duplicating the
same functionality.

I was curious why the expiration_ms wasn't also moving up, but then I
realized that property only makes sense if a partition is associated
with a timestamp.

* add deprecation warning to require_partition_filter

* blacken
  • Loading branch information
tswast authored Oct 15, 2019
1 parent a79d98d commit 2fdd6e6
Show file tree
Hide file tree
Showing 8 changed files with 284 additions and 42 deletions.
29 changes: 29 additions & 0 deletions bigquery/google/cloud/bigquery/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,7 @@ class Dataset(object):
# Maps snake_case Dataset property names to the corresponding camelCase
# field names in the BigQuery REST API dataset resource.
_PROPERTY_TO_API_FIELD = {
"access_entries": "access",
"created": "creationTime",
"default_partition_expiration_ms": "defaultPartitionExpirationMs",
"default_table_expiration_ms": "defaultTableExpirationMs",
"friendly_name": "friendlyName",
}
Expand Down Expand Up @@ -460,6 +461,34 @@ def self_link(self):
"""
return self._properties.get("selfLink")

@property
def default_partition_expiration_ms(self):
    """Optional[int]: The default partition expiration time, in
    milliseconds, for all partitioned tables in the dataset.

    Once set, every partitioned table subsequently created in the dataset
    gets a ``time_partitioning.expiration_ms`` equal to this value;
    changing it later affects only new tables, not existing ones. The
    storage in a partition expires at its partition time plus this value.

    Setting this property overrides ``default_table_expiration_ms`` for
    partitioned tables: only one of the two defaults is applied to any
    new partitioned table. An explicit ``time_partitioning.expiration_ms``
    supplied when creating or updating a partitioned table takes
    precedence over this default.
    """
    # The REST API encodes 64-bit integers as strings; convert back.
    api_value = self._properties.get("defaultPartitionExpirationMs")
    return _helpers._int_or_none(api_value)

@default_partition_expiration_ms.setter
def default_partition_expiration_ms(self, value):
    # Store in the API's string representation (None stays None).
    api_value = _helpers._str_or_none(value)
    self._properties["defaultPartitionExpirationMs"] = api_value

@property
def default_table_expiration_ms(self):
"""Union[int, None]: Default expiration time for tables in the dataset
Expand Down
41 changes: 38 additions & 3 deletions bigquery/google/cloud/bigquery/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,7 @@ class Table(object):
"view_query": "view",
"external_data_configuration": "externalDataConfiguration",
"encryption_configuration": "encryptionConfiguration",
"require_partition_filter": "requirePartitionFilter",
}

def __init__(self, table_ref, schema=None):
Expand Down Expand Up @@ -420,6 +421,18 @@ def path(self):
self.table_id,
)

@property
def require_partition_filter(self):
    """bool: If set to true, queries over the partitioned table require a
    partition filter (usable for partition elimination) to be specified.
    """
    # Absent key means the server default (no filter required).
    return self._properties.get("requirePartitionFilter")

@require_partition_filter.setter
def require_partition_filter(self, value):
    # Stored verbatim; the API accepts a JSON boolean here.
    self._properties["requirePartitionFilter"] = value

@property
def schema(self):
"""List[google.cloud.bigquery.schema.SchemaField]: Table's schema.
Expand Down Expand Up @@ -1722,9 +1735,9 @@ class TimePartitioning(object):
Number of milliseconds for which to keep the storage for a
partition.
require_partition_filter (bool, optional):
If set to true, queries over the partitioned table require a
partition filter that can be used for partition elimination to be
specified.
DEPRECATED: Use
:attr:`~google.cloud.bigquery.table.Table.require_partition_filter`,
instead.
"""

def __init__(
Expand Down Expand Up @@ -1777,11 +1790,33 @@ def expiration_ms(self, value):
@property
def require_partition_filter(self):
    """bool: Specifies whether partition filters are required for queries

    DEPRECATED: Use
    :attr:`~google.cloud.bigquery.table.Table.require_partition_filter`,
    instead.
    """
    message = (
        "TimePartitioning.require_partition_filter will be removed in "
        "future versions. Please use Table.require_partition_filter "
        "instead."
    )
    # Pending (not full) deprecation: hidden by default warning filters.
    warnings.warn(message, PendingDeprecationWarning, stacklevel=2)
    return self._properties.get("requirePartitionFilter")

@require_partition_filter.setter
def require_partition_filter(self, value):
    # Same deprecation notice on write as on read.
    message = (
        "TimePartitioning.require_partition_filter will be removed in "
        "future versions. Please use Table.require_partition_filter "
        "instead."
    )
    warnings.warn(message, PendingDeprecationWarning, stacklevel=2)
    self._properties["requirePartitionFilter"] = value

@classmethod
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .. import update_dataset_default_partition_expiration


def test_update_dataset_default_partition_expiration(capsys, client, dataset_id):
    # 90 days expressed in milliseconds -- the value the sample sets.
    ninety_days_ms = 90 * 24 * 60 * 60 * 1000

    update_dataset_default_partition_expiration.update_dataset_default_partition_expiration(
        client, dataset_id
    )

    out, _ = capsys.readouterr()
    expected = "Updated dataset {} with new default partition expiration {}".format(
        dataset_id, ninety_days_ms
    )
    assert expected in out
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from google.cloud import bigquery
from .. import update_table_require_partition_filter


def test_update_table_require_partition_filter(capsys, client, random_table_id):
    # The sample only makes sense on a partitioned table, so create one first.
    schema = [bigquery.SchemaField("transaction_timestamp", "TIMESTAMP")]
    partitioned_table = bigquery.Table(random_table_id, schema)
    partitioned_table.time_partitioning = bigquery.TimePartitioning(
        field="transaction_timestamp"
    )
    client.create_table(partitioned_table)

    update_table_require_partition_filter.update_table_require_partition_filter(
        client, random_table_id
    )

    out, _ = capsys.readouterr()
    expected = "Updated table '{}' with require_partition_filter=True".format(
        random_table_id
    )
    assert expected in out
43 changes: 43 additions & 0 deletions bigquery/samples/update_dataset_default_partition_expiration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def update_dataset_default_partition_expiration(client, dataset_id):
    """Sample: set a dataset's default partition expiration to 90 days."""

    # [START bigquery_update_dataset_partition_expiration]
    # TODO(developer): Import the client library.
    # from google.cloud import bigquery

    # TODO(developer): Construct a BigQuery client object.
    # client = bigquery.Client()

    # TODO(developer): Set dataset_id to the ID of the dataset to fetch.
    # dataset_id = 'your-project.your_dataset'

    dataset = client.get_dataset(dataset_id)  # Make an API request.

    # The default partition expiration applies only to tables created after
    # this update. This example uses 90 days, in milliseconds.
    ninety_days_ms = 90 * 24 * 60 * 60 * 1000
    dataset.default_partition_expiration_ms = ninety_days_ms

    dataset = client.update_dataset(
        dataset, ["default_partition_expiration_ms"]
    )  # Make an API request.

    print(
        "Updated dataset {}.{} with new default partition expiration {}".format(
            dataset.project, dataset.dataset_id, dataset.default_partition_expiration_ms
        )
    )
    # [END bigquery_update_dataset_partition_expiration]
41 changes: 41 additions & 0 deletions bigquery/samples/update_table_require_partition_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def update_table_require_partition_filter(client, table_id):
    """Sample: require a partition filter on queries against a table."""

    # [START bigquery_update_table_require_partition_filter]
    # TODO(developer): Import the client library.
    # from google.cloud import bigquery

    # TODO(developer): Construct a BigQuery client object.
    # client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to fetch.
    # table_id = 'your-project.your_dataset.your_table'

    table = client.get_table(table_id)  # Make an API request.
    table.require_partition_filter = True
    table = client.update_table(table, ["require_partition_filter"])

    # View table properties
    print(
        "Updated table '{}.{}.{}' with require_partition_filter={}.".format(
            table.project,
            table.dataset_id,
            table.table_id,
            table.require_partition_filter,
        )
    )
    # [END bigquery_update_table_require_partition_filter]
8 changes: 8 additions & 0 deletions bigquery/tests/unit/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,14 @@ def test_access_entries_setter(self):
dataset.access_entries = [phred, bharney]
self.assertEqual(dataset.access_entries, [phred, bharney])

def test_default_partition_expiration_ms(self):
    # Round-trip through the string-typed API field: unset reads as None,
    # an int value is preserved, and assigning None clears it again.
    dataset = self._make_one("proj.dset")
    assert dataset.default_partition_expiration_ms is None
    dataset.default_partition_expiration_ms = 12345
    assert dataset.default_partition_expiration_ms == 12345
    dataset.default_partition_expiration_ms = None
    assert dataset.default_partition_expiration_ms is None

def test_default_table_expiration_ms_setter_bad_value(self):
dataset = self._make_one(self.DS_REF)
with self.assertRaises(ValueError):
Expand Down
Loading

0 comments on commit 2fdd6e6

Please sign in to comment.