From 47d3202d544135b110ba4852d3c26a5d001eef8b Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Mon, 14 Oct 2019 16:06:48 -0700
Subject: [PATCH 1/3] feat(bigquery): add Dataset.default_partition_expiration_ms and Table.require_partition_filter properties

Samples double as system tests. I aim to use them here:

* https://cloud.google.com/bigquery/docs/managing-partitioned-tables#require-filter
* https://cloud.google.com/bigquery/docs/updating-datasets#partition-expiration

Note: this also (silently) deprecates TimePartitioning.require_partition_filter,
as it duplicates the same functionality.

I was curious why expiration_ms wasn't also moving up, but then I realized that
property only makes sense if a partition is associated with a timestamp.
---
 bigquery/google/cloud/bigquery/dataset.py     | 29 +++++++++++++
 bigquery/google/cloud/bigquery/table.py       | 23 ++++++++--
 ...te_dataset_default_partition_expiration.py | 31 +++++++++++++
 ...t_update_table_require_partition_filter.py | 33 ++++++++++++++
 ...te_dataset_default_partition_expiration.py | 43 +++++++++++++++++++
 .../update_table_require_partition_filter.py  | 41 ++++++++++++++++++
 bigquery/tests/unit/test_dataset.py           |  8 ++++
 bigquery/tests/unit/test_table.py             | 11 +++++
 8 files changed, 216 insertions(+), 3 deletions(-)
 create mode 100644 bigquery/samples/tests/test_update_dataset_default_partition_expiration.py
 create mode 100644 bigquery/samples/tests/test_update_table_require_partition_filter.py
 create mode 100644 bigquery/samples/update_dataset_default_partition_expiration.py
 create mode 100644 bigquery/samples/update_table_require_partition_filter.py

diff --git a/bigquery/google/cloud/bigquery/dataset.py b/bigquery/google/cloud/bigquery/dataset.py
index 3b241dd7776d..ced80581a758 100644
--- a/bigquery/google/cloud/bigquery/dataset.py
+++ b/bigquery/google/cloud/bigquery/dataset.py
@@ -358,6 +358,7 @@ class Dataset(object):
     _PROPERTY_TO_API_FIELD = {
         "access_entries": "access",
         "created": "creationTime",
+        "default_partition_expiration_ms": "defaultPartitionExpirationMs",
         "default_table_expiration_ms": "defaultTableExpirationMs",
         "friendly_name": "friendlyName",
     }
@@ -460,6 +461,34 @@ def self_link(self):
         """
         return self._properties.get("selfLink")

+    @property
+    def default_partition_expiration_ms(self):
+        """Optional[int]: The default partition expiration for all
+        partitioned tables in the dataset, in milliseconds.
+
+        Once this property is set, all newly-created partitioned tables in
+        the dataset will have a ``time_partitioning.expiration_ms`` property
+        set to this value, and changing the value will only affect new
+        tables, not existing ones. The storage in a partition will have an
+        expiration time of its partition time plus this value.
+
+        Setting this property overrides the use of
+        ``default_table_expiration_ms`` for partitioned tables: only one of
+        ``default_table_expiration_ms`` and
+        ``default_partition_expiration_ms`` will be used for any new
+        partitioned table. If you provide an explicit
+        ``time_partitioning.expiration_ms`` when creating or updating a
+        partitioned table, that value takes precedence over the default
+        partition expiration time indicated by this property.
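+
+        Example, assuming ``client`` is an existing
+        :class:`~google.cloud.bigquery.client.Client` and the dataset ID is a
+        placeholder (this mirrors the sample added in this change)::
+
+            dataset = client.get_dataset("your-project.your_dataset")
+            dataset.default_partition_expiration_ms = 90 * 24 * 60 * 60 * 1000
+            dataset = client.update_dataset(
+                dataset, ["default_partition_expiration_ms"]
+            )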
+ """ + return _helpers._int_or_none( + self._properties.get("defaultPartitionExpirationMs") + ) + + @default_partition_expiration_ms.setter + def default_partition_expiration_ms(self, value): + self._properties["defaultPartitionExpirationMs"] = _helpers._str_or_none(value) + @property def default_table_expiration_ms(self): """Union[int, None]: Default expiration time for tables in the dataset diff --git a/bigquery/google/cloud/bigquery/table.py b/bigquery/google/cloud/bigquery/table.py index f3c0916811e7..16aca2dd539d 100644 --- a/bigquery/google/cloud/bigquery/table.py +++ b/bigquery/google/cloud/bigquery/table.py @@ -385,6 +385,7 @@ class Table(object): "view_query": "view", "external_data_configuration": "externalDataConfiguration", "encryption_configuration": "encryptionConfiguration", + "require_partition_filter": "requirePartitionFilter", } def __init__(self, table_ref, schema=None): @@ -420,6 +421,18 @@ def path(self): self.table_id, ) + @property + def require_partition_filter(self): + """bool: If set to true, queries over the partitioned table require a + partition filter that can be used for partition elimination to be + specified. + """ + return self._properties.get("requirePartitionFilter") + + @require_partition_filter.setter + def require_partition_filter(self, value): + self._properties["requirePartitionFilter"] = value + @property def schema(self): """List[google.cloud.bigquery.schema.SchemaField]: Table's schema. @@ -1722,9 +1735,9 @@ class TimePartitioning(object): Number of milliseconds for which to keep the storage for a partition. require_partition_filter (bool, optional): - If set to true, queries over the partitioned table require a - partition filter that can be used for partition elimination to be - specified. + DEPRECATED: Use + :attr:`~google.cloud.bigquery.table.Table.require_partition_filter`, + instead. """ def __init__( @@ -1777,6 +1790,10 @@ def expiration_ms(self, value): @property def require_partition_filter(self): """bool: Specifies whether partition filters are required for queries + + DEPRECATED: Use + :attr:`~google.cloud.bigquery.table.Table.require_partition_filter`, + instead. """ return self._properties.get("requirePartitionFilter") diff --git a/bigquery/samples/tests/test_update_dataset_default_partition_expiration.py b/bigquery/samples/tests/test_update_dataset_default_partition_expiration.py new file mode 100644 index 000000000000..55fa4b0d96fb --- /dev/null +++ b/bigquery/samples/tests/test_update_dataset_default_partition_expiration.py @@ -0,0 +1,31 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. 
import update_dataset_default_partition_expiration + + +def test_update_dataset_default_partition_expiration(capsys, client, dataset_id): + + ninety_days_ms = 90 * 24 * 60 * 60 * 1000 # in milliseconds + + update_dataset_default_partition_expiration.update_dataset_default_partition_expiration( + client, dataset_id + ) + out, _ = capsys.readouterr() + assert ( + "Updated dataset {} with new default partition expiration {}".format( + dataset_id, ninety_days_ms + ) + in out + ) diff --git a/bigquery/samples/tests/test_update_table_require_partition_filter.py b/bigquery/samples/tests/test_update_table_require_partition_filter.py new file mode 100644 index 000000000000..1cbd2b2279b2 --- /dev/null +++ b/bigquery/samples/tests/test_update_table_require_partition_filter.py @@ -0,0 +1,33 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery +from .. import update_table_require_partition_filter + + +def test_update_table_require_partition_filter(capsys, client, random_table_id): + # Make a partitioned table. + schema = [bigquery.SchemaField("transaction_timestamp", "TIMESTAMP")] + table = bigquery.Table(random_table_id, schema) + table.time_partitioning = bigquery.TimePartitioning(field="transaction_timestamp") + table = client.create_table(table) + + update_table_require_partition_filter.update_table_require_partition_filter( + client, random_table_id + ) + out, _ = capsys.readouterr() + assert ( + "Updated table '{}' with require_partition_filter=True".format(random_table_id) + in out + ) diff --git a/bigquery/samples/update_dataset_default_partition_expiration.py b/bigquery/samples/update_dataset_default_partition_expiration.py new file mode 100644 index 000000000000..502d52ff199b --- /dev/null +++ b/bigquery/samples/update_dataset_default_partition_expiration.py @@ -0,0 +1,43 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def update_dataset_default_partition_expiration(client, dataset_id): + + # [START bigquery_update_dataset_partition_expiration] + # TODO(developer): Import the client library. + # from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set dataset_id to the ID of the dataset to fetch. + # dataset_id = 'your-project.your_dataset' + + dataset = client.get_dataset(dataset_id) # Make an API request. + + # Set the default partition expiration (applies to new tables, only) in + # milliseconds. 
This example sets the default expiration to 90 days.
+    dataset.default_partition_expiration_ms = 90 * 24 * 60 * 60 * 1000
+
+    dataset = client.update_dataset(
+        dataset, ["default_partition_expiration_ms"]
+    )  # Make an API request.
+
+    print(
+        "Updated dataset {}.{} with new default partition expiration {}".format(
+            dataset.project, dataset.dataset_id, dataset.default_partition_expiration_ms
+        )
+    )
+    # [END bigquery_update_dataset_partition_expiration]
diff --git a/bigquery/samples/update_table_require_partition_filter.py b/bigquery/samples/update_table_require_partition_filter.py
new file mode 100644
index 000000000000..4c6be2d2cedc
--- /dev/null
+++ b/bigquery/samples/update_table_require_partition_filter.py
@@ -0,0 +1,41 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def update_table_require_partition_filter(client, table_id):
+
+    # [START bigquery_update_table_require_partition_filter]
+    # TODO(developer): Import the client library.
+    # from google.cloud import bigquery
+
+    # TODO(developer): Construct a BigQuery client object.
+    # client = bigquery.Client()
+
+    # TODO(developer): Set table_id to the ID of the table to fetch.
+    # table_id = 'your-project.your_dataset.your_table'
+
+    table = client.get_table(table_id)  # Make an API request.
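+
+    # When require_partition_filter is True, queries over this table must
+    # include a partition filter that can be used for partition elimination.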
+ table.require_partition_filter = True + table = client.update_table(table, ["require_partition_filter"]) + + # View table properties + print( + "Updated table '{}.{}.{}' with require_partition_filter={}.".format( + table.project, + table.dataset_id, + table.table_id, + table.require_partition_filter, + ) + ) + # [END bigquery_update_table_require_partition_filter] diff --git a/bigquery/tests/unit/test_dataset.py b/bigquery/tests/unit/test_dataset.py index 26b1729a240c..9b2276480843 100644 --- a/bigquery/tests/unit/test_dataset.py +++ b/bigquery/tests/unit/test_dataset.py @@ -454,6 +454,14 @@ def test_access_entries_setter(self): dataset.access_entries = [phred, bharney] self.assertEqual(dataset.access_entries, [phred, bharney]) + def test_default_partition_expiration_ms(self): + dataset = self._make_one("proj.dset") + assert dataset.default_partition_expiration_ms is None + dataset.default_partition_expiration_ms = 12345 + assert dataset.default_partition_expiration_ms == 12345 + dataset.default_partition_expiration_ms = None + assert dataset.default_partition_expiration_ms is None + def test_default_table_expiration_ms_setter_bad_value(self): dataset = self._make_one(self.DS_REF) with self.assertRaises(ValueError): diff --git a/bigquery/tests/unit/test_table.py b/bigquery/tests/unit/test_table.py index 562bcf6b4e7d..1330eb62d773 100644 --- a/bigquery/tests/unit/test_table.py +++ b/bigquery/tests/unit/test_table.py @@ -928,6 +928,17 @@ def test__build_resource_w_custom_field_not_in__properties(self): with self.assertRaises(ValueError): table._build_resource(["bad"]) + def test_require_partitioning_filter(self): + table = self._make_one("proj.dset.tbl") + assert table.require_partition_filter is None + table.require_partition_filter = True + assert table.require_partition_filter + table.require_partition_filter = False + assert table.require_partition_filter is not None + assert not table.require_partition_filter + table.require_partition_filter = None + assert table.require_partition_filter is None + def test_time_partitioning_getter(self): from google.cloud.bigquery.table import TimePartitioning from google.cloud.bigquery.table import TimePartitioningType From 84e2c3cca903d78e56706c5e777ff4d9b322ef79 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 15 Oct 2019 14:26:31 -0700 Subject: [PATCH 2/3] add deprecation warning to require_partition_filter --- bigquery/google/cloud/bigquery/table.py | 18 +++++++ bigquery/tests/unit/test_table.py | 62 ++++++++++++++++++------- 2 files changed, 64 insertions(+), 16 deletions(-) diff --git a/bigquery/google/cloud/bigquery/table.py b/bigquery/google/cloud/bigquery/table.py index 16aca2dd539d..4373d99c590f 100644 --- a/bigquery/google/cloud/bigquery/table.py +++ b/bigquery/google/cloud/bigquery/table.py @@ -1795,10 +1795,28 @@ def require_partition_filter(self): :attr:`~google.cloud.bigquery.table.Table.require_partition_filter`, instead. """ + warnings.warn( + ( + "TimePartitioning.require_partition_filter will be removed in " + "future versions. Please use Table.require_partition_filter " + "instead." + ), + PendingDeprecationWarning, + stacklevel=2, + ) return self._properties.get("requirePartitionFilter") @require_partition_filter.setter def require_partition_filter(self, value): + warnings.warn( + ( + "TimePartitioning.require_partition_filter will be removed in " + "future versions. Please use Table.require_partition_filter " + "instead." 
+ ), + PendingDeprecationWarning, + stacklevel=2, + ) self._properties["requirePartitionFilter"] = value @classmethod diff --git a/bigquery/tests/unit/test_table.py b/bigquery/tests/unit/test_table.py index 1330eb62d773..896cf65c39b1 100644 --- a/bigquery/tests/unit/test_table.py +++ b/bigquery/tests/unit/test_table.py @@ -957,7 +957,12 @@ def test_time_partitioning_getter(self): self.assertEqual(table.time_partitioning.type_, TimePartitioningType.DAY) self.assertEqual(table.time_partitioning.field, "col1") self.assertEqual(table.time_partitioning.expiration_ms, 123456) - self.assertFalse(table.time_partitioning.require_partition_filter) + + with warnings.catch_warnings(record=True) as warned: + self.assertFalse(table.time_partitioning.require_partition_filter) + + assert len(warned) == 1 + self.assertIs(warned[0].category, PendingDeprecationWarning) def test_time_partitioning_getter_w_none(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -985,7 +990,12 @@ def test_time_partitioning_getter_w_empty(self): self.assertIsNone(table.time_partitioning.type_) self.assertIsNone(table.time_partitioning.field) self.assertIsNone(table.time_partitioning.expiration_ms) - self.assertIsNone(table.time_partitioning.require_partition_filter) + + with warnings.catch_warnings(record=True) as warned: + self.assertIsNone(table.time_partitioning.require_partition_filter) + + for warning in warned: + self.assertIs(warning.category, PendingDeprecationWarning) def test_time_partitioning_setter(self): from google.cloud.bigquery.table import TimePartitioning @@ -2846,11 +2856,9 @@ def _make_one(self, *args, **kw): def test_constructor_defaults(self): time_partitioning = self._make_one() - self.assertEqual(time_partitioning.type_, "DAY") self.assertIsNone(time_partitioning.field) self.assertIsNone(time_partitioning.expiration_ms) - self.assertIsNone(time_partitioning.require_partition_filter) def test_constructor_explicit(self): from google.cloud.bigquery.table import TimePartitioningType @@ -2859,13 +2867,23 @@ def test_constructor_explicit(self): type_=TimePartitioningType.DAY, field="name", expiration_ms=10000, - require_partition_filter=True, ) self.assertEqual(time_partitioning.type_, "DAY") self.assertEqual(time_partitioning.field, "name") self.assertEqual(time_partitioning.expiration_ms, 10000) - self.assertTrue(time_partitioning.require_partition_filter) + + def test_require_partition_filter_warns_deprecation(self): + object_under_test = self._make_one() + + with warnings.catch_warnings(record=True) as warned: + assert object_under_test.require_partition_filter is None + object_under_test.require_partition_filter = True + assert object_under_test.require_partition_filter + + assert len(warned) == 3 + for warning in warned: + self.assertIs(warning.category, PendingDeprecationWarning) def test_from_api_repr_empty(self): klass = self._get_target_class() @@ -2879,7 +2897,6 @@ def test_from_api_repr_empty(self): self.assertIsNone(time_partitioning.type_) self.assertIsNone(time_partitioning.field) self.assertIsNone(time_partitioning.expiration_ms) - self.assertIsNone(time_partitioning.require_partition_filter) def test_from_api_repr_minimal(self): from google.cloud.bigquery.table import TimePartitioningType @@ -2891,7 +2908,6 @@ def test_from_api_repr_minimal(self): self.assertEqual(time_partitioning.type_, TimePartitioningType.DAY) self.assertIsNone(time_partitioning.field) self.assertIsNone(time_partitioning.expiration_ms) - self.assertIsNone(time_partitioning.require_partition_filter) def 
test_from_api_repr_doesnt_override_type(self): klass = self._get_target_class() @@ -2914,7 +2930,11 @@ def test_from_api_repr_explicit(self): self.assertEqual(time_partitioning.type_, TimePartitioningType.DAY) self.assertEqual(time_partitioning.field, "name") self.assertEqual(time_partitioning.expiration_ms, 10000) - self.assertTrue(time_partitioning.require_partition_filter) + + with warnings.catch_warnings(record=True) as warned: + self.assertTrue(time_partitioning.require_partition_filter) + + self.assertIs(warned[0].category, PendingDeprecationWarning) def test_to_api_repr_defaults(self): time_partitioning = self._make_one() @@ -2928,9 +2948,13 @@ def test_to_api_repr_explicit(self): type_=TimePartitioningType.DAY, field="name", expiration_ms=10000, - require_partition_filter=True, ) + with warnings.catch_warnings(record=True) as warned: + time_partitioning.require_partition_filter = True + + self.assertIs(warned[0].category, PendingDeprecationWarning) + expected = { "type": "DAY", "field": "name", @@ -2962,19 +2986,27 @@ def test___eq___expiration_ms_mismatch(self): def test___eq___require_partition_filter_mismatch(self): time_partitioning = self._make_one( - field="foo", expiration_ms=100000, require_partition_filter=True + field="foo", expiration_ms=100000 ) other = self._make_one( - field="foo", expiration_ms=100000, require_partition_filter=False + field="foo", expiration_ms=100000 ) + with warnings.catch_warnings(record=True) as warned: + time_partitioning.require_partition_filter = True + other.require_partition_filter = False + + assert len(warned) == 2 + for warning in warned: + self.assertIs(warning.category, PendingDeprecationWarning) + self.assertNotEqual(time_partitioning, other) def test___eq___hit(self): time_partitioning = self._make_one( - field="foo", expiration_ms=100000, require_partition_filter=True + field="foo", expiration_ms=100000 ) other = self._make_one( - field="foo", expiration_ms=100000, require_partition_filter=True + field="foo", expiration_ms=100000 ) self.assertEqual(time_partitioning, other) @@ -3022,13 +3054,11 @@ def test___repr___explicit(self): type_=TimePartitioningType.DAY, field="name", expiration_ms=10000, - require_partition_filter=True, ) expected = ( "TimePartitioning(" "expirationMs=10000," "field=name," - "requirePartitionFilter=True," "type=DAY)" ) self.assertEqual(repr(time_partitioning), expected) From c4b75b6a8629105a20fafeffbfe55af2b7373cb9 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 15 Oct 2019 14:27:29 -0700 Subject: [PATCH 3/3] blacken --- bigquery/tests/unit/test_table.py | 35 +++++++------------------------ 1 file changed, 8 insertions(+), 27 deletions(-) diff --git a/bigquery/tests/unit/test_table.py b/bigquery/tests/unit/test_table.py index 896cf65c39b1..68fc71db3f10 100644 --- a/bigquery/tests/unit/test_table.py +++ b/bigquery/tests/unit/test_table.py @@ -2864,9 +2864,7 @@ def test_constructor_explicit(self): from google.cloud.bigquery.table import TimePartitioningType time_partitioning = self._make_one( - type_=TimePartitioningType.DAY, - field="name", - expiration_ms=10000, + type_=TimePartitioningType.DAY, field="name", expiration_ms=10000 ) self.assertEqual(time_partitioning.type_, "DAY") @@ -2945,9 +2943,7 @@ def test_to_api_repr_explicit(self): from google.cloud.bigquery.table import TimePartitioningType time_partitioning = self._make_one( - type_=TimePartitioningType.DAY, - field="name", - expiration_ms=10000, + type_=TimePartitioningType.DAY, field="name", expiration_ms=10000 ) with 
warnings.catch_warnings(record=True) as warned: @@ -2985,12 +2981,8 @@ def test___eq___expiration_ms_mismatch(self): self.assertNotEqual(time_partitioning, other) def test___eq___require_partition_filter_mismatch(self): - time_partitioning = self._make_one( - field="foo", expiration_ms=100000 - ) - other = self._make_one( - field="foo", expiration_ms=100000 - ) + time_partitioning = self._make_one(field="foo", expiration_ms=100000) + other = self._make_one(field="foo", expiration_ms=100000) with warnings.catch_warnings(record=True) as warned: time_partitioning.require_partition_filter = True other.require_partition_filter = False @@ -3002,12 +2994,8 @@ def test___eq___require_partition_filter_mismatch(self): self.assertNotEqual(time_partitioning, other) def test___eq___hit(self): - time_partitioning = self._make_one( - field="foo", expiration_ms=100000 - ) - other = self._make_one( - field="foo", expiration_ms=100000 - ) + time_partitioning = self._make_one(field="foo", expiration_ms=100000) + other = self._make_one(field="foo", expiration_ms=100000) self.assertEqual(time_partitioning, other) def test___ne___wrong_type(self): @@ -3051,16 +3039,9 @@ def test___repr___explicit(self): from google.cloud.bigquery.table import TimePartitioningType time_partitioning = self._make_one( - type_=TimePartitioningType.DAY, - field="name", - expiration_ms=10000, - ) - expected = ( - "TimePartitioning(" - "expirationMs=10000," - "field=name," - "type=DAY)" + type_=TimePartitioningType.DAY, field="name", expiration_ms=10000 ) + expected = "TimePartitioning(" "expirationMs=10000," "field=name," "type=DAY)" self.assertEqual(repr(time_partitioning), expected) def test_set_expiration_w_none(self):
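
Putting the two new properties together, here is a minimal usage sketch. The
project, dataset, and table IDs are placeholders and the client is assumed to
already exist; it mirrors the samples in this patch rather than introducing
new behavior.

from google.cloud import bigquery

client = bigquery.Client()

# Dataset-level default: new partitioned tables in the dataset expire each
# partition 90 days after the partition time.
dataset = client.get_dataset("your-project.your_dataset")
dataset.default_partition_expiration_ms = 90 * 24 * 60 * 60 * 1000
dataset = client.update_dataset(dataset, ["default_partition_expiration_ms"])

# Table-level flag: require a partition filter in queries, now set on the
# Table itself instead of on TimePartitioning (which emits a
# PendingDeprecationWarning for this property).
table = client.get_table("your-project.your_dataset.your_table")
table.require_partition_filter = True
table = client.update_table(table, ["require_partition_filter"])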