From 2526dc7405df8be9211f3a4536b8e83e8337d9f5 Mon Sep 17 00:00:00 2001 From: Leonid Emar-Kar Date: Tue, 30 Jul 2019 17:43:56 +0300 Subject: [PATCH 1/9] Update dataset.py added support to Dataset for project_ids with org prefix --- bigquery/google/cloud/bigquery/dataset.py | 34 +++++++++++++++++++---- 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/bigquery/google/cloud/bigquery/dataset.py b/bigquery/google/cloud/bigquery/dataset.py index 01260ccc6e68..5688487aee69 100644 --- a/bigquery/google/cloud/bigquery/dataset.py +++ b/bigquery/google/cloud/bigquery/dataset.py @@ -18,6 +18,7 @@ import six import copy +import re import google.cloud._helpers from google.cloud.bigquery import _helpers @@ -26,6 +27,13 @@ from google.cloud.bigquery.table import TableReference +_W_PREFIX = re.compile( + r""" + (\S*)\:(?P\S*)""", + re.VERBOSE, +) + + def _get_table_reference(self, table_id): """Constructs a TableReference. @@ -270,7 +278,8 @@ def from_string(cls, dataset_id, default_project=None): dataset_id (str): A dataset ID in standard SQL format. If ``default_project`` is not specified, this must included both the project ID and - the dataset ID, separated by ``.``. + the dataset ID, separated by ``.`` or, single prefix usage is + also permitted. default_project (str): Optional. The project ID to use when ``dataset_id`` does not include a project ID. @@ -283,6 +292,9 @@ def from_string(cls, dataset_id, default_project=None): >>> DatasetReference.from_string('my-project-id.some_dataset') DatasetReference('my-project-id', 'some_dataset') + >>> DatasetReference.from_string('prefix:my-project-id.some_dataset') + DatasetReference('my-project-id', 'some_dataset') + Raises: ValueError: If ``dataset_id`` is not a fully-qualified dataset ID in @@ -290,21 +302,28 @@ def from_string(cls, dataset_id, default_project=None): """ output_dataset_id = dataset_id output_project_id = default_project - parts = dataset_id.split(".") + with_prefix = _W_PREFIX.match(dataset_id) + if with_prefix is None: + parts = dataset_id.split(".") + else: + parts = with_prefix.group("ref").split(".") if len(parts) == 1 and not default_project: raise ValueError( "When default_project is not set, dataset_id must be a " "fully-qualified dataset ID in standard SQL format. " - 'e.g. "project.dataset_id", got {}'.format(dataset_id) + 'e.g. "project.dataset_id" or, single prefix usage ' + 'is also permitted e.g. "prefix:project.dataset_id" ' + "got {}".format(dataset_id) ) elif len(parts) == 2: output_project_id, output_dataset_id = parts elif len(parts) > 2: raise ValueError( "Too many parts in dataset_id. Expected a fully-qualified " - "dataset ID in standard SQL format. e.g. " - '"project.dataset_id", got {}'.format(dataset_id) + 'dataset ID in standard SQL format. e.g. "project.dataset_id" ' + "or, single prefix usage is also permitted e.g. " + '"prefix:project.dataset_id" got {}'.format(dataset_id) ) return cls(output_project_id, output_dataset_id) @@ -555,7 +574,7 @@ def from_string(cls, full_dataset_id): full_dataset_id (str): A fully-qualified dataset ID in standard SQL format. Must included both the project ID and the dataset ID, separated by - ``.``. + ``.`` or, single prefix usage is also permitted. Returns: Dataset: Dataset parsed from ``full_dataset_id``. @@ -564,6 +583,9 @@ def from_string(cls, full_dataset_id): >>> Dataset.from_string('my-project-id.some_dataset') Dataset(DatasetReference('my-project-id', 'some_dataset')) + >>> DatasetReference.from_string('prefix:my-project-id.some_dataset') + DatasetReference('my-project-id', 'some_dataset') + Raises: ValueError: If ``full_dataset_id`` is not a fully-qualified dataset ID in From 533f959b869a820dfc280702d341af781525d3e8 Mon Sep 17 00:00:00 2001 From: Leonid Emar-Kar Date: Tue, 30 Jul 2019 17:44:21 +0300 Subject: [PATCH 2/9] Update test_dataset.py updated tests to check dataset chgs --- bigquery/tests/unit/test_dataset.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/bigquery/tests/unit/test_dataset.py b/bigquery/tests/unit/test_dataset.py index b8805a9c7ce3..e3615cef10fd 100644 --- a/bigquery/tests/unit/test_dataset.py +++ b/bigquery/tests/unit/test_dataset.py @@ -186,11 +186,22 @@ def test_from_string(self): self.assertEqual(got.project, "string-project") self.assertEqual(got.dataset_id, "string_dataset") + def test_from_string_w_prefix(self): + cls = self._get_target_class() + got = cls.from_string("prefix:string-project.string_dataset") + self.assertEqual(got.project, "string-project") + self.assertEqual(got.dataset_id, "string_dataset") + def test_from_string_legacy_string(self): cls = self._get_target_class() with self.assertRaises(ValueError): cls.from_string("string-project:string_dataset") + def test_from_string_w_several_prefixes(self): + cls = self._get_target_class() + with self.assertRaises(ValueError): + cls.from_string("google.com:project:dataset_id") + def test_from_string_not_fully_qualified(self): cls = self._get_target_class() with self.assertRaises(ValueError): From 009ef57f93d27ef5c647fa4d198cf75195b30e97 Mon Sep 17 00:00:00 2001 From: Leonid Emar-Kar Date: Wed, 31 Jul 2019 12:54:51 +0300 Subject: [PATCH 3/9] minor chgs --- bigquery/google/cloud/bigquery/dataset.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/bigquery/google/cloud/bigquery/dataset.py b/bigquery/google/cloud/bigquery/dataset.py index 5688487aee69..13c9509ee496 100644 --- a/bigquery/google/cloud/bigquery/dataset.py +++ b/bigquery/google/cloud/bigquery/dataset.py @@ -29,8 +29,9 @@ _W_PREFIX = re.compile( r""" - (\S*)\:(?P\S*)""", - re.VERBOSE, + (\S*)\:(?P\S*) +""", + # re.VERBOSE, ) @@ -277,7 +278,7 @@ def from_string(cls, dataset_id, default_project=None): Args: dataset_id (str): A dataset ID in standard SQL format. If ``default_project`` - is not specified, this must included both the project ID and + is not specified, this must include both the project ID and the dataset ID, separated by ``.`` or, single prefix usage is also permitted. default_project (str): @@ -311,9 +312,9 @@ def from_string(cls, dataset_id, default_project=None): if len(parts) == 1 and not default_project: raise ValueError( "When default_project is not set, dataset_id must be a " - "fully-qualified dataset ID in standard SQL format. " - 'e.g. "project.dataset_id" or, single prefix usage ' - 'is also permitted e.g. "prefix:project.dataset_id" ' + "fully-qualified dataset ID in standard SQL format, " + 'e.g., "project.dataset_id" or, single prefix usage ' + 'is also permitted, e.g., "prefix:project.dataset_id" ' "got {}".format(dataset_id) ) elif len(parts) == 2: @@ -321,8 +322,8 @@ def from_string(cls, dataset_id, default_project=None): elif len(parts) > 2: raise ValueError( "Too many parts in dataset_id. Expected a fully-qualified " - 'dataset ID in standard SQL format. e.g. "project.dataset_id" ' - "or, single prefix usage is also permitted e.g. " + 'dataset ID in standard SQL format, e.g., "project.dataset_id" ' + "or, single prefix usage is also permitted, e.g., " '"prefix:project.dataset_id" got {}'.format(dataset_id) ) @@ -573,7 +574,7 @@ def from_string(cls, full_dataset_id): Args: full_dataset_id (str): A fully-qualified dataset ID in standard SQL format. Must - included both the project ID and the dataset ID, separated by + include both the project ID and the dataset ID, separated by ``.`` or, single prefix usage is also permitted. Returns: From 452adf858538253981c724fa38dea953a7b4c628 Mon Sep 17 00:00:00 2001 From: Leonid Emar-Kar Date: Wed, 31 Jul 2019 12:55:20 +0300 Subject: [PATCH 4/9] * --- bigquery/google/cloud/bigquery/dataset.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bigquery/google/cloud/bigquery/dataset.py b/bigquery/google/cloud/bigquery/dataset.py index 13c9509ee496..c20f0ba2d2df 100644 --- a/bigquery/google/cloud/bigquery/dataset.py +++ b/bigquery/google/cloud/bigquery/dataset.py @@ -31,7 +31,6 @@ r""" (\S*)\:(?P\S*) """, - # re.VERBOSE, ) From 3e042732f6e85d785010fd94a22f39e8077e1ebd Mon Sep 17 00:00:00 2001 From: Leonid Emar-Kar Date: Thu, 1 Aug 2019 17:59:49 +0300 Subject: [PATCH 5/9] fixed tests issue --- bigquery/google/cloud/bigquery/dataset.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bigquery/google/cloud/bigquery/dataset.py b/bigquery/google/cloud/bigquery/dataset.py index c20f0ba2d2df..8ccbd48dfdf7 100644 --- a/bigquery/google/cloud/bigquery/dataset.py +++ b/bigquery/google/cloud/bigquery/dataset.py @@ -31,6 +31,7 @@ r""" (\S*)\:(?P\S*) """, + re.VERBOSE, ) From 7bc877f327a670ee697656dfeb08ed78aba933eb Mon Sep 17 00:00:00 2001 From: Leonid Emar-Kar Date: Mon, 5 Aug 2019 10:47:10 +0300 Subject: [PATCH 6/9] minor corrections Applying requested chgs. // Removed description for 'single prefix'. --- bigquery/google/cloud/bigquery/dataset.py | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/bigquery/google/cloud/bigquery/dataset.py b/bigquery/google/cloud/bigquery/dataset.py index 8ccbd48dfdf7..c270e16d5830 100644 --- a/bigquery/google/cloud/bigquery/dataset.py +++ b/bigquery/google/cloud/bigquery/dataset.py @@ -279,8 +279,7 @@ def from_string(cls, dataset_id, default_project=None): dataset_id (str): A dataset ID in standard SQL format. If ``default_project`` is not specified, this must include both the project ID and - the dataset ID, separated by ``.`` or, single prefix usage is - also permitted. + the dataset ID, separated by ``.``. default_project (str): Optional. The project ID to use when ``dataset_id`` does not include a project ID. @@ -293,9 +292,6 @@ def from_string(cls, dataset_id, default_project=None): >>> DatasetReference.from_string('my-project-id.some_dataset') DatasetReference('my-project-id', 'some_dataset') - >>> DatasetReference.from_string('prefix:my-project-id.some_dataset') - DatasetReference('my-project-id', 'some_dataset') - Raises: ValueError: If ``dataset_id`` is not a fully-qualified dataset ID in @@ -313,18 +309,15 @@ def from_string(cls, dataset_id, default_project=None): raise ValueError( "When default_project is not set, dataset_id must be a " "fully-qualified dataset ID in standard SQL format, " - 'e.g., "project.dataset_id" or, single prefix usage ' - 'is also permitted, e.g., "prefix:project.dataset_id" ' - "got {}".format(dataset_id) + 'e.g., "project.dataset_id" got {}'.format(dataset_id) ) elif len(parts) == 2: output_project_id, output_dataset_id = parts elif len(parts) > 2: raise ValueError( "Too many parts in dataset_id. Expected a fully-qualified " - 'dataset ID in standard SQL format, e.g., "project.dataset_id" ' - "or, single prefix usage is also permitted, e.g., " - '"prefix:project.dataset_id" got {}'.format(dataset_id) + "dataset ID in standard SQL format. e.g. " + '"project.dataset_id", got {}'.format(dataset_id) ) return cls(output_project_id, output_dataset_id) @@ -575,7 +568,7 @@ def from_string(cls, full_dataset_id): full_dataset_id (str): A fully-qualified dataset ID in standard SQL format. Must include both the project ID and the dataset ID, separated by - ``.`` or, single prefix usage is also permitted. + ``.``. Returns: Dataset: Dataset parsed from ``full_dataset_id``. @@ -584,9 +577,6 @@ def from_string(cls, full_dataset_id): >>> Dataset.from_string('my-project-id.some_dataset') Dataset(DatasetReference('my-project-id', 'some_dataset')) - >>> DatasetReference.from_string('prefix:my-project-id.some_dataset') - DatasetReference('my-project-id', 'some_dataset') - Raises: ValueError: If ``full_dataset_id`` is not a fully-qualified dataset ID in From 4c13b065d0cf1a901714d1da33f8ea84fe4b44cb Mon Sep 17 00:00:00 2001 From: Leonid Emar-Kar Date: Thu, 8 Aug 2019 14:54:20 +0300 Subject: [PATCH 7/9] major corrections Complete template change. --- bigquery/google/cloud/bigquery/dataset.py | 10 ++++++---- bigquery/tests/unit/test_dataset.py | 6 +++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/bigquery/google/cloud/bigquery/dataset.py b/bigquery/google/cloud/bigquery/dataset.py index c270e16d5830..835d91b652e1 100644 --- a/bigquery/google/cloud/bigquery/dataset.py +++ b/bigquery/google/cloud/bigquery/dataset.py @@ -27,9 +27,9 @@ from google.cloud.bigquery.table import TableReference -_W_PREFIX = re.compile( +_PROJECT_PREFIX_PATTERN = re.compile( r""" - (\S*)\:(?P\S*) + (?P\S+\:\S+)\.+(?P\S*) """, re.VERBOSE, ) @@ -299,11 +299,13 @@ def from_string(cls, dataset_id, default_project=None): """ output_dataset_id = dataset_id output_project_id = default_project - with_prefix = _W_PREFIX.match(dataset_id) + with_prefix = _PROJECT_PREFIX_PATTERN.match(dataset_id) if with_prefix is None: parts = dataset_id.split(".") else: - parts = with_prefix.group("ref").split(".") + prefix = with_prefix.group("prefix") + remaining = with_prefix.group("remaining") + parts = [prefix, remaining] if len(parts) == 1 and not default_project: raise ValueError( diff --git a/bigquery/tests/unit/test_dataset.py b/bigquery/tests/unit/test_dataset.py index e3615cef10fd..cd9eae8973da 100644 --- a/bigquery/tests/unit/test_dataset.py +++ b/bigquery/tests/unit/test_dataset.py @@ -189,7 +189,7 @@ def test_from_string(self): def test_from_string_w_prefix(self): cls = self._get_target_class() got = cls.from_string("prefix:string-project.string_dataset") - self.assertEqual(got.project, "string-project") + self.assertEqual(got.project, "prefix:string-project") self.assertEqual(got.dataset_id, "string_dataset") def test_from_string_legacy_string(self): @@ -197,10 +197,10 @@ def test_from_string_legacy_string(self): with self.assertRaises(ValueError): cls.from_string("string-project:string_dataset") - def test_from_string_w_several_prefixes(self): + def test_from_string_w_incorrect_prefix(self): cls = self._get_target_class() with self.assertRaises(ValueError): - cls.from_string("google.com:project:dataset_id") + cls.from_string("google.com.string-project.dataset_id") def test_from_string_not_fully_qualified(self): cls = self._get_target_class() From 07dde13b03bfce27527a33e473ad73e173a525f5 Mon Sep 17 00:00:00 2001 From: Leonid Emar-Kar Date: Mon, 12 Aug 2019 10:52:13 +0300 Subject: [PATCH 8/9] pattern update minor corrections --- bigquery/google/cloud/bigquery/dataset.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/bigquery/google/cloud/bigquery/dataset.py b/bigquery/google/cloud/bigquery/dataset.py index 835d91b652e1..2cb4b752457b 100644 --- a/bigquery/google/cloud/bigquery/dataset.py +++ b/bigquery/google/cloud/bigquery/dataset.py @@ -27,12 +27,7 @@ from google.cloud.bigquery.table import TableReference -_PROJECT_PREFIX_PATTERN = re.compile( - r""" - (?P\S+\:\S+)\.+(?P\S*) -""", - re.VERBOSE, -) +_PROJECT_PREFIX_PATTERN = re.compile(r"(?P\S+\:\S+)\.+(?P\S+)$") def _get_table_reference(self, table_id): @@ -303,9 +298,9 @@ def from_string(cls, dataset_id, default_project=None): if with_prefix is None: parts = dataset_id.split(".") else: - prefix = with_prefix.group("prefix") - remaining = with_prefix.group("remaining") - parts = [prefix, remaining] + project_id = with_prefix.group("project_id") + dataset_id = with_prefix.group("dataset_id") + parts = [project_id, dataset_id] if len(parts) == 1 and not default_project: raise ValueError( From e75e0fcdca659d35d02f8ce3f43710bef57d3451 Mon Sep 17 00:00:00 2001 From: Leonid Emar-Kar Date: Thu, 15 Aug 2019 11:11:11 +0300 Subject: [PATCH 9/9] update pattern and tests pattern rewrote with the '[^.]' and .VERBOSE (due to blacken session) added test to check extra parts within the string with the prefix reconf prefix in an existed test --- bigquery/google/cloud/bigquery/dataset.py | 7 ++++++- bigquery/tests/unit/test_dataset.py | 9 +++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/bigquery/google/cloud/bigquery/dataset.py b/bigquery/google/cloud/bigquery/dataset.py index 2cb4b752457b..494c219d4f67 100644 --- a/bigquery/google/cloud/bigquery/dataset.py +++ b/bigquery/google/cloud/bigquery/dataset.py @@ -27,7 +27,12 @@ from google.cloud.bigquery.table import TableReference -_PROJECT_PREFIX_PATTERN = re.compile(r"(?P\S+\:\S+)\.+(?P\S+)$") +_PROJECT_PREFIX_PATTERN = re.compile( + r""" + (?P\S+\:[^.]+)\.(?P[^.]+)$ +""", + re.VERBOSE, +) def _get_table_reference(self, table_id): diff --git a/bigquery/tests/unit/test_dataset.py b/bigquery/tests/unit/test_dataset.py index cd9eae8973da..26b1729a240c 100644 --- a/bigquery/tests/unit/test_dataset.py +++ b/bigquery/tests/unit/test_dataset.py @@ -188,8 +188,8 @@ def test_from_string(self): def test_from_string_w_prefix(self): cls = self._get_target_class() - got = cls.from_string("prefix:string-project.string_dataset") - self.assertEqual(got.project, "prefix:string-project") + got = cls.from_string("google.com:string-project.string_dataset") + self.assertEqual(got.project, "google.com:string-project") self.assertEqual(got.dataset_id, "string_dataset") def test_from_string_legacy_string(self): @@ -202,6 +202,11 @@ def test_from_string_w_incorrect_prefix(self): with self.assertRaises(ValueError): cls.from_string("google.com.string-project.dataset_id") + def test_from_string_w_prefix_and_too_many_parts(self): + cls = self._get_target_class() + with self.assertRaises(ValueError): + cls.from_string("google.com:string-project.dataset_id.table_id") + def test_from_string_not_fully_qualified(self): cls = self._get_target_class() with self.assertRaises(ValueError):