From 2526dc7405df8be9211f3a4536b8e83e8337d9f5 Mon Sep 17 00:00:00 2001
From: Leonid Emar-Kar <l.emarkar@outlook.com>
Date: Tue, 30 Jul 2019 17:43:56 +0300
Subject: [PATCH 1/9] Update dataset.py

added support to Dataset for project_ids with org prefix
---
 bigquery/google/cloud/bigquery/dataset.py | 34 +++++++++++++++++++----
 1 file changed, 28 insertions(+), 6 deletions(-)
diff --git a/bigquery/google/cloud/bigquery/dataset.py b/bigquery/google/cloud/bigquery/dataset.py
index 01260ccc6e68..5688487aee69 100644
--- a/bigquery/google/cloud/bigquery/dataset.py
+++ b/bigquery/google/cloud/bigquery/dataset.py
@@ -18,6 +18,7 @@
 
 import six
 import copy
+import re
 
 import google.cloud._helpers
 from google.cloud.bigquery import _helpers
@@ -26,6 +27,13 @@
 from google.cloud.bigquery.table import TableReference
 
 
+_W_PREFIX = re.compile(
+    r"""
+    (\S*)\:(?P<ref>\S*)""",
+    re.VERBOSE,
+)
+
+
 def _get_table_reference(self, table_id):
     """Constructs a TableReference.
 
@@ -270,7 +278,8 @@ def from_string(cls, dataset_id, default_project=None):
             dataset_id (str):
                 A dataset ID in standard SQL format. If ``default_project``
                 is not specified, this must included both the project ID and
-                the dataset ID, separated by ``.``.
+                the dataset ID, separated by ``.`` or, single prefix usage is
+                also permitted.
             default_project (str):
                 Optional. The project ID to use when ``dataset_id`` does not
                 include a project ID.
@@ -283,6 +292,9 @@ def from_string(cls, dataset_id, default_project=None):
             >>> DatasetReference.from_string('my-project-id.some_dataset')
             DatasetReference('my-project-id', 'some_dataset')
 
+            >>> DatasetReference.from_string('prefix:my-project-id.some_dataset')
+            DatasetReference('my-project-id', 'some_dataset')
+
         Raises:
             ValueError:
                 If ``dataset_id`` is not a fully-qualified dataset ID in
@@ -290,21 +302,28 @@ def from_string(cls, dataset_id, default_project=None):
         """
         output_dataset_id = dataset_id
         output_project_id = default_project
-        parts = dataset_id.split(".")
+        with_prefix = _W_PREFIX.match(dataset_id)
+        if with_prefix is None:
+            parts = dataset_id.split(".")
+        else:
+            parts = with_prefix.group("ref").split(".")
 
         if len(parts) == 1 and not default_project:
             raise ValueError(
                 "When default_project is not set, dataset_id must be a "
                 "fully-qualified dataset ID in standard SQL format. "
-                'e.g. "project.dataset_id", got {}'.format(dataset_id)
+                'e.g. "project.dataset_id" or, single prefix usage '
+                'is also permitted e.g. "prefix:project.dataset_id" '
+                "got {}".format(dataset_id)
             )
         elif len(parts) == 2:
             output_project_id, output_dataset_id = parts
         elif len(parts) > 2:
             raise ValueError(
                 "Too many parts in dataset_id. Expected a fully-qualified "
-                "dataset ID in standard SQL format. e.g. "
-                '"project.dataset_id", got {}'.format(dataset_id)
+                'dataset ID in standard SQL format. e.g. "project.dataset_id" '
+                "or, single prefix usage is also permitted e.g. "
+                '"prefix:project.dataset_id" got {}'.format(dataset_id)
             )
 
         return cls(output_project_id, output_dataset_id)
@@ -555,7 +574,7 @@ def from_string(cls, full_dataset_id):
             full_dataset_id (str):
                 A fully-qualified dataset ID in standard SQL format. Must
                 included both the project ID and the dataset ID, separated by
-                ``.``.
+                ``.`` or, single prefix usage is also permitted.
 
         Returns:
             Dataset: Dataset parsed from ``full_dataset_id``.
@@ -564,6 +583,9 @@ def from_string(cls, full_dataset_id):
             >>> Dataset.from_string('my-project-id.some_dataset')
             Dataset(DatasetReference('my-project-id', 'some_dataset'))
 
+            >>> DatasetReference.from_string('prefix:my-project-id.some_dataset')
+            DatasetReference('my-project-id', 'some_dataset')
+
         Raises:
             ValueError:
                 If ``full_dataset_id`` is not a fully-qualified dataset ID in

From 533f959b869a820dfc280702d341af781525d3e8 Mon Sep 17 00:00:00 2001
From: Leonid Emar-Kar <l.emarkar@outlook.com>
Date: Tue, 30 Jul 2019 17:44:21 +0300
Subject: [PATCH 2/9] Update test_dataset.py

Updated tests to check the dataset changes.
---
 bigquery/tests/unit/test_dataset.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/bigquery/tests/unit/test_dataset.py b/bigquery/tests/unit/test_dataset.py
index b8805a9c7ce3..e3615cef10fd 100644
--- a/bigquery/tests/unit/test_dataset.py
+++ b/bigquery/tests/unit/test_dataset.py
@@ -186,11 +186,22 @@ def test_from_string(self):
         self.assertEqual(got.project, "string-project")
         self.assertEqual(got.dataset_id, "string_dataset")
 
+    def test_from_string_w_prefix(self):
+        cls = self._get_target_class()
+        got = cls.from_string("prefix:string-project.string_dataset")
+        self.assertEqual(got.project, "string-project")
+        self.assertEqual(got.dataset_id, "string_dataset")
+
     def test_from_string_legacy_string(self):
         cls = self._get_target_class()
         with self.assertRaises(ValueError):
             cls.from_string("string-project:string_dataset")
 
+    def test_from_string_w_several_prefixes(self):
+        cls = self._get_target_class()
+        with self.assertRaises(ValueError):
+            cls.from_string("google.com:project:dataset_id")
+
     def test_from_string_not_fully_qualified(self):
         cls = self._get_target_class()
         with self.assertRaises(ValueError):

From 009ef57f93d27ef5c647fa4d198cf75195b30e97 Mon Sep 17 00:00:00 2001
From: Leonid Emar-Kar <l.emarkar@outlook.com>
Date: Wed, 31 Jul 2019 12:54:51 +0300
Subject: [PATCH 3/9] minor changes

---
 bigquery/google/cloud/bigquery/dataset.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/bigquery/google/cloud/bigquery/dataset.py b/bigquery/google/cloud/bigquery/dataset.py
index 5688487aee69..13c9509ee496 100644
--- a/bigquery/google/cloud/bigquery/dataset.py
+++ b/bigquery/google/cloud/bigquery/dataset.py
@@ -29,8 +29,9 @@
 
 _W_PREFIX = re.compile(
     r"""
-    (\S*)\:(?P<ref>\S*)""",
-    re.VERBOSE,
+    (\S*)\:(?P<ref>\S*)
+""",
+    # re.VERBOSE,
 )
 
 
@@ -277,7 +278,7 @@ def from_string(cls, dataset_id, default_project=None):
         Args:
             dataset_id (str):
                 A dataset ID in standard SQL format. If ``default_project``
-                is not specified, this must included both the project ID and
+                is not specified, this must include both the project ID and
                 the dataset ID, separated by ``.`` or, single prefix usage is
                 also permitted.
             default_project (str):
@@ -311,9 +312,9 @@ def from_string(cls, dataset_id, default_project=None):
         if len(parts) == 1 and not default_project:
             raise ValueError(
                 "When default_project is not set, dataset_id must be a "
-                "fully-qualified dataset ID in standard SQL format. "
-                'e.g. "project.dataset_id" or, single prefix usage '
-                'is also permitted e.g. "prefix:project.dataset_id" '
+                "fully-qualified dataset ID in standard SQL format, "
+                'e.g., "project.dataset_id" or, single prefix usage '
+                'is also permitted, e.g., "prefix:project.dataset_id" '
                 "got {}".format(dataset_id)
             )
         elif len(parts) == 2:
@@ -321,8 +322,8 @@ def from_string(cls, dataset_id, default_project=None):
         elif len(parts) > 2:
             raise ValueError(
                 "Too many parts in dataset_id. Expected a fully-qualified "
-                'dataset ID in standard SQL format. e.g. "project.dataset_id" '
-                "or, single prefix usage is also permitted e.g. "
+                'dataset ID in standard SQL format, e.g., "project.dataset_id" '
+                "or, single prefix usage is also permitted, e.g., "
                 '"prefix:project.dataset_id" got {}'.format(dataset_id)
             )
 
@@ -573,7 +574,7 @@ def from_string(cls, full_dataset_id):
         Args:
             full_dataset_id (str):
                 A fully-qualified dataset ID in standard SQL format. Must
-                included both the project ID and the dataset ID, separated by
+                include both the project ID and the dataset ID, separated by
                 ``.`` or, single prefix usage is also permitted.
 
         Returns:

From 452adf858538253981c724fa38dea953a7b4c628 Mon Sep 17 00:00:00 2001
From: Leonid Emar-Kar <l.emarkar@outlook.com>
Date: Wed, 31 Jul 2019 12:55:20 +0300
Subject: [PATCH 4/9] *

---
 bigquery/google/cloud/bigquery/dataset.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/bigquery/google/cloud/bigquery/dataset.py b/bigquery/google/cloud/bigquery/dataset.py
index 13c9509ee496..c20f0ba2d2df 100644
--- a/bigquery/google/cloud/bigquery/dataset.py
+++ b/bigquery/google/cloud/bigquery/dataset.py
@@ -31,7 +31,6 @@
     r"""
     (\S*)\:(?P<ref>\S*)
 """,
-    # re.VERBOSE,
 )
 
 

From 3e042732f6e85d785010fd94a22f39e8077e1ebd Mon Sep 17 00:00:00 2001
From: Leonid Emar-Kar <l.emarkar@outlook.com>
Date: Thu, 1 Aug 2019 17:59:49 +0300
Subject: [PATCH 5/9] fixed tests issue

---
 bigquery/google/cloud/bigquery/dataset.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bigquery/google/cloud/bigquery/dataset.py b/bigquery/google/cloud/bigquery/dataset.py
index c20f0ba2d2df..8ccbd48dfdf7 100644
--- a/bigquery/google/cloud/bigquery/dataset.py
+++ b/bigquery/google/cloud/bigquery/dataset.py
@@ -31,6 +31,7 @@
     r"""
     (\S*)\:(?P<ref>\S*)
 """,
+    re.VERBOSE,
 )
 
 

From 7bc877f327a670ee697656dfeb08ed78aba933eb Mon Sep 17 00:00:00 2001
From: Leonid Emar-Kar <l.emarkar@outlook.com>
Date: Mon, 5 Aug 2019 10:47:10 +0300
Subject: [PATCH 6/9] minor corrections

Applied the requested changes.
Removed the description for 'single prefix'.
---
 bigquery/google/cloud/bigquery/dataset.py | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/bigquery/google/cloud/bigquery/dataset.py b/bigquery/google/cloud/bigquery/dataset.py
index 8ccbd48dfdf7..c270e16d5830 100644
--- a/bigquery/google/cloud/bigquery/dataset.py
+++ b/bigquery/google/cloud/bigquery/dataset.py
@@ -279,8 +279,7 @@ def from_string(cls, dataset_id, default_project=None):
             dataset_id (str):
                 A dataset ID in standard SQL format. If ``default_project``
                 is not specified, this must include both the project ID and
-                the dataset ID, separated by ``.`` or, single prefix usage is
-                also permitted.
+                the dataset ID, separated by ``.``.
             default_project (str):
                 Optional. The project ID to use when ``dataset_id`` does not
                 include a project ID.
@@ -293,9 +292,6 @@ def from_string(cls, dataset_id, default_project=None):
             >>> DatasetReference.from_string('my-project-id.some_dataset')
             DatasetReference('my-project-id', 'some_dataset')
 
-            >>> DatasetReference.from_string('prefix:my-project-id.some_dataset')
-            DatasetReference('my-project-id', 'some_dataset')
-
         Raises:
             ValueError:
                 If ``dataset_id`` is not a fully-qualified dataset ID in
@@ -313,18 +309,15 @@ def from_string(cls, dataset_id, default_project=None):
             raise ValueError(
                 "When default_project is not set, dataset_id must be a "
                 "fully-qualified dataset ID in standard SQL format, "
-                'e.g., "project.dataset_id" or, single prefix usage '
-                'is also permitted, e.g., "prefix:project.dataset_id" '
-                "got {}".format(dataset_id)
+                'e.g., "project.dataset_id" got {}'.format(dataset_id)
             )
         elif len(parts) == 2:
             output_project_id, output_dataset_id = parts
         elif len(parts) > 2:
             raise ValueError(
                 "Too many parts in dataset_id. Expected a fully-qualified "
-                'dataset ID in standard SQL format, e.g., "project.dataset_id" '
-                "or, single prefix usage is also permitted, e.g., "
-                '"prefix:project.dataset_id" got {}'.format(dataset_id)
+                "dataset ID in standard SQL format. e.g. "
+                '"project.dataset_id", got {}'.format(dataset_id)
             )
 
         return cls(output_project_id, output_dataset_id)
@@ -575,7 +568,7 @@ def from_string(cls, full_dataset_id):
             full_dataset_id (str):
                 A fully-qualified dataset ID in standard SQL format. Must
                 include both the project ID and the dataset ID, separated by
-                ``.`` or, single prefix usage is also permitted.
+                ``.``.
 
         Returns:
             Dataset: Dataset parsed from ``full_dataset_id``.
@@ -584,9 +577,6 @@ def from_string(cls, full_dataset_id):
             >>> Dataset.from_string('my-project-id.some_dataset')
             Dataset(DatasetReference('my-project-id', 'some_dataset'))
 
-            >>> DatasetReference.from_string('prefix:my-project-id.some_dataset')
-            DatasetReference('my-project-id', 'some_dataset')
-
         Raises:
             ValueError:
                 If ``full_dataset_id`` is not a fully-qualified dataset ID in

From 4c13b065d0cf1a901714d1da33f8ea84fe4b44cb Mon Sep 17 00:00:00 2001
From: Leonid Emar-Kar <l.emarkar@outlook.com>
Date: Thu, 8 Aug 2019 14:54:20 +0300
Subject: [PATCH 7/9] major corrections

Complete template change.
---
 bigquery/google/cloud/bigquery/dataset.py | 10 ++++++----
 bigquery/tests/unit/test_dataset.py       |  6 +++---
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/bigquery/google/cloud/bigquery/dataset.py b/bigquery/google/cloud/bigquery/dataset.py
index c270e16d5830..835d91b652e1 100644
--- a/bigquery/google/cloud/bigquery/dataset.py
+++ b/bigquery/google/cloud/bigquery/dataset.py
@@ -27,9 +27,9 @@
 from google.cloud.bigquery.table import TableReference
 
 
-_W_PREFIX = re.compile(
+_PROJECT_PREFIX_PATTERN = re.compile(
     r"""
-    (\S*)\:(?P<ref>\S*)
+    (?P<prefix>\S+\:\S+)\.+(?P<remaining>\S*)
 """,
     re.VERBOSE,
 )
@@ -299,11 +299,13 @@ def from_string(cls, dataset_id, default_project=None):
         """
         output_dataset_id = dataset_id
         output_project_id = default_project
-        with_prefix = _W_PREFIX.match(dataset_id)
+        with_prefix = _PROJECT_PREFIX_PATTERN.match(dataset_id)
         if with_prefix is None:
             parts = dataset_id.split(".")
         else:
-            parts = with_prefix.group("ref").split(".")
+            prefix = with_prefix.group("prefix")
+            remaining = with_prefix.group("remaining")
+            parts = [prefix, remaining]
 
         if len(parts) == 1 and not default_project:
             raise ValueError(
diff --git a/bigquery/tests/unit/test_dataset.py b/bigquery/tests/unit/test_dataset.py
index e3615cef10fd..cd9eae8973da 100644
--- a/bigquery/tests/unit/test_dataset.py
+++ b/bigquery/tests/unit/test_dataset.py
@@ -189,7 +189,7 @@ def test_from_string(self):
     def test_from_string_w_prefix(self):
         cls = self._get_target_class()
         got = cls.from_string("prefix:string-project.string_dataset")
-        self.assertEqual(got.project, "string-project")
+        self.assertEqual(got.project, "prefix:string-project")
         self.assertEqual(got.dataset_id, "string_dataset")
 
     def test_from_string_legacy_string(self):
@@ -197,10 +197,10 @@ def test_from_string_legacy_string(self):
         with self.assertRaises(ValueError):
             cls.from_string("string-project:string_dataset")
 
-    def test_from_string_w_several_prefixes(self):
+    def test_from_string_w_incorrect_prefix(self):
         cls = self._get_target_class()
         with self.assertRaises(ValueError):
-            cls.from_string("google.com:project:dataset_id")
+            cls.from_string("google.com.string-project.dataset_id")
 
     def test_from_string_not_fully_qualified(self):
         cls = self._get_target_class()

From 07dde13b03bfce27527a33e473ad73e173a525f5 Mon Sep 17 00:00:00 2001
From: Leonid Emar-Kar <l.emarkar@outlook.com>
Date: Mon, 12 Aug 2019 10:52:13 +0300
Subject: [PATCH 8/9] pattern update

minor corrections
---
 bigquery/google/cloud/bigquery/dataset.py | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/bigquery/google/cloud/bigquery/dataset.py b/bigquery/google/cloud/bigquery/dataset.py
index 835d91b652e1..2cb4b752457b 100644
--- a/bigquery/google/cloud/bigquery/dataset.py
+++ b/bigquery/google/cloud/bigquery/dataset.py
@@ -27,12 +27,7 @@
 from google.cloud.bigquery.table import TableReference
 
 
-_PROJECT_PREFIX_PATTERN = re.compile(
-    r"""
-    (?P<prefix>\S+\:\S+)\.+(?P<remaining>\S*)
-""",
-    re.VERBOSE,
-)
+_PROJECT_PREFIX_PATTERN = re.compile(r"(?P<project_id>\S+\:\S+)\.+(?P<dataset_id>\S+)$")
 
 
 def _get_table_reference(self, table_id):
@@ -303,9 +298,9 @@ def from_string(cls, dataset_id, default_project=None):
         if with_prefix is None:
             parts = dataset_id.split(".")
         else:
-            prefix = with_prefix.group("prefix")
-            remaining = with_prefix.group("remaining")
-            parts = [prefix, remaining]
+            project_id = with_prefix.group("project_id")
+            dataset_id = with_prefix.group("dataset_id")
+            parts = [project_id, dataset_id]
 
         if len(parts) == 1 and not default_project:
             raise ValueError(

From e75e0fcdca659d35d02f8ce3f43710bef57d3451 Mon Sep 17 00:00:00 2001
From: Leonid Emar-Kar <l.emarkar@outlook.com>
Date: Thu, 15 Aug 2019 11:11:11 +0300
Subject: [PATCH 9/9] update pattern and tests

Pattern rewritten with '[^.]' and re.VERBOSE (due to the blacken session).
Added a test to check for extra parts within a string that has the prefix.
Reconfigured the prefix in an existing test.
---
 bigquery/google/cloud/bigquery/dataset.py | 7 ++++++-
 bigquery/tests/unit/test_dataset.py       | 9 +++++++--
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/bigquery/google/cloud/bigquery/dataset.py b/bigquery/google/cloud/bigquery/dataset.py
index 2cb4b752457b..494c219d4f67 100644
--- a/bigquery/google/cloud/bigquery/dataset.py
+++ b/bigquery/google/cloud/bigquery/dataset.py
@@ -27,7 +27,12 @@
 from google.cloud.bigquery.table import TableReference
 
 
-_PROJECT_PREFIX_PATTERN = re.compile(r"(?P<project_id>\S+\:\S+)\.+(?P<dataset_id>\S+)$")
+_PROJECT_PREFIX_PATTERN = re.compile(
+    r"""
+    (?P<project_id>\S+\:[^.]+)\.(?P<dataset_id>[^.]+)$
+""",
+    re.VERBOSE,
+)
 
 
 def _get_table_reference(self, table_id):
diff --git a/bigquery/tests/unit/test_dataset.py b/bigquery/tests/unit/test_dataset.py
index cd9eae8973da..26b1729a240c 100644
--- a/bigquery/tests/unit/test_dataset.py
+++ b/bigquery/tests/unit/test_dataset.py
@@ -188,8 +188,8 @@ def test_from_string(self):
 
     def test_from_string_w_prefix(self):
         cls = self._get_target_class()
-        got = cls.from_string("prefix:string-project.string_dataset")
-        self.assertEqual(got.project, "prefix:string-project")
+        got = cls.from_string("google.com:string-project.string_dataset")
+        self.assertEqual(got.project, "google.com:string-project")
         self.assertEqual(got.dataset_id, "string_dataset")
 
     def test_from_string_legacy_string(self):
@@ -202,6 +202,11 @@ def test_from_string_w_incorrect_prefix(self):
         with self.assertRaises(ValueError):
             cls.from_string("google.com.string-project.dataset_id")
 
+    def test_from_string_w_prefix_and_too_many_parts(self):
+        cls = self._get_target_class()
+        with self.assertRaises(ValueError):
+            cls.from_string("google.com:string-project.dataset_id.table_id")
+
     def test_from_string_not_fully_qualified(self):
         cls = self._get_target_class()
         with self.assertRaises(ValueError):