From 227d76a86e7fb887563b83ab0d21403e68dd94c3 Mon Sep 17 00:00:00 2001 From: Jonathan Amsterdam Date: Thu, 21 Sep 2017 16:26:23 -0400 Subject: [PATCH] bigquery: remove client from Dataset (#4018) --- bigquery/google/cloud/bigquery/client.py | 8 +-- bigquery/google/cloud/bigquery/dataset.py | 27 +++------ bigquery/google/cloud/bigquery/job.py | 19 ++++--- bigquery/tests/system.py | 5 +- bigquery/tests/unit/test_dataset.py | 69 +++++++---------------- bigquery/tests/unit/test_job.py | 4 +- bigquery/tests/unit/test_query.py | 4 +- 7 files changed, 49 insertions(+), 87 deletions(-) diff --git a/bigquery/google/cloud/bigquery/client.py b/bigquery/google/cloud/bigquery/client.py index 3c02a8e10df2..f40904c2057e 100644 --- a/bigquery/google/cloud/bigquery/client.py +++ b/bigquery/google/cloud/bigquery/client.py @@ -190,7 +190,7 @@ def create_dataset(self, dataset): path = '/projects/%s/datasets' % (dataset.project,) api_response = self._connection.api_request( method='POST', path=path, data=dataset._build_resource()) - return Dataset.from_api_repr(api_response, self) + return Dataset.from_api_repr(api_response) def get_dataset(self, dataset_ref): """Fetch the dataset referenced by ``dataset_ref`` @@ -204,7 +204,7 @@ def get_dataset(self, dataset_ref): """ api_response = self._connection.api_request( method='GET', path=dataset_ref.path) - return Dataset.from_api_repr(api_response, self) + return Dataset.from_api_repr(api_response) def get_table(self, table_ref): """Fetch the table referenced by ``table_ref`` @@ -262,7 +262,7 @@ def update_dataset(self, dataset, fields): headers = None api_response = self._connection.api_request( method='PATCH', path=path, data=partial, headers=headers) - return Dataset.from_api_repr(api_response, self) + return Dataset.from_api_repr(api_response) def list_dataset_tables(self, dataset, max_results=None, page_token=None): """List tables in the dataset. @@ -622,7 +622,7 @@ def _item_to_dataset(iterator, resource): :rtype: :class:`.Dataset` :returns: The next dataset in the page. """ - return Dataset.from_api_repr(resource, iterator.client) + return Dataset.from_api_repr(resource) def _item_to_job(iterator, resource): diff --git a/bigquery/google/cloud/bigquery/dataset.py b/bigquery/google/cloud/bigquery/dataset.py index fb41ee2e8a95..c5bed721bab4 100644 --- a/bigquery/google/cloud/bigquery/dataset.py +++ b/bigquery/google/cloud/bigquery/dataset.py @@ -157,38 +157,31 @@ class Dataset(object): :type dataset_id: str :param dataset_id: the ID of the dataset - :type client: :class:`google.cloud.bigquery.client.Client` - :param client: (Optional) A client which holds credentials and project - configuration for the dataset (which requires a project). - :type access_entries: list of :class:`AccessEntry` :param access_entries: roles granted to entities for this dataset :type project: str - :param project: (Optional) project ID for the dataset (defaults to - the project of the client). + :param project: (Optional) project ID for the dataset. """ _access_entries = None def __init__(self, dataset_id, - client=None, access_entries=(), project=None): self._dataset_id = dataset_id - self._client = client self._properties = {} # Let the @property do validation. self.access_entries = access_entries - self._project = project or (client and client.project) + self._project = project @property def project(self): """Project bound to the dataset. :rtype: str - :returns: the project (derived from the client). + :returns: the project. """ return self._project @@ -373,25 +366,21 @@ def location(self, value): self._properties['location'] = value @classmethod - def from_api_repr(cls, resource, client): + def from_api_repr(cls, resource): """Factory: construct a dataset given its API representation :type resource: dict :param resource: dataset resource representation returned from the API - :type client: :class:`google.cloud.bigquery.client.Client` - :param client: Client which holds credentials and project - configuration for the dataset. - :rtype: :class:`google.cloud.bigquery.dataset.Dataset` :returns: Dataset parsed from ``resource``. """ - if ('datasetReference' not in resource or - 'datasetId' not in resource['datasetReference']): + dsr = resource.get('datasetReference') + if dsr is None or 'datasetId' not in dsr: raise KeyError('Resource lacks required identity information:' '["datasetReference"]["datasetId"]') - dataset_id = resource['datasetReference']['datasetId'] - dataset = cls(dataset_id, client=client) + dataset_id = dsr['datasetId'] + dataset = cls(dataset_id, project=dsr['projectId']) dataset._set_properties(resource) return dataset diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py index 1e0959565074..42f12ac39838 100644 --- a/bigquery/google/cloud/bigquery/job.py +++ b/bigquery/google/cloud/bigquery/job.py @@ -842,7 +842,8 @@ def from_api_repr(cls, resource, client): """ job_id, config = cls._get_resource_config(resource) dest_config = config['destinationTable'] - dataset = Dataset(dest_config['datasetId'], client) + dataset = Dataset(dest_config['datasetId'], + project=dest_config['projectId']) table_ref = TableReference(dataset, dest_config['tableId']) destination = Table(table_ref, client=client) source_urls = config.get('sourceUris', ()) @@ -958,7 +959,8 @@ def from_api_repr(cls, resource, client): """ job_id, config = cls._get_resource_config(resource) dest_config = config['destinationTable'] - dataset = Dataset(dest_config['datasetId'], client) + dataset = Dataset(dest_config['datasetId'], + project=dest_config['projectId']) table_ref = TableReference(dataset, dest_config['tableId']) destination = Table(table_ref, client=client) sources = [] @@ -970,7 +972,8 @@ def from_api_repr(cls, resource, client): "Resource missing 'sourceTables' / 'sourceTable'") source_configs = [single] for source_config in source_configs: - dataset = Dataset(source_config['datasetId'], client) + dataset = Dataset(source_config['datasetId'], + project=source_config['projectId']) table_ref = TableReference(dataset, source_config['tableId']) sources.append(Table(table_ref, client=client)) job = cls(job_id, destination, sources, client=client) @@ -1423,8 +1426,7 @@ def _copy_configuration_properties(self, configuration): dest_local = self._destination_table_resource() if dest_remote != dest_local: project = dest_remote['projectId'] - dataset = Dataset( - dest_remote['datasetId'], self._client, project=project) + dataset = Dataset(dest_remote['datasetId'], project=project) self.destination = dataset.table(dest_remote['tableId']) def_ds = configuration.get('defaultDataset') @@ -1432,8 +1434,8 @@ def _copy_configuration_properties(self, configuration): if self.default_dataset is not None: del self.default_dataset else: - project = def_ds['projectId'] - self.default_dataset = Dataset(def_ds['datasetId'], self._client) + self.default_dataset = Dataset(def_ds['datasetId'], + project=def_ds['projectId']) udf_resources = [] for udf_mapping in configuration.get(self._UDF_KEY, ()): @@ -1579,7 +1581,6 @@ def referenced_tables(self): if the query has not yet completed. """ tables = [] - client = self._require_client(None) datasets_by_project_name = {} for table in self._job_statistics().get('referencedTables', ()): @@ -1589,7 +1590,7 @@ def referenced_tables(self): ds_name = table['datasetId'] t_dataset = datasets_by_project_name.get((t_project, ds_name)) if t_dataset is None: - t_dataset = Dataset(ds_name, client, project=t_project) + t_dataset = Dataset(ds_name, project=t_project) datasets_by_project_name[(t_project, ds_name)] = t_dataset t_name = table['tableId'] diff --git a/bigquery/tests/system.py b/bigquery/tests/system.py index 82763d89724b..d621a661f06d 100644 --- a/bigquery/tests/system.py +++ b/bigquery/tests/system.py @@ -117,11 +117,12 @@ def test_create_dataset(self): self.assertTrue(_dataset_exists(dataset)) self.assertEqual(dataset.dataset_id, DATASET_ID) + self.assertEqual(dataset.project, Config.CLIENT.project) def test_get_dataset(self): DATASET_ID = _make_dataset_id('get_dataset') client = Config.CLIENT - dataset_arg = Dataset(DATASET_ID) + dataset_arg = Dataset(DATASET_ID, project=client.project) dataset_arg.friendly_name = 'Friendly' dataset_arg.description = 'Description' dataset = retry_403(client.create_dataset)(dataset_arg) @@ -1195,7 +1196,7 @@ def test_dump_table_w_public_data(self): DATASET_ID = 'samples' TABLE_NAME = 'natality' - dataset = Dataset(DATASET_ID, Config.CLIENT, project=PUBLIC) + dataset = Dataset(DATASET_ID, project=PUBLIC) table_ref = dataset.table(TABLE_NAME) table = Config.CLIENT.get_table(table_ref) self._fetch_single_page(table) diff --git a/bigquery/tests/unit/test_dataset.py b/bigquery/tests/unit/test_dataset.py index c2fa2a024f17..89114196d828 100644 --- a/bigquery/tests/unit/test_dataset.py +++ b/bigquery/tests/unit/test_dataset.py @@ -210,11 +210,9 @@ def _verify_resource_properties(self, dataset, resource): self.assertEqual(dataset.access_entries, []) def test_ctor_defaults(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID, project=self.PROJECT) self.assertEqual(dataset.dataset_id, self.DS_ID) - self.assertIs(dataset._client, client) - self.assertEqual(dataset.project, client.project) + self.assertEqual(dataset.project, self.PROJECT) self.assertEqual( dataset.path, '/projects/%s/datasets/%s' % (self.PROJECT, self.DS_ID)) @@ -238,12 +236,10 @@ def test_ctor_explicit(self): bharney = AccessEntry('OWNER', 'userByEmail', 'bharney@example.com') entries = [phred, bharney] OTHER_PROJECT = 'foo-bar-123' - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client, + dataset = self._make_one(self.DS_ID, access_entries=entries, project=OTHER_PROJECT) self.assertEqual(dataset.dataset_id, self.DS_ID) - self.assertIs(dataset._client, client) self.assertEqual(dataset.project, OTHER_PROJECT) self.assertEqual( dataset.path, @@ -262,16 +258,14 @@ def test_ctor_explicit(self): self.assertIsNone(dataset.location) def test_access_entries_setter_non_list(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) with self.assertRaises(TypeError): dataset.access_entries = object() def test_access_entries_setter_invalid_field(self): from google.cloud.bigquery.dataset import AccessEntry - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com') with self.assertRaises(ValueError): dataset.access_entries = [phred, object()] @@ -279,72 +273,61 @@ def test_access_entries_setter_invalid_field(self): def test_access_entries_setter(self): from google.cloud.bigquery.dataset import AccessEntry - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) phred = AccessEntry('OWNER', 'userByEmail', 'phred@example.com') bharney = AccessEntry('OWNER', 'userByEmail', 'bharney@example.com') dataset.access_entries = [phred, bharney] self.assertEqual(dataset.access_entries, [phred, bharney]) def test_default_table_expiration_ms_setter_bad_value(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) with self.assertRaises(ValueError): dataset.default_table_expiration_ms = 'bogus' def test_default_table_expiration_ms_setter(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) dataset.default_table_expiration_ms = 12345 self.assertEqual(dataset.default_table_expiration_ms, 12345) def test_description_setter_bad_value(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) with self.assertRaises(ValueError): dataset.description = 12345 def test_description_setter(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) dataset.description = 'DESCRIPTION' self.assertEqual(dataset.description, 'DESCRIPTION') def test_friendly_name_setter_bad_value(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) with self.assertRaises(ValueError): dataset.friendly_name = 12345 def test_friendly_name_setter(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) dataset.friendly_name = 'FRIENDLY' self.assertEqual(dataset.friendly_name, 'FRIENDLY') def test_location_setter_bad_value(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) with self.assertRaises(ValueError): dataset.location = 12345 def test_location_setter(self): - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client) + dataset = self._make_one(self.DS_ID) dataset.location = 'LOCATION' self.assertEqual(dataset.location, 'LOCATION') def test_from_api_repr_missing_identity(self): self._setUpConstants() - client = _Client(self.PROJECT) RESOURCE = {} klass = self._get_target_class() with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) + klass.from_api_repr(RESOURCE) def test_from_api_repr_bare(self): self._setUpConstants() - client = _Client(self.PROJECT) RESOURCE = { 'id': '%s:%s' % (self.PROJECT, self.DS_ID), 'datasetReference': { @@ -353,24 +336,20 @@ def test_from_api_repr_bare(self): } } klass = self._get_target_class() - dataset = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(dataset._client, client) + dataset = klass.from_api_repr(RESOURCE) self._verify_resource_properties(dataset, RESOURCE) def test_from_api_repr_w_properties(self): - client = _Client(self.PROJECT) RESOURCE = self._makeResource() klass = self._get_target_class() - dataset = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(dataset._client, client) + dataset = klass.from_api_repr(RESOURCE) self._verify_resource_properties(dataset, RESOURCE) def test__parse_access_entries_w_unknown_entity_type(self): ACCESS = [ {'role': 'READER', 'unknown': 'UNKNOWN'}, ] - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client=client) + dataset = self._make_one(self.DS_ID) with self.assertRaises(ValueError): dataset._parse_access_entries(ACCESS) @@ -383,24 +362,16 @@ def test__parse_access_entries_w_extra_keys(self): 'userByEmail': USER_EMAIL, }, ] - client = _Client(self.PROJECT) - dataset = self._make_one(self.DS_ID, client=client) + dataset = self._make_one(self.DS_ID) with self.assertRaises(ValueError): dataset._parse_access_entries(ACCESS) def test_table(self): from google.cloud.bigquery.table import TableReference - client = _Client(project=self.PROJECT) - dataset = self._make_one(self.DS_ID, client=client) + dataset = self._make_one(self.DS_ID, project=self.PROJECT) table = dataset.table('table_id') self.assertIsInstance(table, TableReference) self.assertEqual(table.table_id, 'table_id') self.assertEqual(table.dataset_id, self.DS_ID) self.assertEqual(table.project, self.PROJECT) - - -class _Client(object): - - def __init__(self, project='project'): - self.project = project diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py index 7c662d01d8c6..470e802d1150 100644 --- a/bigquery/tests/unit/test_job.py +++ b/bigquery/tests/unit/test_job.py @@ -2139,7 +2139,7 @@ def test_begin_w_bound_client(self): client = _Client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_NAME, self.QUERY, client) - job.default_dataset = Dataset(DS_ID, client) + job.default_dataset = Dataset(DS_ID, project=self.PROJECT) job.begin() @@ -2204,7 +2204,7 @@ def test_begin_w_alternate_client(self): job = self._make_one(self.JOB_NAME, self.QUERY, client1) dataset_ref = DatasetReference(self.PROJECT, DS_ID) - dataset = Dataset(DS_ID, client1) + dataset = Dataset(DS_ID, project=self.PROJECT) table_ref = dataset_ref.table(TABLE) job.allow_large_results = True diff --git a/bigquery/tests/unit/test_query.py b/bigquery/tests/unit/test_query.py index 0bf0c17c3102..ee2783744c94 100644 --- a/bigquery/tests/unit/test_query.py +++ b/bigquery/tests/unit/test_query.py @@ -206,7 +206,7 @@ def test_from_query_job(self): job = QueryJob( self.JOB_NAME, self.QUERY, client, udf_resources=[UDFResource("resourceUri", RESOURCE_URI)]) - dataset = job.default_dataset = Dataset(DS_NAME, client) + dataset = job.default_dataset = Dataset(DS_NAME) job.use_query_cache = True job.use_legacy_sql = True klass = self._get_target_class() @@ -744,7 +744,7 @@ def __init__(self, project='project', connection=None): def dataset(self, name): from google.cloud.bigquery.dataset import Dataset - return Dataset(name, client=self) + return Dataset(name) class _Connection(object):