From 6f1ce48b1de231e4fb8122ac92fe1990c8c318fa Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 14 Apr 2021 16:44:39 -0500 Subject: [PATCH 1/4] feat: add `Client.delete_job` method to remove job metadata Note: this only removes job metadata. Use `Client.cancel_job` to stop a running job. Also, this feature is in preview and has not rolled out to all regions yet. Location is required, so always pass in location. To keep the method signature consistent with the other methods, location is not a positional argument. The location from the job object is preferred. --- google/cloud/bigquery/client.py | 71 +++++++++++++++++++++++++++++++++ tests/system/test_client.py | 24 ++++++++++- tests/unit/test_client.py | 60 ++++++++++++++++++++++++++++ 3 files changed, 153 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 8211e23a3..c46668b56 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -1453,6 +1453,77 @@ def delete_model( if not not_found_ok: raise + def delete_job( + self, + job_id, + project=None, + location=None, + retry=DEFAULT_RETRY, + timeout=None, + not_found_ok=False, + ): + """[Beta] Delete job metadata from job history. + + Note: This does not stop a running job. Use + :func:`~google.cloud.bigquery.client.Client.cancel_job` instead. + + Args: + job_id (Union[ \ + str, \ + google.cloud.bigquery.job.LoadJob, \ + google.cloud.bigquery.job.CopyJob, \ + google.cloud.bigquery.job.ExtractJob, \ + google.cloud.bigquery.job.QueryJob \ + ]): Job identifier. + + Keyword Arguments: + project (Optional[str]): + ID of the project which owns the job (defaults to the client's project). + location (Optional[str]): + Location where the job was run. Ignored if ``job_id`` is a job + object. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. 
+ not_found_ok (Optional[bool]): + Defaults to ``False``. If ``True``, ignore "not found" errors + when deleting the job. + """ + extra_params = {} + + project, location, job_id = _extract_job_reference( + job_id, project=project, location=location + ) + + if project is None: + project = self.project + + if location is None: + location = self.location + + # Location is always required for jobs.delete() + extra_params["location"] = location + + path = "/projects/{}/jobs/{}/delete".format(project, job_id) + + span_attributes = {"path": path, "job_id": job_id, "location": location} + + try: + self._call_api( + retry, + span_name="BigQuery.deleteJob", + span_attributes=span_attributes, + method="DELETE", + path=path, + query_params=extra_params, + timeout=timeout, + ) + except google.api_core.exceptions.NotFound: + if not not_found_ok: + raise + def delete_routine( self, routine, retry=DEFAULT_RETRY, timeout=None, not_found_ok=False ): diff --git a/tests/system/test_client.py b/tests/system/test_client.py index f31d994ca..a9f272c69 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -25,6 +25,7 @@ import time import unittest import uuid +from typing import Optional import psutil import pytest @@ -62,6 +63,7 @@ from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table from google.cloud._helpers import UTC from google.cloud.bigquery import dbapi, enums @@ -123,7 +125,7 @@ def _has_rows(result): def _make_dataset_id(prefix): - return "%s%s" % (prefix, unique_resource_id()) + return "python_bigquery_tests_system_%s%s" % (prefix, unique_resource_id()) def _load_json_schema(filename="schema.json"): @@ -142,7 +144,7 @@ class Config(object): global state. 
""" - CLIENT = None + CLIENT: Optional[bigquery.Client] = None CURSOR = None DATASET = None @@ -430,6 +432,24 @@ def test_delete_dataset_delete_contents_false(self): with self.assertRaises(exceptions.BadRequest): Config.CLIENT.delete_dataset(dataset) + def test_delete_job(self): + dataset_id = _make_dataset_id("us_east1") + self.temp_dataset(dataset_id, location="us-east1") + full_table_id = ( + f"{Config.CLIENT.project}.{dataset_id}.test_delete_job_explicit_location" + ) + table = Table(full_table_id, schema=[SchemaField("col", "STRING")]) + Config.CLIENT.create_table(table) + query_job: bigquery.QueryJob = Config.CLIENT.query( + f"SELECT COUNT(*) FROM `{full_table_id}`", location="us-east1", + ) + query_job.result() + self.assertIsNotNone(Config.CLIENT.get_job(query_job)) + + Config.CLIENT.delete_job(query_job) + with self.assertRaises(NotFound): + Config.CLIENT.get_job(query_job) + def test_get_table_w_public_dataset(self): public = "bigquery-public-data" dataset_id = "samples" diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index c5e742c9e..b021b9f4b 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -2493,6 +2493,66 @@ def test_update_table_delete_property(self): self.assertEqual(req[1]["data"], sent) self.assertIsNone(table3.description) + def test_delete_job_not_found(self): + creds = _make_credentials() + client = self._make_one("client-proj", creds, location="client-loc") + conn = client._connection = make_connection( + google.api_core.exceptions.NotFound("job not found"), + google.api_core.exceptions.NotFound("job not found"), + ) + + with self.assertRaises(google.api_core.exceptions.NotFound): + client.delete_job("my-job") + + conn.api_request.reset_mock() + client.delete_job("my-job", not_found_ok=True) + + conn.api_request.assert_called_once_with( + method="DELETE", + path="/projects/client-proj/jobs/my-job/delete", + query_params={"location": "client-loc"}, + timeout=None, + ) + + def 
test_delete_job_with_id(self): + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds) + conn = client._connection = make_connection({}) + + client.delete_job("my-job", project="param-proj", location="param-loc") + + conn.api_request.assert_called_once_with( + method="DELETE", + path="/projects/param-proj/jobs/my-job/delete", + query_params={"location": "param-loc"}, + timeout=None, + ) + + def test_delete_job_with_resource(self): + from google.cloud.bigquery.job import QueryJob + + query_resource = { + "jobReference": { + "projectId": "job-based-proj", + "jobId": "query_job", + "location": "us-east1", + }, + "configuration": {"query": {}}, + } + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds) + conn = client._connection = make_connection(query_resource) + job_from_resource = QueryJob.from_api_repr(query_resource, client) + + client.delete_job(job_from_resource) + + conn.api_request.assert_called_once_with( + method="DELETE", + path="/projects/job-based-proj/jobs/query_job/delete", + query_params={"location": "us-east1"}, + timeout=None, + ) + def test_delete_model(self): from google.cloud.bigquery.model import Model From a1b9375d2f1f8da464fc98f8d067095e9ce0b13f Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 23 Apr 2021 15:59:00 -0500 Subject: [PATCH 2/4] Update google/cloud/bigquery/client.py Co-authored-by: Peter Lamut --- google/cloud/bigquery/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index c46668b56..65fa84278 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -1506,7 +1506,7 @@ def delete_job( # Location is always required for jobs.delete() extra_params["location"] = location - path = "/projects/{}/jobs/{}/delete".format(project, job_id) + path = f"/projects/{project}/jobs/{job_id}/delete" span_attributes = {"path": path, "job_id": job_id, "location": location} From 
1e29763ad974df2ba2c8c6e63c0d0490b8c5e45d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 23 Apr 2021 15:59:10 -0500 Subject: [PATCH 3/4] Update tests/system/test_client.py Co-authored-by: Peter Lamut --- tests/system/test_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/system/test_client.py b/tests/system/test_client.py index a9f272c69..52c5e835b 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -125,7 +125,7 @@ def _has_rows(result): def _make_dataset_id(prefix): - return "python_bigquery_tests_system_%s%s" % (prefix, unique_resource_id()) + return "python_bigquery_tests_system_{prefix}{unique_resource_id()}" def _load_json_schema(filename="schema.json"): From 4ecc09feca81dcd664a2bc72cb31dba25051f933 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 23 Apr 2021 16:39:01 -0500 Subject: [PATCH 4/4] rename to delete_job_metadata --- google/cloud/bigquery/client.py | 2 +- tests/system/test_client.py | 10 ++++------ tests/unit/test_client.py | 14 +++++++------- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 65fa84278..f7bf378e6 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -1453,7 +1453,7 @@ def delete_model( if not not_found_ok: raise - def delete_job( + def delete_job_metadata( self, job_id, project=None, diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 52c5e835b..e71788a43 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -125,7 +125,7 @@ def _has_rows(result): def _make_dataset_id(prefix): - return "python_bigquery_tests_system_{prefix}{unique_resource_id()}" + return f"python_bigquery_tests_system_{prefix}{unique_resource_id()}" def _load_json_schema(filename="schema.json"): @@ -432,12 +432,10 @@ def test_delete_dataset_delete_contents_false(self): with self.assertRaises(exceptions.BadRequest): 
Config.CLIENT.delete_dataset(dataset) - def test_delete_job(self): + def test_delete_job_metadata(self): dataset_id = _make_dataset_id("us_east1") self.temp_dataset(dataset_id, location="us-east1") - full_table_id = ( - f"{Config.CLIENT.project}.{dataset_id}.test_delete_job_explicit_location" - ) + full_table_id = f"{Config.CLIENT.project}.{dataset_id}.test_delete_job_metadata" table = Table(full_table_id, schema=[SchemaField("col", "STRING")]) Config.CLIENT.create_table(table) query_job: bigquery.QueryJob = Config.CLIENT.query( @@ -446,7 +444,7 @@ def test_delete_job(self): query_job.result() self.assertIsNotNone(Config.CLIENT.get_job(query_job)) - Config.CLIENT.delete_job(query_job) + Config.CLIENT.delete_job_metadata(query_job) with self.assertRaises(NotFound): Config.CLIENT.get_job(query_job) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 8fe1da1f7..8f535145b 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -2498,7 +2498,7 @@ def test_update_table_delete_property(self): self.assertEqual(req[1]["data"], sent) self.assertIsNone(table3.description) - def test_delete_job_not_found(self): + def test_delete_job_metadata_not_found(self): creds = _make_credentials() client = self._make_one("client-proj", creds, location="client-loc") conn = client._connection = make_connection( @@ -2507,10 +2507,10 @@ def test_delete_job_not_found(self): ) with self.assertRaises(google.api_core.exceptions.NotFound): - client.delete_job("my-job") + client.delete_job_metadata("my-job") conn.api_request.reset_mock() - client.delete_job("my-job", not_found_ok=True) + client.delete_job_metadata("my-job", not_found_ok=True) conn.api_request.assert_called_once_with( method="DELETE", @@ -2519,12 +2519,12 @@ def test_delete_job_not_found(self): timeout=None, ) - def test_delete_job_with_id(self): + def test_delete_job_metadata_with_id(self): creds = _make_credentials() client = self._make_one(self.PROJECT, creds) conn = client._connection = 
make_connection({}) - client.delete_job("my-job", project="param-proj", location="param-loc") + client.delete_job_metadata("my-job", project="param-proj", location="param-loc") conn.api_request.assert_called_once_with( method="DELETE", @@ -2533,7 +2533,7 @@ def test_delete_job_with_id(self): timeout=None, ) - def test_delete_job_with_resource(self): + def test_delete_job_metadata_with_resource(self): from google.cloud.bigquery.job import QueryJob query_resource = { @@ -2549,7 +2549,7 @@ def test_delete_job_with_resource(self): conn = client._connection = make_connection(query_resource) job_from_resource = QueryJob.from_api_repr(query_resource, client) - client.delete_job(job_from_resource) + client.delete_job_metadata(job_from_resource) conn.api_request.assert_called_once_with( method="DELETE",