From 35c28d01e068a3c6258c3d2c9df9ded41a4cf02d Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Wed, 14 Apr 2021 16:44:39 -0500
Subject: [PATCH] feat: add `Client.delete_job` method to remove job metadata

Note: this only removes job metadata. Use `Client.cancel_job` to stop
a running job. Also, this feature is in preview and has not rolled out
to all regions yet.

Location is required, so always pass in location.
To keep the method signature consistent with the other methods,
location is not a positional argument.

The location from the job object is preferred.
---
 google/cloud/bigquery/client.py | 71 +++++++++++++++++++++++++++++++++
 tests/system/test_client.py     | 24 ++++++++++-
 tests/unit/test_client.py       | 60 ++++++++++++++++++++++++++++
 3 files changed, 153 insertions(+), 2 deletions(-)

diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py
index 8211e23a3a..c46668b566 100644
--- a/google/cloud/bigquery/client.py
+++ b/google/cloud/bigquery/client.py
@@ -1453,6 +1453,77 @@ def delete_model(
             if not not_found_ok:
                 raise
 
+    def delete_job(
+        self,
+        job_id,
+        project=None,
+        location=None,
+        retry=DEFAULT_RETRY,
+        timeout=None,
+        not_found_ok=False,
+    ):
+        """[Beta] Delete job metadata from job history.
+
+        Note: This does not stop a running job. Use
+        :func:`~google.cloud.bigquery.client.Client.cancel_job` instead.
+
+        Args:
+            job_id (Union[ \
+                str, \
+                google.cloud.bigquery.job.LoadJob, \
+                google.cloud.bigquery.job.CopyJob, \
+                google.cloud.bigquery.job.ExtractJob, \
+                google.cloud.bigquery.job.QueryJob \
+            ]): Job identifier.
+
+        Keyword Arguments:
+            project (Optional[str]):
+                ID of the project which owns the job (defaults to the client's project).
+            location (Optional[str]):
+                Location where the job was run. Ignored if ``job_id`` is a job
+                object.
+            retry (Optional[google.api_core.retry.Retry]):
+                How to retry the RPC.
+            timeout (Optional[float]):
+                The number of seconds to wait for the underlying HTTP transport
+                before using ``retry``.
+            not_found_ok (Optional[bool]):
+                Defaults to ``False``. If ``True``, ignore "not found" errors
+                when deleting the job.
+        """
+        extra_params = {}
+
+        project, location, job_id = _extract_job_reference(
+            job_id, project=project, location=location
+        )
+
+        if project is None:
+            project = self.project
+
+        if location is None:
+            location = self.location
+
+        # Location is always required for jobs.delete()
+        extra_params["location"] = location
+
+        path = "/projects/{}/jobs/{}/delete".format(project, job_id)
+
+        span_attributes = {"path": path, "job_id": job_id, "location": location}
+
+        try:
+            self._call_api(
+                retry,
+                span_name="BigQuery.deleteJob",
+                span_attributes=span_attributes,
+                method="DELETE",
+                path=path,
+                query_params=extra_params,
+                timeout=timeout,
+            )
+        except google.api_core.exceptions.NotFound:
+            if not not_found_ok:
+                raise
+
     def delete_routine(
         self, routine, retry=DEFAULT_RETRY, timeout=None, not_found_ok=False
     ):
diff --git a/tests/system/test_client.py b/tests/system/test_client.py
index f31d994cae..3d9f63f76e 100644
--- a/tests/system/test_client.py
+++ b/tests/system/test_client.py
@@ -17,6 +17,7 @@
 import csv
 import datetime
 import decimal
+from google.cloud.bigquery.schema import SchemaField
 import io
 import json
 import operator
@@ -25,6 +26,7 @@
 import time
 import unittest
 import uuid
+from typing import Optional
 
 import psutil
 import pytest
@@ -123,7 +125,7 @@ def _has_rows(result):
 
 
 def _make_dataset_id(prefix):
-    return "%s%s" % (prefix, unique_resource_id())
+    return "python_bigquery_tests_system_%s%s" % (prefix, unique_resource_id())
 
 
 def _load_json_schema(filename="schema.json"):
@@ -142,7 +144,7 @@ class Config(object):
     global state.
     """
 
-    CLIENT = None
+    CLIENT: Optional[bigquery.Client] = None
     CURSOR = None
     DATASET = None
 
@@ -430,6 +432,24 @@ def test_delete_dataset_delete_contents_false(self):
         with self.assertRaises(exceptions.BadRequest):
             Config.CLIENT.delete_dataset(dataset)
 
+    def test_delete_job(self):
+        dataset_id = _make_dataset_id("us_east1")
+        self.temp_dataset(dataset_id, location="us-east1")
+        full_table_id = (
+            f"{Config.CLIENT.project}.{dataset_id}.test_delete_job_explicit_location"
+        )
+        table = Table(full_table_id, schema=[SchemaField("col", "STRING")])
+        Config.CLIENT.create_table(table)
+        query_job: bigquery.QueryJob = Config.CLIENT.query(
+            f"SELECT COUNT(*) FROM `{full_table_id}`", location="us-east1",
+        )
+        query_job.result()
+        self.assertIsNotNone(Config.CLIENT.get_job(query_job))
+
+        Config.CLIENT.delete_job(query_job)
+        with self.assertRaises(NotFound):
+            Config.CLIENT.get_job(query_job)
+
     def test_get_table_w_public_dataset(self):
         public = "bigquery-public-data"
         dataset_id = "samples"
diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py
index c5e742c9e7..b021b9f4bb 100644
--- a/tests/unit/test_client.py
+++ b/tests/unit/test_client.py
@@ -2493,6 +2493,66 @@ def test_update_table_delete_property(self):
         self.assertEqual(req[1]["data"], sent)
         self.assertIsNone(table3.description)
 
+    def test_delete_job_not_found(self):
+        creds = _make_credentials()
+        client = self._make_one("client-proj", creds, location="client-loc")
+        conn = client._connection = make_connection(
+            google.api_core.exceptions.NotFound("job not found"),
+            google.api_core.exceptions.NotFound("job not found"),
+        )
+
+        with self.assertRaises(google.api_core.exceptions.NotFound):
+            client.delete_job("my-job")
+
+        conn.api_request.reset_mock()
+        client.delete_job("my-job", not_found_ok=True)
+
+        conn.api_request.assert_called_once_with(
+            method="DELETE",
+            path="/projects/client-proj/jobs/my-job/delete",
+            query_params={"location": "client-loc"},
+            timeout=None,
+        )
+
+    def test_delete_job_with_id(self):
+        creds = _make_credentials()
+        client = self._make_one(self.PROJECT, creds)
+        conn = client._connection = make_connection({})
+
+        client.delete_job("my-job", project="param-proj", location="param-loc")
+
+        conn.api_request.assert_called_once_with(
+            method="DELETE",
+            path="/projects/param-proj/jobs/my-job/delete",
+            query_params={"location": "param-loc"},
+            timeout=None,
+        )
+
+    def test_delete_job_with_resource(self):
+        from google.cloud.bigquery.job import QueryJob
+
+        query_resource = {
+            "jobReference": {
+                "projectId": "job-based-proj",
+                "jobId": "query_job",
+                "location": "us-east1",
+            },
+            "configuration": {"query": {}},
+        }
+        creds = _make_credentials()
+        client = self._make_one(self.PROJECT, creds)
+        conn = client._connection = make_connection(query_resource)
+        job_from_resource = QueryJob.from_api_repr(query_resource, client)
+
+        client.delete_job(job_from_resource)
+
+        conn.api_request.assert_called_once_with(
+            method="DELETE",
+            path="/projects/job-based-proj/jobs/query_job/delete",
+            query_params={"location": "us-east1"},
+            timeout=None,
+        )
+
     def test_delete_model(self):
         from google.cloud.bigquery.model import Model
 