From ec6b17c6bff013b1837d606ed6e8117f63993db5 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 12 Jan 2022 15:25:16 -0600 Subject: [PATCH] fix: remove query text from exception message, use `exception.debug_message` instead (#1105) Since query text can potentially contain sensitive information, remove it from the default exception message. This information is useful for debugging, so provide a `debug_message` attribute, which is not included in the exception representation (and thus the logs). Fixes internal issue 211616590 --- google/cloud/bigquery/job/query.py | 19 +++++++++++++------ tests/unit/job/test_query.py | 20 ++++++++++++++++---- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 36e388238..2dd945984 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -66,6 +66,7 @@ _CONTAINS_ORDER_BY = re.compile(r"ORDER\s+BY", re.IGNORECASE) +_EXCEPTION_FOOTER_TEMPLATE = "{message}\n\nLocation: {location}\nJob ID: {job_id}\n" _TIMEOUT_BUFFER_SECS = 0.1 @@ -1196,17 +1197,17 @@ def _blocking_poll(self, timeout=None, **kwargs): super(QueryJob, self)._blocking_poll(timeout=timeout, **kwargs) @staticmethod - def _format_for_exception(query, job_id): + def _format_for_exception(message: str, query: str): """Format a query for the output in exception message. Args: + message (str): The original exception message. query (str): The SQL query to format. - job_id (str): The ID of the job that ran the query. Returns: str: A formatted query text. """ - template = "\n\n(job ID: {job_id})\n\n{header}\n\n{ruler}\n{body}\n{ruler}" + template = "{message}\n\n{header}\n\n{ruler}\n{body}\n{ruler}" lines = query.splitlines() max_line_len = max(len(line) for line in lines) @@ -1223,7 +1224,7 @@ def _format_for_exception(query, job_id): "{:4}:{}".format(n, line) for n, line in enumerate(lines, start=1) ) - return template.format(job_id=job_id, header=header, ruler=ruler, body=body) + return template.format(message=message, header=header, ruler=ruler, body=body) def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): """API call: begin the job via a POST request @@ -1248,7 +1249,10 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): try: super(QueryJob, self)._begin(client=client, retry=retry, timeout=timeout) except exceptions.GoogleAPICallError as exc: - exc.message += self._format_for_exception(self.query, self.job_id) + exc.message = _EXCEPTION_FOOTER_TEMPLATE.format( + message=exc.message, location=self.location, job_id=self.job_id + ) + exc.debug_message = self._format_for_exception(exc.message, self.query) exc.query_job = self raise @@ -1447,7 +1451,10 @@ def do_get_result(): do_get_result() except exceptions.GoogleAPICallError as exc: - exc.message += self._format_for_exception(self.query, self.job_id) + exc.message = _EXCEPTION_FOOTER_TEMPLATE.format( + message=exc.message, location=self.location, job_id=self.job_id + ) + exc.debug_message = self._format_for_exception(exc.message, self.query) # type: ignore exc.query_job = self # type: ignore raise except requests.exceptions.Timeout as exc: diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 4da035b78..5fb76b9e9 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -1360,13 +1360,19 @@ def test_result_error(self): exc_job_instance = getattr(exc_info.exception, "query_job", None) self.assertIs(exc_job_instance, job) + # Query text could contain sensitive information, so it must not be + # included in logs / exception representation. full_text = str(exc_info.exception) assert job.job_id in full_text - assert "Query Job SQL Follows" in full_text + assert "Query Job SQL Follows" not in full_text + # It is useful to have query text available, so it is provided in a + # debug_message property. + debug_message = exc_info.exception.debug_message + assert "Query Job SQL Follows" in debug_message for i, line in enumerate(query.splitlines(), start=1): expected_line = "{}:{}".format(i, line) - assert expected_line in full_text + assert expected_line in debug_message def test_result_transport_timeout_error(self): query = textwrap.dedent( @@ -1452,13 +1458,19 @@ def test__begin_error(self): exc_job_instance = getattr(exc_info.exception, "query_job", None) self.assertIs(exc_job_instance, job) + # Query text could contain sensitive information, so it must not be + # included in logs / exception representation. full_text = str(exc_info.exception) assert job.job_id in full_text - assert "Query Job SQL Follows" in full_text + assert "Query Job SQL Follows" not in full_text + # It is useful to have query text available, so it is provided in a + # debug_message property. + debug_message = exc_info.exception.debug_message + assert "Query Job SQL Follows" in debug_message for i, line in enumerate(query.splitlines(), start=1): expected_line = "{}:{}".format(i, line) - assert expected_line in full_text + assert expected_line in debug_message def test__begin_w_timeout(self): PATH = "/projects/%s/jobs" % (self.PROJECT,)