From 89766ae32664345427e1337482d727a7fac1443d Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 11 Dec 2024 14:48:02 -0500 Subject: [PATCH 1/5] changelog --- .changes/unreleased/Fixes-20241211-144752.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changes/unreleased/Fixes-20241211-144752.yaml diff --git a/.changes/unreleased/Fixes-20241211-144752.yaml b/.changes/unreleased/Fixes-20241211-144752.yaml new file mode 100644 index 000000000..e666d5c31 --- /dev/null +++ b/.changes/unreleased/Fixes-20241211-144752.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Fix retry scenarios so that dbt always retries when BigQuery recommends a retry +time: 2024-12-11T14:47:52.36905-05:00 +custom: + Author: mikealfare + Issue: "263" From 5900683262edf7ac9e9c57cfde373c01388cc771 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 11 Dec 2024 15:33:44 -0500 Subject: [PATCH 2/5] replace our custom list of retryable exceptions with BigQuery's defaults; keep BadRequest as retryable as that is not in BigQuery's defaults and could cause a breaking change by removing it --- dbt/adapters/bigquery/retry.py | 24 +++++++------------ .../unit/test_bigquery_connection_manager.py | 2 +- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/dbt/adapters/bigquery/retry.py b/dbt/adapters/bigquery/retry.py index 391c00e46..36200703b 100644 --- a/dbt/adapters/bigquery/retry.py +++ b/dbt/adapters/bigquery/retry.py @@ -3,8 +3,8 @@ from google.api_core.exceptions import Forbidden from google.api_core.future.polling import DEFAULT_POLLING from google.api_core.retry import Retry -from google.cloud.bigquery.retry import DEFAULT_RETRY -from google.cloud.exceptions import BadGateway, BadRequest, ServerError +from google.cloud.bigquery.retry import DEFAULT_RETRY, _job_should_retry +from google.cloud.exceptions import BadRequest from requests.exceptions import ConnectionError from dbt.adapters.contracts.connection import Connection, ConnectionState @@ -74,6 +74,11 @@ def __init__(self, retries: int) -> None: self._retries: int = retries self._error_count = 0 + @staticmethod + def _is_retryable(error: Exception) -> bool: + """Return true for errors that are unlikely to occur again if retried.""" + return _job_should_retry(error) or isinstance(error, BadRequest) + def __call__(self, error: Exception) -> bool: # exit immediately if the user does not want retries if self._retries == 0: @@ -83,7 +88,7 @@ def __call__(self, error: Exception) -> bool: self._error_count += 1 # if the error is retryable, and we haven't breached the threshold, log and continue - if _is_retryable(error) and self._error_count <= self._retries: + if self._is_retryable(error) and self._error_count <= self._retries: _logger.debug( f"Retry attempt {self._error_count} of {self._retries} after error: {repr(error)}" ) @@ -113,16 +118,3 @@ def on_error(error: Exception): raise FailedToConnectError(str(e)) return on_error - - -def _is_retryable(error: Exception) -> bool: - """Return true for errors that are unlikely to occur again if retried.""" - if isinstance( - error, (BadGateway, BadRequest, ConnectionError, ConnectionResetError, ServerError) - ): - return True - elif isinstance(error, Forbidden) and any( - e["reason"] == "rateLimitExceeded" for e in error.errors - ): - return True - return False diff --git a/tests/unit/test_bigquery_connection_manager.py b/tests/unit/test_bigquery_connection_manager.py index d4c95792e..c23ffc58b 100644 --- a/tests/unit/test_bigquery_connection_manager.py +++ b/tests/unit/test_bigquery_connection_manager.py @@ -53,7 +53,7 @@ def generate_connection_reset_error(): assert new_mock_client is not self.mock_client def test_is_retryable(self): - _is_retryable = dbt.adapters.bigquery.retry._is_retryable + _is_retryable = dbt.adapters.bigquery.retry._DeferredException(1)._is_retryable exceptions = dbt.adapters.bigquery.impl.google.cloud.exceptions internal_server_error = exceptions.InternalServerError("code broke") bad_request_error = exceptions.BadRequest("code broke") From b96493ae0227c1165e480d5c08e2136e5c67b8d6 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 11 Dec 2024 15:39:16 -0500 Subject: [PATCH 3/5] remove unused imports --- dbt/adapters/bigquery/retry.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dbt/adapters/bigquery/retry.py b/dbt/adapters/bigquery/retry.py index 36200703b..47a94f252 100644 --- a/dbt/adapters/bigquery/retry.py +++ b/dbt/adapters/bigquery/retry.py @@ -1,6 +1,5 @@ from typing import Callable, Optional -from google.api_core.exceptions import Forbidden from google.api_core.future.polling import DEFAULT_POLLING from google.api_core.retry import Retry from google.cloud.bigquery.retry import DEFAULT_RETRY, _job_should_retry From 0d4994ce0fee269fbf40a80daf445c8824dcbe02 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 11 Dec 2024 16:48:34 -0500 Subject: [PATCH 4/5] remove BadRequest as a retryable error --- dbt/adapters/bigquery/retry.py | 7 +------ tests/unit/test_bigquery_connection_manager.py | 4 ++-- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/dbt/adapters/bigquery/retry.py b/dbt/adapters/bigquery/retry.py index 47a94f252..df1cae0b4 100644 --- a/dbt/adapters/bigquery/retry.py +++ b/dbt/adapters/bigquery/retry.py @@ -3,7 +3,6 @@ from google.api_core.future.polling import DEFAULT_POLLING from google.api_core.retry import Retry from google.cloud.bigquery.retry import DEFAULT_RETRY, _job_should_retry -from google.cloud.exceptions import BadRequest from requests.exceptions import ConnectionError from dbt.adapters.contracts.connection import Connection, ConnectionState @@ -73,10 +72,6 @@ def __init__(self, retries: int) -> None: self._retries: int = retries self._error_count = 0 - @staticmethod - def _is_retryable(error: Exception) -> bool: - """Return true for errors that are unlikely to occur again if retried.""" - return _job_should_retry(error) or isinstance(error, BadRequest) def __call__(self, error: Exception) -> bool: # exit immediately if the user does not want retries @@ -87,7 +82,7 @@ def __call__(self, error: Exception) -> bool: self._error_count += 1 # if the error is retryable, and we haven't breached the threshold, log and continue - if self._is_retryable(error) and self._error_count <= self._retries: + if _job_should_retry(error) and self._error_count <= self._retries: _logger.debug( f"Retry attempt {self._error_count} of {self._retries} after error: {repr(error)}" ) diff --git a/tests/unit/test_bigquery_connection_manager.py b/tests/unit/test_bigquery_connection_manager.py index c23ffc58b..33df7ebaa 100644 --- a/tests/unit/test_bigquery_connection_manager.py +++ b/tests/unit/test_bigquery_connection_manager.py @@ -53,7 +53,7 @@ def generate_connection_reset_error(): assert new_mock_client is not self.mock_client def test_is_retryable(self): - _is_retryable = dbt.adapters.bigquery.retry._DeferredException(1)._is_retryable + _is_retryable = google.cloud.bigquery.retry._job_should_retry exceptions = dbt.adapters.bigquery.impl.google.cloud.exceptions internal_server_error = exceptions.InternalServerError("code broke") bad_request_error = exceptions.BadRequest("code broke") @@ -65,7 +65,7 @@ def test_is_retryable(self): service_unavailable_error = exceptions.ServiceUnavailable("service is unavailable") self.assertTrue(_is_retryable(internal_server_error)) - self.assertTrue(_is_retryable(bad_request_error)) + self.assertFalse(_is_retryable(bad_request_error)) # this was removed after initially being included self.assertTrue(_is_retryable(connection_error)) self.assertFalse(_is_retryable(client_error)) self.assertTrue(_is_retryable(rate_limit_error)) From a17b0608e957b625719841cba1dc41fe8c999bdf Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 11 Dec 2024 16:51:55 -0500 Subject: [PATCH 5/5] code quality checks --- dbt/adapters/bigquery/retry.py | 1 - tests/unit/test_bigquery_connection_manager.py | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/dbt/adapters/bigquery/retry.py b/dbt/adapters/bigquery/retry.py index df1cae0b4..2cbdaa245 100644 --- a/dbt/adapters/bigquery/retry.py +++ b/dbt/adapters/bigquery/retry.py @@ -72,7 +72,6 @@ def __init__(self, retries: int) -> None: self._retries: int = retries self._error_count = 0 - def __call__(self, error: Exception) -> bool: # exit immediately if the user does not want retries if self._retries == 0: diff --git a/tests/unit/test_bigquery_connection_manager.py b/tests/unit/test_bigquery_connection_manager.py index 33df7ebaa..e7afd692f 100644 --- a/tests/unit/test_bigquery_connection_manager.py +++ b/tests/unit/test_bigquery_connection_manager.py @@ -65,7 +65,9 @@ def test_is_retryable(self): service_unavailable_error = exceptions.ServiceUnavailable("service is unavailable") self.assertTrue(_is_retryable(internal_server_error)) - self.assertFalse(_is_retryable(bad_request_error)) # this was removed after initially being included + self.assertFalse( + _is_retryable(bad_request_error) + ) # this was removed after initially being included self.assertTrue(_is_retryable(connection_error)) self.assertFalse(_is_retryable(client_error)) self.assertTrue(_is_retryable(rate_limit_error))