-
Notifications
You must be signed in to change notification settings - Fork 306
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: add default timeout for Client.get_job() #1935
Changes from 18 commits
83c63e2
3b0b09b
4231ba1
24baa1c
358b32e
73ad570
66fde13
2a23cda
1f1abc3
5728272
c06ca38
1d58589
9bfe28c
97844d1
0b1220e
1a5fa7c
ed5f6cb
4d77e36
bc398ee
ef7b71e
56893c0
4987ccd
253e274
bd25eea
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,14 +19,14 @@ | |
import http | ||
import threading | ||
import typing | ||
from typing import ClassVar, Dict, Optional, Sequence | ||
from typing import Any, ClassVar, Dict, Optional, Sequence, Union | ||
|
||
from google.api_core import retry as retries | ||
from google.api_core import exceptions | ||
import google.api_core.future.polling | ||
|
||
from google.cloud.bigquery import _helpers | ||
from google.cloud.bigquery.retry import DEFAULT_RETRY | ||
from google.cloud.bigquery.retry import DEFAULT_RETRY, POLLING_DEFAULT_VALUE | ||
from google.cloud.bigquery._helpers import _int_or_none | ||
|
||
|
||
|
@@ -801,7 +801,7 @@ def reload( | |
self, | ||
client=None, | ||
retry: "retries.Retry" = DEFAULT_RETRY, | ||
timeout: Optional[float] = None, | ||
timeout: Optional[Union[float, object]] = POLLING_DEFAULT_VALUE, | ||
): | ||
"""API call: refresh job properties via a GET request. | ||
|
||
|
@@ -814,28 +814,39 @@ def reload( | |
``client`` stored on the current dataset. | ||
|
||
retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. | ||
timeout (Optional[float]): | ||
timeout (Optinal[Union[float, \ | ||
google.api_core.future.polling.PollingFuture._DEFAULT_VALUE, \ | ||
]]): | ||
The number of seconds to wait for the underlying HTTP transport | ||
before using ``retry``. | ||
""" | ||
client = self._require_client(client) | ||
|
||
extra_params = {} | ||
if self.location: | ||
extra_params["location"] = self.location | ||
span_attributes = {"path": self.path} | ||
kwargs: Dict[str, Any] = {} | ||
if type(timeout) is object: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's add some comments explaining why we need to omit the timeout if we get There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added explanation why |
||
# When timeout is the sentinel value, we use the default API-level | ||
# timeout at Client.get_job(). | ||
pass | ||
elif timeout is None: | ||
# If timeout is None, wait indefinitely even at API-level. | ||
kwargs["timeout"] = None | ||
elif isinstance(timeout, (int, float)): | ||
kwargs["timeout"] = timeout | ||
else: | ||
raise ValueError( | ||
f"Unsupported timeout type {type(timeout)}. " | ||
"Must be float, int, None, or " | ||
"google.api_core.future.polling.PollingFuture._DEFAULT_VALUE." | ||
) | ||
|
||
api_response = client._call_api( | ||
retry, | ||
span_name="BigQuery.job.reload", | ||
span_attributes=span_attributes, | ||
job_ref=self, | ||
method="GET", | ||
path=self.path, | ||
query_params=extra_params, | ||
timeout=timeout, | ||
got_job = client.get_job( | ||
self, | ||
project=self.project, | ||
location=self.location, | ||
retry=retry, | ||
**kwargs, | ||
) | ||
self._set_properties(api_response) | ||
self._set_properties(got_job._properties) | ||
|
||
def cancel( | ||
self, | ||
|
@@ -913,7 +924,7 @@ def _set_future_result(self): | |
def done( | ||
self, | ||
retry: "retries.Retry" = DEFAULT_RETRY, | ||
timeout: Optional[float] = None, | ||
timeout: Optional[Union[float, object]] = POLLING_DEFAULT_VALUE, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same here. I don't think this needs to handle There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
reload: bool = True, | ||
) -> bool: | ||
"""Checks if the job is complete. | ||
|
@@ -922,7 +933,9 @@ def done( | |
retry (Optional[google.api_core.retry.Retry]): | ||
How to retry the RPC. If the job state is ``DONE``, retrying is aborted | ||
early, as the job will not change anymore. | ||
timeout (Optional[float]): | ||
timeout (Optinal[Union[float, \ | ||
google.api_core.future.polling.PollingFuture._DEFAULT_VALUE, \ | ||
]]): | ||
The number of seconds to wait for the underlying HTTP transport | ||
before using ``retry``. | ||
reload (Optional[bool]): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -40,7 +40,11 @@ | |
StructQueryParameter, | ||
UDFResource, | ||
) | ||
from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_JOB_RETRY | ||
from google.cloud.bigquery.retry import ( | ||
DEFAULT_RETRY, | ||
DEFAULT_JOB_RETRY, | ||
POLLING_DEFAULT_VALUE, | ||
) | ||
from google.cloud.bigquery.routine import RoutineReference | ||
from google.cloud.bigquery.schema import SchemaField | ||
from google.cloud.bigquery.table import _EmptyRowIterator | ||
|
@@ -1437,7 +1441,7 @@ def result( # type: ignore # (incompatible with supertype) | |
page_size: Optional[int] = None, | ||
max_results: Optional[int] = None, | ||
retry: Optional[retries.Retry] = DEFAULT_RETRY, | ||
timeout: Optional[float] = None, | ||
timeout: Optional[Union[float, object]] = POLLING_DEFAULT_VALUE, | ||
start_index: Optional[int] = None, | ||
job_retry: Optional[retries.Retry] = DEFAULT_JOB_RETRY, | ||
) -> Union["RowIterator", _EmptyRowIterator]: | ||
|
@@ -1457,11 +1461,14 @@ def result( # type: ignore # (incompatible with supertype) | |
is ``DONE``, retrying is aborted early even if the | ||
results are not available, as this will not change | ||
anymore. | ||
timeout (Optional[float]): | ||
timeout (Optinal[Union[float, \ | ||
google.api_core.future.polling.PollingFuture._DEFAULT_VALUE, \ | ||
]]): | ||
The number of seconds to wait for the underlying HTTP transport | ||
before using ``retry``. | ||
If multiple requests are made under the hood, ``timeout`` | ||
applies to each individual request. | ||
before using ``retry``. If ``None``, wait indefinitely | ||
unless a retriable error is returned. If unset, only the | ||
Linchin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
underlying Client.get_job() API call has timeout, but we still | ||
Linchin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
wait indefinitely for the job to finish. | ||
start_index (Optional[int]): | ||
The zero-based index of the starting row to read. | ||
job_retry (Optional[google.api_core.retry.Retry]): | ||
|
@@ -1507,6 +1514,15 @@ def result( # type: ignore # (incompatible with supertype) | |
# Intentionally omit job_id and query_id since this doesn't | ||
# actually correspond to a finished query job. | ||
) | ||
|
||
# When timeout has default sentinel value, only pass the sentinel | ||
# timeout to QueryJob.done(), and use None for the other timeouts. | ||
if type(timeout) is object: | ||
timeout = None | ||
get_job_timeout = POLLING_DEFAULT_VALUE | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not set to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
else: | ||
get_job_timeout = timeout | ||
|
||
try: | ||
retry_do_query = getattr(self, "_retry_do_query", None) | ||
if retry_do_query is not None: | ||
|
@@ -1548,7 +1564,7 @@ def is_job_done(): | |
# rateLimitExceeded errors are ambiguous. We want to know if | ||
# the query job failed and not just the call to | ||
# jobs.getQueryResults. | ||
if self.done(retry=retry, timeout=timeout): | ||
if self.done(retry=retry, timeout=get_job_timeout): | ||
# If it's already failed, we might as well stop. | ||
job_failed_exception = self.exception() | ||
if job_failed_exception is not None: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd much rather this defaulted to the same timeout as
get_job()
and anywhere that callsreload()
, we make sure we do the check forPOLLING_DEFAULT_VALUE
before callingreload()
to make sure we only pass infloat
orNone
or omit the value.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done