Skip to content

Commit

Permalink
fix(ingestion/bigquery): user exceeded quota for concurrent project.lists requests (datahub-project#10578)
Browse files Browse the repository at this point in the history

Co-authored-by: Harshal Sheth <[email protected]>
  • Loading branch information
2 people authored and sleeperdeep committed Jun 25, 2024
1 parent 94e826c commit 3610da0
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

@dataclass
class BigQuerySchemaApiPerfReport(Report):
    # Number of projects returned by the most recent projects.list call.
    num_list_projects: int = 0
    # Count of retried projects.list requests (incremented by the retry
    # predicate when a request fails, e.g. on quota-exceeded errors).
    num_list_projects_retry_request: int = 0
    # Timers (used as context managers) measuring cumulative time spent
    # in each BigQuery schema API operation.
    list_projects: PerfTimer = field(default_factory=PerfTimer)
    list_datasets: PerfTimer = field(default_factory=PerfTimer)
    get_columns_for_dataset: PerfTimer = field(default_factory=PerfTimer)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from datetime import datetime, timezone
from typing import Any, Dict, Iterable, Iterator, List, Optional

from google.api_core import retry
from google.cloud import bigquery, datacatalog_v1
from google.cloud.bigquery.table import (
RowIterator,
Expand Down Expand Up @@ -154,14 +155,33 @@ def get_query_result(self, query: str) -> RowIterator:
return resp.result()

def get_projects(self) -> List[BigqueryProject]:
    """List all BigQuery projects visible to the configured client.

    Returns:
        A list of ``BigqueryProject`` (id + friendly name). On any
        failure the error is logged and an empty list is returned, so
        callers never see an exception from this method.
    """

    def _should_retry(exc: BaseException) -> bool:
        # Retry on every exception; the overall Retry timeout below
        # bounds how long we keep trying. Each retry is counted in the
        # perf report for observability.
        logger.debug(
            f"Exception occurred for project.list api. Reason: {exc}. Retrying api request..."
        )
        self.report.num_list_projects_retry_request += 1
        return True

    with self.report.list_projects:
        try:
            # The BigQuery API rate-limits projects.list calls (about
            # 2 requests per second):
            # https://cloud.google.com/bigquery/quotas#api_request_quotas
            # When the limit is hit, the call fails with:
            # 'Quota exceeded: Your user exceeded quota for concurrent project.lists requests.'
            # Hence we retry the request for up to 15 minutes (900 s),
            # with exponential backoff between 10 s and 180 s.
            # External rate limiting and passing max_results/page_size
            # to list_projects were tried first but did not resolve the
            # issue.
            projects_iterator = self.bq_client.list_projects(
                retry=retry.Retry(
                    predicate=_should_retry, initial=10, maximum=180, timeout=900
                )
            )
            projects: List[BigqueryProject] = [
                BigqueryProject(id=p.project_id, name=p.friendly_name)
                for p in projects_iterator
            ]
            self.report.num_list_projects = len(projects)
            return projects
        except Exception as e:
            logger.error(f"Error getting projects. {e}", exc_info=True)
            return []
Expand Down

0 comments on commit 3610da0

Please sign in to comment.