Skip to content

Commit

Permalink
proposed switch to using backoff decorator
Browse files Browse the repository at this point in the history
Signed-off-by: Daniel Brauer <[email protected]>
  • Loading branch information
danbrauer committed Dec 10, 2024
1 parent 409e10f commit 9796696
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 72 deletions.
114 changes: 50 additions & 64 deletions cartography/intel/github/teams.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import logging
from collections import namedtuple
from time import sleep
from typing import Any
from typing import Dict
from typing import List
Expand All @@ -13,6 +12,7 @@
from cartography.intel.github.util import fetch_all
from cartography.intel.github.util import PaginatedGraphqlData
from cartography.models.github.teams import GitHubTeamSchema
from cartography.util import retries_with_backoff
from cartography.util import timeit

logger = logging.getLogger(__name__)
Expand All @@ -23,6 +23,18 @@
UserRole = namedtuple('UserRole', ['user_url', 'role'])


def backoff_handler(details: Dict) -> None:
"""
Custom backoff handler for GitHub calls in this module.
"""
team_name = details['kwargs'].get('team_name') or 'not present in kwargs'
updated_details = {**details, 'team_name': team_name}
logger.warning(
"Backing off {wait:0.1f} seconds after {tries} tries. Calling function {target} for team {team_name}"
.format(**updated_details),
)


@timeit
def get_teams(org: str, api_url: str, token: str) -> Tuple[PaginatedGraphqlData, Dict[str, Any]]:
org_teams_gql = """
Expand Down Expand Up @@ -70,40 +82,27 @@ def _get_team_repos_for_multiple_teams(
result[team_name] = []
continue

repo_urls = []
repo_permissions = []
repo_urls: List[str] = []
repo_permissions: List[str] = []

max_tries = 5

for current_try in range(1, max_tries + 1):
def get_teams_repos_inner_func(
org: str, api_url: str, token: str, team_name: str,
repo_urls: List[str], repo_permissions: List[str],
) -> None:
logger.info(f"Loading team repos for {team_name}.")
team_repos = _get_team_repos(org, api_url, token, team_name)

try:
# The `or []` is because `.nodes` can be None. See:
# https://docs.github.com/en/graphql/reference/objects#teamrepositoryconnection
for repo in team_repos.nodes or []:
repo_urls.append(repo['url'])

# The `or []` is because `.edges` can be None.
for edge in team_repos.edges or []:
repo_permissions.append(edge['permission'])
# We're done! Break out of the retry loop.
break

except TypeError:
# Handles issue #1334
logger.warning(
f"GitHub returned None when trying to find repo or permission data for team {team_name}.",
exc_info=True,
)
if current_try == max_tries:
raise RuntimeError(f"GitHub returned a None repo url for team {team_name}, retries exhausted.")
sleep_time_seconds = current_try ** 2
logger.warning(
f"Waiting {sleep_time_seconds} seconds before retrying to find repo/perm for team {team_name}.",
)
sleep(sleep_time_seconds)

# The `or []` is because `.nodes` can be None. See:
# https://docs.github.com/en/graphql/reference/objects#teamrepositoryconnection
for repo in team_repos.nodes or []:
repo_urls.append(repo['url'])
# The `or []` is because `.edges` can be None.
for edge in team_repos.edges or []:
repo_permissions.append(edge['permission'])

retries_with_backoff(get_teams_repos_inner_func, TypeError, 5, backoff_handler)(
org=org, api_url=api_url, token=token, team_name=team_name,
repo_urls=repo_urls, repo_permissions=repo_permissions,
)
# Shape = [(repo_url, 'WRITE'), ...]]
result[team_name] = [RepoPermission(url, perm) for url, perm in zip(repo_urls, repo_permissions)]
return result
Expand Down Expand Up @@ -168,39 +167,26 @@ def _get_team_users_for_multiple_teams(
result[team_name] = []
continue

user_urls = []
user_roles = []
user_urls: List[str] = []
user_roles: List[str] = []

max_tries = 5

for current_try in range(1, max_tries + 1):
def get_teams_users_inner_func(
org: str, api_url: str, token: str, team_name: str,
user_urls: List[str], user_roles: List[str],
) -> None:
logger.info(f"Loading team users for {team_name}.")
team_users = _get_team_users(org, api_url, token, team_name)

try:
# The `or []` is because `.nodes` can be None. See:
# https://docs.github.com/en/graphql/reference/objects#teammemberconnection
for user in team_users.nodes or []:
user_urls.append(user['url'])

# The `or []` is because `.edges` can be None.
for edge in team_users.edges or []:
user_roles.append(edge['role'])
# We're done! Break out of the retry loop.
break

except TypeError:
# Handles issue #1334
logger.warning(
f"GitHub returned None when trying to find user or role data for team {team_name}.",
exc_info=True,
)
if current_try == max_tries:
raise RuntimeError(f"GitHub returned a None member url for team {team_name}, retries exhausted.")
sleep_time_seconds = current_try ** 2
logger.warning(
f"Waiting {sleep_time_seconds} seconds before retrying to find user or role for team {team_name}.",
)
sleep(sleep_time_seconds)
# The `or []` is because `.nodes` can be None. See:
# https://docs.github.com/en/graphql/reference/objects#teammemberconnection
for user in team_users.nodes or []:
user_urls.append(user['url'])
# The `or []` is because `.edges` can be None.
for edge in team_users.edges or []:
user_roles.append(edge['role'])

retries_with_backoff(get_teams_users_inner_func, TypeError, 5, backoff_handler)(
org=org, api_url=api_url, token=token, team_name=team_name, user_urls=user_urls, user_roles=user_roles,
)

# Shape = [(user_url, 'MAINTAINER'), ...]]
result[team_name] = [UserRole(url, role) for url, role in zip(user_urls, user_roles)]
Expand Down
20 changes: 20 additions & 0 deletions cartography/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from typing import List
from typing import Optional
from typing import Set
from typing import Type
from typing import TypeVar
from typing import Union

Expand Down Expand Up @@ -288,6 +289,25 @@ def inner_function(*args, **kwargs): # type: ignore
return cast(AWSGetFunc, inner_function)


def retries_with_backoff(
func: Callable,
exceptionType: Type[Exception], max_tries: int, on_backoff: Callable,
) -> Callable:
"""
Adds retry with backoff to the given function. (Could expand the possible input parameters as needed.)
"""
@wraps(func)
@backoff.on_exception(
backoff.expo,
exceptionType,
max_tries=max_tries,
on_backoff=on_backoff,
)
def inner_function(*args, **kwargs): # type: ignore
return func(*args, **kwargs)
return cast(Callable, inner_function)


def dict_value_to_str(obj: Dict, key: str) -> Optional[str]:
"""
Convert the value referenced by the key in the dict to a string, if it exists, and return it. If it doesn't exist,
Expand Down
16 changes: 8 additions & 8 deletions tests/unit/cartography/intel/github/test_teams.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@ def test_get_team_users_happy_path(mock_get_team_users):


@patch('cartography.intel.github.teams._get_team_repos')
@patch('cartography.intel.github.teams.sleep')
def test_get_team_repos_github_returns_none(mock_sleep, mock_get_team_repos):
@patch('cartography.intel.github.teams.backoff_handler', spec=True)
def test_get_team_repos_github_returns_none(mock_backoff_handler, mock_get_team_repos):
# Arrange
team_data = [{'slug': 'team1', 'repositories': {'totalCount': 1}}]
mock_team_repos = MagicMock()
Expand All @@ -134,7 +134,7 @@ def test_get_team_repos_github_returns_none(mock_sleep, mock_get_team_repos):
mock_get_team_repos.return_value = mock_team_repos

# Assert we raise an exception
with pytest.raises(RuntimeError):
with pytest.raises(TypeError):
_get_team_repos_for_multiple_teams(
team_data,
'test-org',
Expand All @@ -144,12 +144,12 @@ def test_get_team_repos_github_returns_none(mock_sleep, mock_get_team_repos):

# Assert that we retry and give up
assert mock_get_team_repos.call_count == 5
assert mock_sleep.call_count == 4
assert mock_backoff_handler.call_count == 4


@patch('cartography.intel.github.teams._get_team_users')
@patch('cartography.intel.github.teams.sleep')
def test_get_team_users_github_returns_none(mock_sleep, mock_get_team_users):
@patch('cartography.intel.github.teams.backoff_handler', spec=True)
def test_get_team_users_github_returns_none(mock_backoff_handler, mock_get_team_users):
# Arrange
team_data = [{'slug': 'team1', 'members': {'totalCount': 1}}]
mock_team_users = MagicMock()
Expand All @@ -159,7 +159,7 @@ def test_get_team_users_github_returns_none(mock_sleep, mock_get_team_users):
mock_get_team_users.return_value = mock_team_users

# Assert we raise an exception
with pytest.raises(RuntimeError):
with pytest.raises(TypeError):
_get_team_users_for_multiple_teams(
team_data,
'test-org',
Expand All @@ -169,7 +169,7 @@ def test_get_team_users_github_returns_none(mock_sleep, mock_get_team_users):

# Assert that we retry and give up
assert mock_get_team_users.call_count == 5
assert mock_sleep.call_count == 4
assert mock_backoff_handler.call_count == 4


def test_transform_teams_empty_team_data():
Expand Down

0 comments on commit 9796696

Please sign in to comment.