diff --git a/cartography/intel/github/teams.py b/cartography/intel/github/teams.py index cd09dab28..b32ad3a80 100644 --- a/cartography/intel/github/teams.py +++ b/cartography/intel/github/teams.py @@ -1,6 +1,5 @@ import logging from collections import namedtuple -from time import sleep from typing import Any from typing import Dict from typing import List @@ -13,6 +12,7 @@ from cartography.intel.github.util import fetch_all from cartography.intel.github.util import PaginatedGraphqlData from cartography.models.github.teams import GitHubTeamSchema +from cartography.util import retries_with_backoff from cartography.util import timeit logger = logging.getLogger(__name__) @@ -23,6 +23,18 @@ UserRole = namedtuple('UserRole', ['user_url', 'role']) +def backoff_handler(details: Dict) -> None: + """ + Custom backoff handler for GitHub calls in this module. + """ + team_name = details['kwargs'].get('team_name') or 'not present in kwargs' + updated_details = {**details, 'team_name': team_name} + logger.warning( + "Backing off {wait:0.1f} seconds after {tries} tries. Calling function {target} for team {team_name}" + .format(**updated_details), + ) + + @timeit def get_teams(org: str, api_url: str, token: str) -> Tuple[PaginatedGraphqlData, Dict[str, Any]]: org_teams_gql = """ @@ -70,40 +82,27 @@ def _get_team_repos_for_multiple_teams( result[team_name] = [] continue - repo_urls = [] - repo_permissions = [] + repo_urls: List[str] = [] + repo_permissions: List[str] = [] - max_tries = 5 - - for current_try in range(1, max_tries + 1): + def get_teams_repos_inner_func( + org: str, api_url: str, token: str, team_name: str, + repo_urls: List[str], repo_permissions: List[str], + ) -> None: + logger.info(f"Loading team repos for {team_name}.") team_repos = _get_team_repos(org, api_url, token, team_name) - - try: - # The `or []` is because `.nodes` can be None. See: - # https://docs.github.com/en/graphql/reference/objects#teamrepositoryconnection - for repo in team_repos.nodes or []: - repo_urls.append(repo['url']) - - # The `or []` is because `.edges` can be None. - for edge in team_repos.edges or []: - repo_permissions.append(edge['permission']) - # We're done! Break out of the retry loop. - break - - except TypeError: - # Handles issue #1334 - logger.warning( - f"GitHub returned None when trying to find repo or permission data for team {team_name}.", - exc_info=True, - ) - if current_try == max_tries: - raise RuntimeError(f"GitHub returned a None repo url for team {team_name}, retries exhausted.") - sleep_time_seconds = current_try ** 2 - logger.warning( - f"Waiting {sleep_time_seconds} seconds before retrying to find repo/perm for team {team_name}.", - ) - sleep(sleep_time_seconds) - + # The `or []` is because `.nodes` can be None. See: + # https://docs.github.com/en/graphql/reference/objects#teamrepositoryconnection + for repo in team_repos.nodes or []: + repo_urls.append(repo['url']) + # The `or []` is because `.edges` can be None. + for edge in team_repos.edges or []: + repo_permissions.append(edge['permission']) + + retries_with_backoff(get_teams_repos_inner_func, TypeError, 5, backoff_handler)( + org=org, api_url=api_url, token=token, team_name=team_name, + repo_urls=repo_urls, repo_permissions=repo_permissions, + ) # Shape = [(repo_url, 'WRITE'), ...]] result[team_name] = [RepoPermission(url, perm) for url, perm in zip(repo_urls, repo_permissions)] return result @@ -168,39 +167,26 @@ def _get_team_users_for_multiple_teams( result[team_name] = [] continue - user_urls = [] - user_roles = [] + user_urls: List[str] = [] + user_roles: List[str] = [] - max_tries = 5 - - for current_try in range(1, max_tries + 1): + def get_teams_users_inner_func( + org: str, api_url: str, token: str, team_name: str, + user_urls: List[str], user_roles: List[str], + ) -> None: + logger.info(f"Loading team users for {team_name}.") team_users = _get_team_users(org, api_url, token, team_name) - - try: - # The `or []` is because `.nodes` can be None. See: - # https://docs.github.com/en/graphql/reference/objects#teammemberconnection - for user in team_users.nodes or []: - user_urls.append(user['url']) - - # The `or []` is because `.edges` can be None. - for edge in team_users.edges or []: - user_roles.append(edge['role']) - # We're done! Break out of the retry loop. - break - - except TypeError: - # Handles issue #1334 - logger.warning( - f"GitHub returned None when trying to find user or role data for team {team_name}.", - exc_info=True, - ) - if current_try == max_tries: - raise RuntimeError(f"GitHub returned a None member url for team {team_name}, retries exhausted.") - sleep_time_seconds = current_try ** 2 - logger.warning( - f"Waiting {sleep_time_seconds} seconds before retrying to find user or role for team {team_name}.", - ) - sleep(sleep_time_seconds) + # The `or []` is because `.nodes` can be None. See: + # https://docs.github.com/en/graphql/reference/objects#teammemberconnection + for user in team_users.nodes or []: + user_urls.append(user['url']) + # The `or []` is because `.edges` can be None. + for edge in team_users.edges or []: + user_roles.append(edge['role']) + + retries_with_backoff(get_teams_users_inner_func, TypeError, 5, backoff_handler)( + org=org, api_url=api_url, token=token, team_name=team_name, user_urls=user_urls, user_roles=user_roles, + ) # Shape = [(user_url, 'MAINTAINER'), ...]] result[team_name] = [UserRole(url, role) for url, role in zip(user_urls, user_roles)] diff --git a/cartography/util.py b/cartography/util.py index 6c329272f..fbb9358d0 100644 --- a/cartography/util.py +++ b/cartography/util.py @@ -15,6 +15,7 @@ from typing import List from typing import Optional from typing import Set +from typing import Type from typing import TypeVar from typing import Union @@ -288,6 +289,25 @@ def inner_function(*args, **kwargs): # type: ignore return cast(AWSGetFunc, inner_function) +def retries_with_backoff( + func: Callable, + exceptionType: Type[Exception], max_tries: int, on_backoff: Callable, +) -> Callable: + """ + Adds retry with backoff to the given function. (Could expand the possible input parameters as needed.) + """ + @wraps(func) + @backoff.on_exception( + backoff.expo, + exceptionType, + max_tries=max_tries, + on_backoff=on_backoff, + ) + def inner_function(*args, **kwargs): # type: ignore + return func(*args, **kwargs) + return cast(Callable, inner_function) + + def dict_value_to_str(obj: Dict, key: str) -> Optional[str]: """ Convert the value referenced by the key in the dict to a string, if it exists, and return it. If it doesn't exist, diff --git a/tests/unit/cartography/intel/github/test_teams.py b/tests/unit/cartography/intel/github/test_teams.py index fa54bf9f1..defeaea15 100644 --- a/tests/unit/cartography/intel/github/test_teams.py +++ b/tests/unit/cartography/intel/github/test_teams.py @@ -123,8 +123,8 @@ def test_get_team_users_happy_path(mock_get_team_users): @patch('cartography.intel.github.teams._get_team_repos') -@patch('cartography.intel.github.teams.sleep') -def test_get_team_repos_github_returns_none(mock_sleep, mock_get_team_repos): +@patch('cartography.intel.github.teams.backoff_handler', spec=True) +def test_get_team_repos_github_returns_none(mock_backoff_handler, mock_get_team_repos): # Arrange team_data = [{'slug': 'team1', 'repositories': {'totalCount': 1}}] mock_team_repos = MagicMock() @@ -134,7 +134,7 @@ def test_get_team_repos_github_returns_none(mock_sleep, mock_get_team_repos): mock_get_team_repos.return_value = mock_team_repos # Assert we raise an exception - with pytest.raises(RuntimeError): + with pytest.raises(TypeError): _get_team_repos_for_multiple_teams( team_data, 'test-org', @@ -144,12 +144,12 @@ def test_get_team_repos_github_returns_none(mock_sleep, mock_get_team_repos): # Assert that we retry and give up assert mock_get_team_repos.call_count == 5 - assert mock_sleep.call_count == 4 + assert mock_backoff_handler.call_count == 4 @patch('cartography.intel.github.teams._get_team_users') -@patch('cartography.intel.github.teams.sleep') -def test_get_team_users_github_returns_none(mock_sleep, mock_get_team_users): +@patch('cartography.intel.github.teams.backoff_handler', spec=True) +def test_get_team_users_github_returns_none(mock_backoff_handler, mock_get_team_users): # Arrange team_data = [{'slug': 'team1', 'members': {'totalCount': 1}}] mock_team_users = MagicMock() @@ -159,7 +159,7 @@ def test_get_team_users_github_returns_none(mock_sleep, mock_get_team_users): mock_get_team_users.return_value = mock_team_users # Assert we raise an exception - with pytest.raises(RuntimeError): + with pytest.raises(TypeError): _get_team_users_for_multiple_teams( team_data, 'test-org', @@ -169,7 +169,7 @@ def test_get_team_users_github_returns_none(mock_sleep, mock_get_team_users): # Assert that we retry and give up assert mock_get_team_users.call_count == 5 - assert mock_sleep.call_count == 4 + assert mock_backoff_handler.call_count == 4 def test_transform_teams_empty_team_data():