From 5d5f85677331e250f7eb2fd0cc694c7446af8e64 Mon Sep 17 00:00:00 2001 From: Daniel Brauer Date: Wed, 11 Dec 2024 12:53:06 -0500 Subject: [PATCH] Github "Immediate" User Team Membership (#1395) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Summary This PR adds to the Github graph, adding user membership in teams, for users who are 'immediate' members of a team. In case it is unclear or for people newer to Github, note: this is focusing on ['immediate'](https://docs.github.com/en/graphql/reference/enums#teammembershiptype) membership to a team, meaning a member is in the team directly as opposed to being in a child team. A user could be considered a member of a team if they are members of a child team, but this PR maps only 'immediate' membership. (In a follow-up PR we'd like to add child teams to the graph, which we think will complete the membership picture.) We think this is a valuable addition to the graph because our broad intent is to understand all access a user has, and (at least in our org) most access to repos is granted via team. If we do not know who is in the team, then, we do not know who has access. #### Illustration of the intention ![Cartography AMPS User Direct Team Membership](https://github.com/user-attachments/assets/7d3d70ab-ab16-4a21-8970-3f9d2b8fe525) #### Screencaps **EXAMPLE USER LOOKUP** BEFORE (empty result because nothing exists) ![Screenshot 2024-12-03 at 5 07 00 PM](https://github.com/user-attachments/assets/908e8e96-179d-494a-ac7d-acc03ee54ab1) AFTER ![Screenshot 2024-12-03 at 5 06 04 PM](https://github.com/user-attachments/assets/073376ee-b8d6-4b68-8d7e-246e9f9601a2) **OVERVIEW OF COUNTS OF EACH TYPE** BEFORE (empty result because nothing exists) ![Screenshot 2024-12-03 at 5 09 03 PM](https://github.com/user-attachments/assets/c4758f8f-71ec-49b5-a94a-c3d464a9a51e) AFTER ![Screenshot 2024-12-03 at 5 08 44 PM](https://github.com/user-attachments/assets/cc0be586-c1bf-440b-a8a0-6bc236272509) ### Related issues or links None ### Checklist Provide proof that this works (this makes reviews move faster). Please perform one or more of the following: - [x] Update/add unit or integration tests. - [x] Include a screenshot showing what the graph looked like before and after your changes. - [ ] Include console log trace showing what happened before and after your changes. If you are changing a node or relationship: - [x] Update the [schema](https://github.com/lyft/cartography/tree/master/docs/root/modules) and [readme](https://github.com/lyft/cartography/blob/master/docs/schema/README.md). **N/A** If you are implementing a new intel module: - [ ] Use the NodeSchema [data model](https://cartography-cncf.github.io/cartography/dev/writing-intel-modules.html#defining-a-node). --------- Signed-off-by: Daniel Brauer --- cartography/intel/github/teams.py | 179 ++++++++++--- cartography/models/github/teams.py | 29 +++ cartography/util.py | 20 ++ docs/root/modules/github/schema.md | 13 + tests/data/github/teams.py | 18 ++ .../cartography/intel/github/test_teams.py | 29 ++- .../cartography/intel/github/test_users.py | 19 ++ .../cartography/intel/github/test_teams.py | 236 +++++++++++++++++- 8 files changed, 495 insertions(+), 48 deletions(-) diff --git a/cartography/intel/github/teams.py b/cartography/intel/github/teams.py index 0f5adc9886..b32ad3a804 100644 --- a/cartography/intel/github/teams.py +++ b/cartography/intel/github/teams.py @@ -1,6 +1,5 @@ import logging from collections import namedtuple -from time import sleep from typing import Any from typing import Dict from typing import List @@ -13,11 +12,27 @@ from cartography.intel.github.util import fetch_all from cartography.intel.github.util import PaginatedGraphqlData from cartography.models.github.teams import GitHubTeamSchema +from cartography.util import retries_with_backoff from cartography.util import timeit logger = logging.getLogger(__name__) +# A team's permission on a repo: https://docs.github.com/en/graphql/reference/enums#repositorypermission RepoPermission = namedtuple('RepoPermission', ['repo_url', 'permission']) +# A team member's role: https://docs.github.com/en/graphql/reference/enums#teammemberrole +UserRole = namedtuple('UserRole', ['user_url', 'role']) + + +def backoff_handler(details: Dict) -> None: + """ + Custom backoff handler for GitHub calls in this module. + """ + team_name = details['kwargs'].get('team_name') or 'not present in kwargs' + updated_details = {**details, 'team_name': team_name} + logger.warning( + "Backing off {wait:0.1f} seconds after {tries} tries. Calling function {target} for team {team_name}" + .format(**updated_details), + ) @timeit @@ -32,7 +47,10 @@ def get_teams(org: str, api_url: str, token: str) -> Tuple[PaginatedGraphqlData, slug url description - repositories(first: 100) { + repositories { + totalCount + } + members(membership: IMMEDIATE) { totalCount } } @@ -64,36 +82,27 @@ def _get_team_repos_for_multiple_teams( result[team_name] = [] continue - repo_urls = [] - repo_permissions = [] - - max_tries = 5 + repo_urls: List[str] = [] + repo_permissions: List[str] = [] - for current_try in range(1, max_tries + 1): + def get_teams_repos_inner_func( + org: str, api_url: str, token: str, team_name: str, + repo_urls: List[str], repo_permissions: List[str], + ) -> None: + logger.info(f"Loading team repos for {team_name}.") team_repos = _get_team_repos(org, api_url, token, team_name) + # The `or []` is because `.nodes` can be None. See: + # https://docs.github.com/en/graphql/reference/objects#teamrepositoryconnection + for repo in team_repos.nodes or []: + repo_urls.append(repo['url']) + # The `or []` is because `.edges` can be None. + for edge in team_repos.edges or []: + repo_permissions.append(edge['permission']) - try: - # The `or []` is because `.nodes` can be None. See: - # https://docs.github.com/en/graphql/reference/objects#teamrepositoryconnection - for repo in team_repos.nodes or []: - repo_urls.append(repo['url']) - - # The `or []` is because `.edges` can be None. - for edge in team_repos.edges or []: - repo_permissions.append(edge['permission']) - # We're done! Break out of the retry loop. - break - - except TypeError: - # Handles issue #1334 - logger.warning( - f"GitHub returned None when trying to find repo or permission data for team {team_name}.", - exc_info=True, - ) - if current_try == max_tries: - raise RuntimeError(f"GitHub returned a None repo url for team {team_name}, retries exhausted.") - sleep(current_try ** 2) - + retries_with_backoff(get_teams_repos_inner_func, TypeError, 5, backoff_handler)( + org=org, api_url=api_url, token=token, team_name=team_name, + repo_urls=repo_urls, repo_permissions=repo_permissions, + ) # Shape = [(repo_url, 'WRITE'), ...]] result[team_name] = [RepoPermission(url, perm) for url, perm in zip(repo_urls, repo_permissions)] return result @@ -142,10 +151,97 @@ def _get_team_repos(org: str, api_url: str, token: str, team: str) -> PaginatedG return team_repos +def _get_team_users_for_multiple_teams( + team_raw_data: list[dict[str, Any]], + org: str, + api_url: str, + token: str, +) -> dict[str, list[UserRole]]: + result: dict[str, list[UserRole]] = {} + for team in team_raw_data: + team_name = team['slug'] + user_count = team['members']['totalCount'] + + if user_count == 0: + # This team has no users so let's move on + result[team_name] = [] + continue + + user_urls: List[str] = [] + user_roles: List[str] = [] + + def get_teams_users_inner_func( + org: str, api_url: str, token: str, team_name: str, + user_urls: List[str], user_roles: List[str], + ) -> None: + logger.info(f"Loading team users for {team_name}.") + team_users = _get_team_users(org, api_url, token, team_name) + # The `or []` is because `.nodes` can be None. See: + # https://docs.github.com/en/graphql/reference/objects#teammemberconnection + for user in team_users.nodes or []: + user_urls.append(user['url']) + # The `or []` is because `.edges` can be None. + for edge in team_users.edges or []: + user_roles.append(edge['role']) + + retries_with_backoff(get_teams_users_inner_func, TypeError, 5, backoff_handler)( + org=org, api_url=api_url, token=token, team_name=team_name, user_urls=user_urls, user_roles=user_roles, + ) + + # Shape = [(user_url, 'MAINTAINER'), ...]] + result[team_name] = [UserRole(url, role) for url, role in zip(user_urls, user_roles)] + return result + + +@timeit +def _get_team_users(org: str, api_url: str, token: str, team: str) -> PaginatedGraphqlData: + team_users_gql = """ + query($login: String!, $team: String!, $cursor: String) { + organization(login: $login) { + url + login + team(slug: $team) { + slug + members(first: 100, after: $cursor, membership: IMMEDIATE) { + totalCount + nodes { + url + } + edges { + role + } + pageInfo { + endCursor + hasNextPage + } + } + } + } + rateLimit { + limit + cost + remaining + resetAt + } + } + """ + team_users, _ = fetch_all( + token, + api_url, + org, + team_users_gql, + 'team', + resource_inner_type='members', + team=team, + ) + return team_users + + def transform_teams( team_paginated_data: PaginatedGraphqlData, org_data: Dict[str, Any], team_repo_data: dict[str, list[RepoPermission]], + team_user_data: dict[str, list[UserRole]], ) -> list[dict[str, Any]]: result = [] for team in team_paginated_data.nodes: @@ -155,19 +251,29 @@ def transform_teams( 'url': team['url'], 'description': team['description'], 'repo_count': team['repositories']['totalCount'], + 'member_count': team['members']['totalCount'], 'org_url': org_data['url'], 'org_login': org_data['login'], } repo_permissions = team_repo_data[team_name] - if not repo_permissions: + user_roles = team_user_data[team_name] + + if not repo_permissions and not user_roles: result.append(repo_info) continue - # `permission` can be one of ADMIN, READ, WRITE, TRIAGE, or MAINTAIN - for repo_url, permission in repo_permissions: - repo_info_copy = repo_info.copy() - repo_info_copy[permission] = repo_url - result.append(repo_info_copy) + if repo_permissions: + # `permission` can be one of ADMIN, READ, WRITE, TRIAGE, or MAINTAIN + for repo_url, permission in repo_permissions: + repo_info_copy = repo_info.copy() + repo_info_copy[permission] = repo_url + result.append(repo_info_copy) + if user_roles: + # `role` can be one of MAINTAINER, MEMBER + for user_url, role in user_roles: + repo_info_copy = repo_info.copy() + repo_info_copy[role] = user_url + result.append(repo_info_copy) return result @@ -203,7 +309,8 @@ def sync_github_teams( ) -> None: teams_paginated, org_data = get_teams(organization, github_url, github_api_key) team_repos = _get_team_repos_for_multiple_teams(teams_paginated.nodes, organization, github_url, github_api_key) - processed_data = transform_teams(teams_paginated, org_data, team_repos) + team_users = _get_team_users_for_multiple_teams(teams_paginated.nodes, organization, github_url, github_api_key) + processed_data = transform_teams(teams_paginated, org_data, team_repos, team_users) load_team_repos(neo4j_session, processed_data, common_job_parameters['UPDATE_TAG'], org_data['url']) common_job_parameters['org_url'] = org_data['url'] cleanup(neo4j_session, common_job_parameters) diff --git a/cartography/models/github/teams.py b/cartography/models/github/teams.py index 906b89d895..2f33768707 100644 --- a/cartography/models/github/teams.py +++ b/cartography/models/github/teams.py @@ -80,6 +80,33 @@ class GitHubTeamWriteRepoRel(CartographyRelSchema): properties: GitHubTeamToRepoRelProperties = GitHubTeamToRepoRelProperties() +@dataclass(frozen=True) +class GitHubTeamToUserRelProperties(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class GitHubTeamMaintainerUserRel(CartographyRelSchema): + target_node_label: str = 'GitHubUser' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( + {'id': PropertyRef('MAINTAINER')}, + ) + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "MAINTAINER" + properties: GitHubTeamToUserRelProperties = GitHubTeamToUserRelProperties() + + +@dataclass(frozen=True) +class GitHubTeamMemberUserRel(CartographyRelSchema): + target_node_label: str = 'GitHubUser' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( + {'id': PropertyRef('MEMBER')}, + ) + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "MEMBER" + properties: GitHubTeamToUserRelProperties = GitHubTeamToUserRelProperties() + + @dataclass(frozen=True) class GitHubTeamToOrganizationRelProperties(CartographyRelProperties): lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) @@ -107,6 +134,8 @@ class GitHubTeamSchema(CartographyNodeSchema): GitHubTeamReadRepoRel(), GitHubTeamTriageRepoRel(), GitHubTeamWriteRepoRel(), + GitHubTeamMaintainerUserRel(), + GitHubTeamMemberUserRel(), ], ) sub_resource_relationship: GitHubTeamToOrganizationRel = GitHubTeamToOrganizationRel() diff --git a/cartography/util.py b/cartography/util.py index 6c329272f3..fbb9358d02 100644 --- a/cartography/util.py +++ b/cartography/util.py @@ -15,6 +15,7 @@ from typing import List from typing import Optional from typing import Set +from typing import Type from typing import TypeVar from typing import Union @@ -288,6 +289,25 @@ def inner_function(*args, **kwargs): # type: ignore return cast(AWSGetFunc, inner_function) +def retries_with_backoff( + func: Callable, + exceptionType: Type[Exception], max_tries: int, on_backoff: Callable, +) -> Callable: + """ + Adds retry with backoff to the given function. (Could expand the possible input parameters as needed.) + """ + @wraps(func) + @backoff.on_exception( + backoff.expo, + exceptionType, + max_tries=max_tries, + on_backoff=on_backoff, + ) + def inner_function(*args, **kwargs): # type: ignore + return func(*args, **kwargs) + return cast(Callable, inner_function) + + def dict_value_to_str(obj: Dict, key: str) -> Optional[str]: """ Convert the value referenced by the key in the dict to a string, if it exists, and return it. If it doesn't exist, diff --git a/docs/root/modules/github/schema.md b/docs/root/modules/github/schema.md index 9c90fa1208..e0846b150d 100644 --- a/docs/root/modules/github/schema.md +++ b/docs/root/modules/github/schema.md @@ -129,6 +129,12 @@ A GitHubTeam [organization object](https://docs.github.com/en/graphql/reference/ (GitHubOrganization)-[RESOURCE]->(GitHubTeam) ``` +- GitHubUsers may be ['immediate'](https://docs.github.com/en/graphql/reference/enums#teammembershiptype) members of a team (as opposed to being members via membership in a child team), with their membership [role](https://docs.github.com/en/graphql/reference/enums#teammemberrole) being MEMBER or MAINTAINER. + + ``` + (GitHubUser)-[MEMBER|MAINTAINER]->(GitHubTeam) + ``` + ### GitHubUser Representation of a single GitHubUser [user object](https://developer.github.com/v4/object/user/). This node contains minimal data for the GitHub User. @@ -178,6 +184,13 @@ WRITE, MAINTAIN, TRIAGE, and READ ([Reference](https://docs.github.com/en/graphq (GitHubUser)-[MEMBER_OF|UNAFFILIATED]->(GitHubOrganization) ``` +- GitHubUsers may be ['immediate'](https://docs.github.com/en/graphql/reference/enums#teammembershiptype) members of a team (as opposed to being members via membership in a child team), with their membership [role](https://docs.github.com/en/graphql/reference/enums#teammemberrole) being MEMBER or MAINTAINER. + + ``` + (GitHubUser)-[MEMBER|MAINTAINER]->(GitHubTeam) + ``` + + ### GitHubBranch Representation of a single GitHubBranch [ref object](https://developer.github.com/v4/object/ref). This node contains minimal data for a repository branch. diff --git a/tests/data/github/teams.py b/tests/data/github/teams.py index 1631efc170..debf404070 100644 --- a/tests/data/github/teams.py +++ b/tests/data/github/teams.py @@ -8,30 +8,35 @@ 'url': 'https://github.com/orgs/example_org/teams/team-a', 'description': None, 'repositories': {'totalCount': 0}, + 'members': {'totalCount': 0}, }, { 'slug': 'team-b', 'url': 'https://github.com/orgs/example_org/teams/team-b', 'description': None, 'repositories': {'totalCount': 3}, + 'members': {'totalCount': 0}, }, { 'slug': 'team-c', 'url': 'https://github.com/orgs/example_org/teams/team-c', 'description': None, 'repositories': {'totalCount': 0}, + 'members': {'totalCount': 3}, }, { 'slug': 'team-d', 'url': 'https://github.com/orgs/example_org/teams/team-d', 'description': 'Team D', 'repositories': {'totalCount': 0}, + 'members': {'totalCount': 0}, }, { 'slug': 'team-e', 'url': 'https://github.com/orgs/example_org/teams/team-e', 'description': 'some description here', 'repositories': {'totalCount': 0}, + 'members': {'totalCount': 0}, }, ], edges=[], @@ -53,3 +58,16 @@ {'permission': 'READ'}, ], ) + +GH_TEAM_USERS = PaginatedGraphqlData( + nodes=[ + {'url': 'https://example.com/hjsimpson'}, + {'url': 'https://example.com/lmsimpson'}, + {'url': 'https://example.com/mbsimpson'}, + ], + edges=[ + {'role': 'MEMBER'}, + {'role': 'MAINTAINER'}, + {'role': 'MAINTAINER'}, + ], +) diff --git a/tests/integration/cartography/intel/github/test_teams.py b/tests/integration/cartography/intel/github/test_teams.py index f9c1ddd8ee..6583c1f50a 100644 --- a/tests/integration/cartography/intel/github/test_teams.py +++ b/tests/integration/cartography/intel/github/test_teams.py @@ -4,7 +4,9 @@ from cartography.intel.github.teams import sync_github_teams from tests.data.github.teams import GH_TEAM_DATA from tests.data.github.teams import GH_TEAM_REPOS -from tests.integration.cartography.intel.github.test_repos import _ensure_local_neo4j_has_test_data +from tests.data.github.teams import GH_TEAM_USERS +from tests.integration.cartography.intel.github import test_repos +from tests.integration.cartography.intel.github import test_users from tests.integration.util import check_nodes from tests.integration.util import check_rels @@ -14,11 +16,13 @@ FAKE_API_KEY = 'asdf' +@patch.object(cartography.intel.github.teams, '_get_team_users', return_value=GH_TEAM_USERS) @patch.object(cartography.intel.github.teams, '_get_team_repos', return_value=GH_TEAM_REPOS) @patch.object(cartography.intel.github.teams, 'get_teams', return_value=GH_TEAM_DATA) -def test_sync_github_teams(mock_teams, mock_team_repos, neo4j_session): +def test_sync_github_teams(mock_teams, mock_team_repos, mock_team_users, neo4j_session): # Arrange - _ensure_local_neo4j_has_test_data(neo4j_session) + test_repos._ensure_local_neo4j_has_test_data(neo4j_session) + test_users._ensure_local_neo4j_has_test_data(neo4j_session) # Arrange: Add another org to make sure we don't attach a node to the wrong org neo4j_session.run(''' MERGE (g:GitHubOrganization{id: "this should have no attachments"}) @@ -116,3 +120,22 @@ def test_sync_github_teams(mock_teams, mock_team_repos, neo4j_session): ) == { ('https://github.com/orgs/example_org/teams/team-b', 'https://github.com/lyft/cartography'), } + assert check_rels( + neo4j_session, + 'GitHubTeam', 'id', + 'GitHubUser', 'id', + 'MEMBER', + rel_direction_right=False, + ) == { + ('https://github.com/orgs/example_org/teams/team-c', 'https://example.com/hjsimpson'), + } + assert check_rels( + neo4j_session, + 'GitHubTeam', 'id', + 'GitHubUser', 'id', + 'MAINTAINER', + rel_direction_right=False, + ) == { + ('https://github.com/orgs/example_org/teams/team-c', 'https://example.com/lmsimpson'), + ('https://github.com/orgs/example_org/teams/team-c', 'https://example.com/mbsimpson'), + } diff --git a/tests/integration/cartography/intel/github/test_users.py b/tests/integration/cartography/intel/github/test_users.py index 21a0f5be91..33fe160145 100644 --- a/tests/integration/cartography/intel/github/test_users.py +++ b/tests/integration/cartography/intel/github/test_users.py @@ -1,6 +1,7 @@ from unittest.mock import patch import cartography.intel.github.users +from cartography.models.github.users import GitHubOrganizationUserSchema from tests.data.github.users import GITHUB_ENTERPRISE_OWNER_DATA from tests.data.github.users import GITHUB_ORG_DATA from tests.data.github.users import GITHUB_USER_DATA @@ -12,6 +13,24 @@ FAKE_API_KEY = 'asdf' +def _ensure_local_neo4j_has_test_data(neo4j_session): + """ + Not needed for this test file, but used to set up users for other tests that need them + """ + processed_affiliated_user_data, _ = ( + cartography.intel.github.users.transform_users( + GITHUB_USER_DATA[0], GITHUB_ENTERPRISE_OWNER_DATA[0], GITHUB_ORG_DATA, + ) + ) + cartography.intel.github.users.load_users( + neo4j_session, + GitHubOrganizationUserSchema(), + processed_affiliated_user_data, + GITHUB_ORG_DATA, + TEST_UPDATE_TAG, + ) + + @patch.object(cartography.intel.github.users, 'get_users', return_value=GITHUB_USER_DATA) @patch.object(cartography.intel.github.users, 'get_enterprise_owners', return_value=GITHUB_ENTERPRISE_OWNER_DATA) def test_sync(mock_owners, mock_users, neo4j_session): diff --git a/tests/unit/cartography/intel/github/test_teams.py b/tests/unit/cartography/intel/github/test_teams.py index e00e90e53d..defeaea15b 100644 --- a/tests/unit/cartography/intel/github/test_teams.py +++ b/tests/unit/cartography/intel/github/test_teams.py @@ -4,8 +4,10 @@ import pytest from cartography.intel.github.teams import _get_team_repos_for_multiple_teams +from cartography.intel.github.teams import _get_team_users_for_multiple_teams from cartography.intel.github.teams import RepoPermission from cartography.intel.github.teams import transform_teams +from cartography.intel.github.teams import UserRole from cartography.intel.github.util import PaginatedGraphqlData TEST_ORG_DATA = { @@ -26,6 +28,18 @@ def test_get_team_repos_empty_team_list(mock_get_team_repos): mock_get_team_repos.assert_not_called() +@patch('cartography.intel.github.teams._get_team_users') +def test_get_team_users_empty_team_list(mock_get_team_users): + # Assert that if we pass in empty data then we get back empty data + assert _get_team_users_for_multiple_teams( + [], + 'test-org', + 'https://api.github.com', + 'test-token', + ) == {} + mock_get_team_users.assert_not_called() + + @patch('cartography.intel.github.teams._get_team_repos') def test_get_team_repos_team_with_no_repos(mock_get_team_repos): # Arrange @@ -41,6 +55,21 @@ def test_get_team_repos_team_with_no_repos(mock_get_team_repos): mock_get_team_repos.assert_not_called() +@patch('cartography.intel.github.teams._get_team_users') +def test_get_team_users_team_with_no_users(mock_get_team_users): + # Arrange + team_data = [{'slug': 'team1', 'members': {'totalCount': 0}}] + + # Assert that we retrieve data where a team has no repos + assert _get_team_users_for_multiple_teams( + team_data, + 'test-org', + 'https://api.github.com', + 'test-token', + ) == {'team1': []} + mock_get_team_users.assert_not_called() + + @patch('cartography.intel.github.teams._get_team_repos') def test_get_team_repos_happy_path(mock_get_team_repos): # Arrange @@ -67,9 +96,35 @@ def test_get_team_repos_happy_path(mock_get_team_repos): mock_get_team_repos.assert_called_once_with('test-org', 'https://api.github.com', 'test-token', 'team1') +@patch('cartography.intel.github.teams._get_team_users') +def test_get_team_users_happy_path(mock_get_team_users): + # Arrange + team_data = [{'slug': 'team1', 'members': {'totalCount': 2}}] + mock_team_users = MagicMock() + mock_team_users.nodes = [{'url': 'https://github.com/user1'}, {'url': 'https://github.com/user2'}] + mock_team_users.edges = [{'role': 'MAINTAINER'}, {'role': 'MEMBER'}] + mock_get_team_users.return_value = mock_team_users + + # Act + assert that the returned data is correct + assert _get_team_users_for_multiple_teams( + team_data, + 'test-org', + 'https://api.github.com', + 'test-token', + ) == { + 'team1': [ + UserRole('https://github.com/user1', 'MAINTAINER'), + UserRole('https://github.com/user2', 'MEMBER'), + ], + } + + # Assert that we did not retry because this was the happy path + mock_get_team_users.assert_called_once_with('test-org', 'https://api.github.com', 'test-token', 'team1') + + @patch('cartography.intel.github.teams._get_team_repos') -@patch('cartography.intel.github.teams.sleep') -def test_get_team_repos_github_returns_none(mock_sleep, mock_get_team_repos): +@patch('cartography.intel.github.teams.backoff_handler', spec=True) +def test_get_team_repos_github_returns_none(mock_backoff_handler, mock_get_team_repos): # Arrange team_data = [{'slug': 'team1', 'repositories': {'totalCount': 1}}] mock_team_repos = MagicMock() @@ -79,7 +134,7 @@ def test_get_team_repos_github_returns_none(mock_sleep, mock_get_team_repos): mock_get_team_repos.return_value = mock_team_repos # Assert we raise an exception - with pytest.raises(RuntimeError): + with pytest.raises(TypeError): _get_team_repos_for_multiple_teams( team_data, 'test-org', @@ -89,19 +144,45 @@ def test_get_team_repos_github_returns_none(mock_sleep, mock_get_team_repos): # Assert that we retry and give up assert mock_get_team_repos.call_count == 5 - assert mock_sleep.call_count == 4 + assert mock_backoff_handler.call_count == 4 + + +@patch('cartography.intel.github.teams._get_team_users') +@patch('cartography.intel.github.teams.backoff_handler', spec=True) +def test_get_team_users_github_returns_none(mock_backoff_handler, mock_get_team_users): + # Arrange + team_data = [{'slug': 'team1', 'members': {'totalCount': 1}}] + mock_team_users = MagicMock() + # Set up the condition where GitHub returns a None url and None edge as in #1334. + mock_team_users.nodes = [None] + mock_team_users.edges = [None] + mock_get_team_users.return_value = mock_team_users + + # Assert we raise an exception + with pytest.raises(TypeError): + _get_team_users_for_multiple_teams( + team_data, + 'test-org', + 'https://api.github.com', + 'test-token', + ) + + # Assert that we retry and give up + assert mock_get_team_users.call_count == 5 + assert mock_backoff_handler.call_count == 4 def test_transform_teams_empty_team_data(): # Arrange team_paginated_data = PaginatedGraphqlData(nodes=[], edges=[]) team_repo_data: dict[str, list[RepoPermission]] = {} + team_user_data: dict[str, list[UserRole]] = {} # Act + assert - assert transform_teams(team_paginated_data, TEST_ORG_DATA, team_repo_data) == [] + assert transform_teams(team_paginated_data, TEST_ORG_DATA, team_repo_data, team_user_data) == [] -def test_transform_teams_team_with_no_repos(): +def test_transform_teams_team_with_no_repos_no_users(): # Arrange team_paginated_data = PaginatedGraphqlData( nodes=[ @@ -110,19 +191,22 @@ def test_transform_teams_team_with_no_repos(): 'url': 'https://github.com/testorg/team1', 'description': 'Test Team 1', 'repositories': {'totalCount': 0}, + 'members': {'totalCount': 0}, }, ], edges=[], ) team_repo_data = {'team1': []} + team_user_data = {'team1': []} # Act + Assert - assert transform_teams(team_paginated_data, TEST_ORG_DATA, team_repo_data) == [ + assert transform_teams(team_paginated_data, TEST_ORG_DATA, team_repo_data, team_user_data) == [ { 'name': 'team1', 'url': 'https://github.com/testorg/team1', 'description': 'Test Team 1', 'repo_count': 0, + 'member_count': 0, 'org_url': 'https://github.com/testorg', 'org_login': 'testorg', }, @@ -138,6 +222,7 @@ def test_transform_teams_team_with_repos(): 'url': 'https://github.com/testorg/team1', 'description': 'Test Team 1', 'repositories': {'totalCount': 2}, + 'members': {'totalCount': 0}, }, ], edges=[], @@ -148,14 +233,16 @@ def test_transform_teams_team_with_repos(): RepoPermission('https://github.com/testorg/repo2', 'WRITE'), ], } + team_user_data = {'team1': []} # Act - assert transform_teams(team_paginated_data, TEST_ORG_DATA, team_repo_data) == [ + assert transform_teams(team_paginated_data, TEST_ORG_DATA, team_repo_data, team_user_data) == [ { 'name': 'team1', 'url': 'https://github.com/testorg/team1', 'description': 'Test Team 1', 'repo_count': 2, + 'member_count': 0, 'org_url': 'https://github.com/testorg', 'org_login': 'testorg', 'READ': 'https://github.com/testorg/repo1', @@ -165,6 +252,7 @@ def test_transform_teams_team_with_repos(): 'url': 'https://github.com/testorg/team1', 'description': 'Test Team 1', 'repo_count': 2, + 'member_count': 0, 'org_url': 'https://github.com/testorg', 'org_login': 'testorg', 'WRITE': 'https://github.com/testorg/repo2', @@ -172,6 +260,125 @@ def test_transform_teams_team_with_repos(): ] +def test_transform_teams_team_with_members(): + # Arrange + team_paginated_data = PaginatedGraphqlData( + nodes=[ + { + 'slug': 'team1', + 'url': 'https://github.com/testorg/team1', + 'description': 'Test Team 1', + 'repositories': {'totalCount': 0}, + 'members': {'totalCount': 2}, + }, + ], + edges=[], + ) + team_repo_data = {'team1': []} + team_user_data = { + 'team1': [ + UserRole('https://github.com/user1', 'MEMBER'), + UserRole('https://github.com/user2', 'MAINTAINER'), + ], + } + + # Act + assert transform_teams(team_paginated_data, TEST_ORG_DATA, team_repo_data, team_user_data) == [ + { + 'name': 'team1', + 'url': 'https://github.com/testorg/team1', + 'description': 'Test Team 1', + 'repo_count': 0, + 'member_count': 2, + 'org_url': 'https://github.com/testorg', + 'org_login': 'testorg', + 'MEMBER': 'https://github.com/user1', + }, + { + 'name': 'team1', + 'url': 'https://github.com/testorg/team1', + 'description': 'Test Team 1', + 'repo_count': 0, + 'member_count': 2, + 'org_url': 'https://github.com/testorg', + 'org_login': 'testorg', + 'MAINTAINER': 'https://github.com/user2', + }, + ] + + +def test_transform_teams_team_with_repos_and_members(): + # Arrange + team_paginated_data = PaginatedGraphqlData( + nodes=[ + { + 'slug': 'team1', + 'url': 'https://github.com/testorg/team1', + 'description': 'Test Team 1', + 'repositories': {'totalCount': 2}, + 'members': {'totalCount': 2}, + }, + ], + edges=[], + ) + team_repo_data = { + 'team1': [ + RepoPermission('https://github.com/testorg/repo1', 'READ'), + RepoPermission('https://github.com/testorg/repo2', 'WRITE'), + ], + } + team_user_data = { + 'team1': [ + UserRole('https://github.com/user1', 'MEMBER'), + UserRole('https://github.com/user2', 'MAINTAINER'), + ], + } + + # Act + assert transform_teams(team_paginated_data, TEST_ORG_DATA, team_repo_data, team_user_data) == [ + { + 'name': 'team1', + 'url': 'https://github.com/testorg/team1', + 'description': 'Test Team 1', + 'repo_count': 2, + 'member_count': 2, + 'org_url': 'https://github.com/testorg', + 'org_login': 'testorg', + 'READ': 'https://github.com/testorg/repo1', + }, + { + 'name': 'team1', + 'url': 'https://github.com/testorg/team1', + 'description': 'Test Team 1', + 'repo_count': 2, + 'member_count': 2, + 'org_url': 'https://github.com/testorg', + 'org_login': 'testorg', + 'WRITE': 'https://github.com/testorg/repo2', + }, + { + 'name': 'team1', + 'url': 'https://github.com/testorg/team1', + 'description': 'Test Team 1', + 'repo_count': 2, + 'member_count': 2, + 'org_url': 'https://github.com/testorg', + 'org_login': 'testorg', + 'MEMBER': 'https://github.com/user1', + }, + { + 'name': 'team1', + 'url': 'https://github.com/testorg/team1', + 'description': 'Test Team 1', + 'repo_count': 2, + 'member_count': 2, + 'org_url': 'https://github.com/testorg', + 'org_login': 'testorg', + 'MAINTAINER': 'https://github.com/user2', + }, + ] + + def test_transform_teams_multiple_teams(): # Arrange team_paginated_data = PaginatedGraphqlData( @@ -181,12 +388,14 @@ def test_transform_teams_multiple_teams(): 'url': 'https://github.com/testorg/team1', 'description': 'Test Team 1', 'repositories': {'totalCount': 1}, + 'members': {'totalCount': 0}, }, { 'slug': 'team2', 'url': 'https://github.com/testorg/team2', 'description': 'Test Team 2', 'repositories': {'totalCount': 0}, + 'members': {'totalCount': 1}, }, ], edges=[], @@ -197,14 +406,21 @@ def test_transform_teams_multiple_teams(): ], 'team2': [], } + team_user_data = { + 'team1': [], + 'team2': [ + UserRole('https://github.com/user1', 'MEMBER'), + ], + } # Act + assert - assert transform_teams(team_paginated_data, TEST_ORG_DATA, team_repo_data) == [ + assert transform_teams(team_paginated_data, TEST_ORG_DATA, team_repo_data, team_user_data) == [ { 'name': 'team1', 'url': 'https://github.com/testorg/team1', 'description': 'Test Team 1', 'repo_count': 1, + 'member_count': 0, 'org_url': 'https://github.com/testorg', 'org_login': 'testorg', 'ADMIN': 'https://github.com/testorg/repo1', @@ -214,7 +430,9 @@ def test_transform_teams_multiple_teams(): 'url': 'https://github.com/testorg/team2', 'description': 'Test Team 2', 'repo_count': 0, + 'member_count': 1, 'org_url': 'https://github.com/testorg', 'org_login': 'testorg', + 'MEMBER': 'https://github.com/user1', }, ]