diff --git a/cartography/data/jobs/cleanup/github_repos_cleanup.json b/cartography/data/jobs/cleanup/github_repos_cleanup.json index 0d5bc4111..87f5bf172 100644 --- a/cartography/data/jobs/cleanup/github_repos_cleanup.json +++ b/cartography/data/jobs/cleanup/github_repos_cleanup.json @@ -63,6 +63,31 @@ "query": "MATCH (:GitHubUser)-[r:OUTSIDE_COLLAB_WRITE]->(:GitHubRepository) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)", "iterative": true, "iterationsize": 100 + }, + { + "query": "MATCH (:GitHubUser)-[r:DIRECT_COLLAB_ADMIN]->(:GitHubRepository) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)", + "iterative": true, + "iterationsize": 100 + }, + { + "query": "MATCH (:GitHubUser)-[r:DIRECT_COLLAB_MAINTAIN]->(:GitHubRepository) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)", + "iterative": true, + "iterationsize": 100 + }, + { + "query": "MATCH (:GitHubUser)-[r:DIRECT_COLLAB_READ]->(:GitHubRepository) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)", + "iterative": true, + "iterationsize": 100 + }, + { + "query": "MATCH (:GitHubUser)-[r:DIRECT_COLLAB_TRIAGE]->(:GitHubRepository) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)", + "iterative": true, + "iterationsize": 100 + }, + { + "query": "MATCH (:GitHubUser)-[r:DIRECT_COLLAB_WRITE]->(:GitHubRepository) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)", + "iterative": true, + "iterationsize": 100 }], "name": "cleanup GitHub repos data" } diff --git a/cartography/intel/github/repos.py b/cartography/intel/github/repos.py index 9ca01156f..1ea0c67f4 100644 --- a/cartography/intel/github/repos.py +++ b/cartography/intel/github/repos.py @@ -1,5 +1,6 @@ import configparser import logging +from collections import namedtuple from string import Template from typing import Any from typing import Dict @@ -12,11 +13,26 @@ from packaging.utils import canonicalize_name from 
cartography.intel.github.util import fetch_all +from cartography.intel.github.util import PaginatedGraphqlData from cartography.util import run_cleanup_job from cartography.util import timeit logger = logging.getLogger(__name__) + +# Representation of a user's permission level and affiliation to a GitHub repo. See: +# - Permission: https://docs.github.com/en/graphql/reference/enums#repositorypermission +# - Affiliation: https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation +UserAffiliationAndRepoPermission = namedtuple( + 'UserAffiliationAndRepoPermission', + [ + 'user', # Dict + 'permission', # 'WRITE', 'MAINTAIN', 'ADMIN', etc + 'affiliation', # 'OUTSIDE', 'DIRECT' + ], +) + + GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """ query($login: String!, $cursor: String) { organization(login: $login) @@ -59,17 +75,11 @@ login __typename } - collaborators(affiliation: OUTSIDE, first: 50) { - edges { - permission - } - nodes { - url - login - name - email - company - } + directCollaborators: collaborators(first: 100, affiliation: DIRECT) { + totalCount + } + outsideCollaborators: collaborators(first: 100, affiliation: OUTSIDE) { + totalCount } requirements:object(expression: "HEAD:requirements.txt") { ... on Blob { @@ -89,6 +99,111 @@ # Note: In the above query, `HEAD` references the default branch. 
# See https://stackoverflow.com/questions/48935381/github-graphql-api-default-branch-in-repository +GITHUB_REPO_COLLABS_PAGINATED_GRAPHQL = """ + query($login: String!, $repo: String!, $affiliation: CollaboratorAffiliation!, $cursor: String) { + organization(login: $login) { + url + login + repository(name: $repo){ + name + collaborators(first: 50, affiliation: $affiliation, after: $cursor) { + edges { + permission + } + nodes { + url + login + name + email + company + } + pageInfo{ + endCursor + hasNextPage + } + } + } + } + rateLimit { + limit + cost + remaining + resetAt + } + } + """ + + +def _get_repo_collaborators_for_multiple_repos( + repo_raw_data: list[dict[str, Any]], + affiliation: str, + org: str, + api_url: str, + token: str, +) -> dict[str, List[UserAffiliationAndRepoPermission]]: + """ + For every repo in the given list, retrieve the collaborators. + :param repo_raw_data: A list of dicts representing repos. See tests.data.github.repos.GET_REPOS for data shape. + :param affiliation: The type of affiliation to retrieve collaborators for. Either 'DIRECT' or 'OUTSIDE'. + See https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation + :param org: The name of the target Github organization as string. + :param api_url: The Github v4 API endpoint as string. + :param token: The Github API token as string. 
+ :return: A dictionary of repo URL to list of UserAffiliationAndRepoPermission + """ + result: dict[str, List[UserAffiliationAndRepoPermission]] = {} + for repo in repo_raw_data: + repo_name = repo['name'] + repo_url = repo['url'] + + if ((affiliation == 'OUTSIDE' and repo['outsideCollaborators']['totalCount'] == 0) or + (affiliation == 'DIRECT' and repo['directCollaborators']['totalCount'] == 0)): + # repo has no collabs of the affiliation type we're looking for, so don't waste time making an API call + result[repo_url] = [] + continue + + collab_users = [] + collab_permission = [] + collaborators = _get_repo_collaborators(token, api_url, org, repo_name, affiliation) + # nodes and edges are expected to always be present given that we only call for them if totalCount is > 0 + for collab in collaborators.nodes: + collab_users.append(collab) + for perm in collaborators.edges: + collab_permission.append(perm['permission']) + + result[repo_url] = [ + UserAffiliationAndRepoPermission(user, permission, affiliation) + for user, permission in zip(collab_users, collab_permission) + ] + return result + + +def _get_repo_collaborators( + token: str, api_url: str, organization: str, repo: str, affiliation: str, +) -> PaginatedGraphqlData: + """ + Retrieve a list of collaborators for a given repository, as described in + https://docs.github.com/en/graphql/reference/objects#repositorycollaboratorconnection. + :param token: The Github API token as string. + :param api_url: The Github v4 API endpoint as string. + :param organization: The name of the target Github organization as string. + :param repo: The name of the target Github repository as string. + :param affiliation: The type of affiliation to retrieve collaborators for. Either 'DIRECT' or 'OUTSIDE'. + See https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation + :return: The repo's collaborators as PaginatedGraphqlData: `nodes` are the users, `edges` hold their permissions.
+ """ + collaborators, _ = fetch_all( + token, + api_url, + organization, + GITHUB_REPO_COLLABS_PAGINATED_GRAPHQL, + 'repository', + resource_inner_type='collaborators', + repo=repo, + affiliation=affiliation, + ) + return collaborators + @timeit def get(token: str, api_url: str, organization: str) -> List[Dict]: @@ -111,11 +226,18 @@ def get(token: str, api_url: str, organization: str) -> List[Dict]: return repos.nodes -def transform(repos_json: List[Dict]) -> Dict: +def transform( + repos_json: List[Dict], direct_collaborators: dict[str, List[UserAffiliationAndRepoPermission]], + outside_collaborators: dict[str, List[UserAffiliationAndRepoPermission]], +) -> Dict: """ Parses the JSON returned from GitHub API to create data for graph ingestion - :param repos_json: the list of individual repository nodes from GitHub. See tests.data.github.repos.GET_REPOS for - data shape. + :param repos_json: the list of individual repository nodes from GitHub. + See tests.data.github.repos.GET_REPOS for data shape. + :param direct_collaborators: dict of repo URL to list of direct collaborators. + See tests.data.github.repos.DIRECT_COLLABORATORS for data shape. + :param outside_collaborators: dict of repo URL to list of outside collaborators. + See tests.data.github.repos.OUTSIDE_COLLABORATORS for data shape. :return: Dict containing the repos, repo->language mapping, owners->repo mapping, outside collaborators->repo mapping, and Python requirements files (if any) in a repo. 
""" @@ -123,7 +245,10 @@ def transform(repos_json: List[Dict]) -> Dict: transformed_repo_languages: List[Dict] = [] transformed_repo_owners: List[Dict] = [] # See https://docs.github.com/en/graphql/reference/enums#repositorypermission - transformed_collaborators: Dict[str, List[Any]] = { + transformed_outside_collaborators: Dict[str, List[Any]] = { + 'ADMIN': [], 'MAINTAIN': [], 'READ': [], 'TRIAGE': [], 'WRITE': [], + } + transformed_direct_collaborators: Dict[str, List[Any]] = { 'ADMIN': [], 'MAINTAIN': [], 'READ': [], 'TRIAGE': [], 'WRITE': [], } transformed_requirements_files: List[Dict] = [] @@ -131,14 +256,22 @@ def transform(repos_json: List[Dict]) -> Dict: _transform_repo_languages(repo_object['url'], repo_object, transformed_repo_languages) _transform_repo_objects(repo_object, transformed_repo_list) _transform_repo_owners(repo_object['owner']['url'], repo_object, transformed_repo_owners) - _transform_collaborators(repo_object['collaborators'], repo_object['url'], transformed_collaborators) + _transform_collaborators( + repo_object['url'], outside_collaborators[repo_object['url']], + transformed_outside_collaborators, + ) + _transform_collaborators( + repo_object['url'], direct_collaborators[repo_object['url']], + transformed_direct_collaborators, + ) _transform_requirements_txt(repo_object['requirements'], repo_object['url'], transformed_requirements_files) _transform_setup_cfg_requirements(repo_object['setupCfg'], repo_object['url'], transformed_requirements_files) results = { 'repos': transformed_repo_list, 'repo_languages': transformed_repo_languages, 'repo_owners': transformed_repo_owners, - 'repo_collaborators': transformed_collaborators, + 'repo_outside_collaborators': transformed_outside_collaborators, + 'repo_direct_collaborators': transformed_direct_collaborators, 'python_requirements': transformed_requirements_files, } return results @@ -229,11 +362,15 @@ def _transform_repo_languages(repo_url: str, repo: Dict, repo_languages: List[Di }) -def 
_transform_collaborators(collaborators: Dict, repo_url: str, transformed_collaborators: Dict) -> None: +def _transform_collaborators( + repo_url: str, collaborators: List[UserAffiliationAndRepoPermission], transformed_collaborators: Dict, +) -> None: """ - Performs data adjustments for outside collaborators in a GitHub repo. + Performs data adjustments for collaborators in a GitHub repo. Output data shape = [{permission, repo_url, url (the user's URL), login, name}, ...] - :param collaborators: See cartography.tests.data.github.repos for data shape. + :param collaborators: For data shape, see + tests.data.github.repos.DIRECT_COLLABORATORS + tests.data.github.repos.OUTSIDE_COLLABORATORS :param repo_url: The URL of the GitHub repo. :param transformed_collaborators: Output dict. Data shape = {'ADMIN': [{ user }, ...], 'MAINTAIN': [{ user }, ...], 'READ': [ ... ], 'TRIAGE': [ ... ], 'WRITE': [ ... ]} @@ -241,10 +378,11 @@ def _transform_collaborators(collaborators: Dict, repo_url: str, transformed_col """ # `collaborators` is sometimes None if collaborators: - for idx, user in enumerate(collaborators['nodes']): - user_permission = collaborators['edges'][idx]['permission'] + for collaborator in collaborators: + user = collaborator.user user['repo_url'] = repo_url - transformed_collaborators[user_permission].append(user) + user['affiliation'] = collaborator.affiliation + transformed_collaborators[collaborator.permission].append(user) def _transform_requirements_txt( @@ -482,7 +620,7 @@ def load_github_owners(neo4j_session: neo4j.Session, update_tag: int, repo_owner @timeit -def load_collaborators(neo4j_session: neo4j.Session, update_tag: int, collaborators: Dict) -> None: +def load_collaborators(neo4j_session: neo4j.Session, update_tag: int, collaborators: Dict, affiliation: str) -> None: query = Template(""" UNWIND $UserData as user @@ -502,7 +640,7 @@ def load_collaborators(neo4j_session: neo4j.Session, update_tag: int, collaborat SET
o.lastupdated = $UpdateTag """) for collab_type in collaborators.keys(): - relationship_label = f"OUTSIDE_COLLAB_{collab_type}" + relationship_label = f"{affiliation}_COLLAB_{collab_type}" neo4j_session.run( query.safe_substitute(rel_label=relationship_label), UserData=collaborators[collab_type], @@ -515,7 +653,12 @@ def load(neo4j_session: neo4j.Session, common_job_parameters: Dict, repo_data: D load_github_repos(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repos']) load_github_owners(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_owners']) load_github_languages(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_languages']) - load_collaborators(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_collaborators']) + load_collaborators( + neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_direct_collaborators'], 'DIRECT', + ) + load_collaborators( + neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_outside_collaborators'], 'OUTSIDE', + ) load_python_requirements(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['python_requirements']) @@ -561,6 +704,12 @@ def sync( """ logger.info("Syncing GitHub repos") repos_json = get(github_api_key, github_url, organization) - repo_data = transform(repos_json) + direct_collabs = _get_repo_collaborators_for_multiple_repos( + repos_json, "DIRECT", organization, github_url, github_api_key, + ) + outside_collabs = _get_repo_collaborators_for_multiple_repos( + repos_json, "OUTSIDE", organization, github_url, github_api_key, + ) + repo_data = transform(repos_json, direct_collabs, outside_collabs) load(neo4j_session, common_job_parameters, repo_data) run_cleanup_job('github_repos_cleanup.json', neo4j_session, common_job_parameters) diff --git a/docs/root/modules/github/schema.md b/docs/root/modules/github/schema.md index 5a977d3dd..9c90fa120 100644 --- a/docs/root/modules/github/schema.md +++ 
b/docs/root/modules/github/schema.md @@ -39,13 +39,20 @@ Representation of a single GitHubRepository (repo) [repository object](https://d (GitHubOrganization)-[OWNER]->(GitHubRepository) ``` -- GitHubRepositories in an organization can have outside collaborators with different permissions, including ADMIN, +- GitHubRepositories in an organization can have [outside collaborators](https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation) who may be granted different levels of access, including ADMIN, WRITE, MAINTAIN, TRIAGE, and READ ([Reference](https://docs.github.com/en/graphql/reference/enums#repositorypermission)). ``` (GitHubUser)-[:OUTSIDE_COLLAB_{ACTION}]->(GitHubRepository) ``` +- GitHubRepositories in an organization can also have [direct collaborators](https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation): users who are granted access directly on the repository (as opposed to through membership in a team), whether or not they are also outside collaborators. They may be granted different levels of access, including ADMIN, +WRITE, MAINTAIN, TRIAGE, and READ ([Reference](https://docs.github.com/en/graphql/reference/enums#repositorypermission)). + + ``` + (GitHubUser)-[:DIRECT_COLLAB_{ACTION}]->(GitHubRepository) + ``` + - GitHubRepositories use ProgrammingLanguages ``` (GitHubRepository)-[:LANGUAGE]->(ProgrammingLanguage) @@ -151,13 +158,20 @@ Representation of a single GitHubUser [user object](https://developer.github.com (GitHubUser)-[OWNER]->(GitHubRepository) ``` -- GitHubRepositories in an organization can have outside collaborators with different permissions, including ADMIN, +- GitHubRepositories in an organization can have [outside collaborators](https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation) who may be granted different levels of access, including ADMIN, WRITE, MAINTAIN, TRIAGE, and READ ([Reference](https://docs.github.com/en/graphql/reference/enums#repositorypermission)).
``` (GitHubUser)-[:OUTSIDE_COLLAB_{ACTION}]->(GitHubRepository) ``` +- GitHubRepositories in an organization can also have [direct collaborators](https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation): users who are granted access directly on the repository (as opposed to through membership in a team), whether or not they are also outside collaborators. They may be granted different levels of access, including ADMIN, +WRITE, MAINTAIN, TRIAGE, and READ ([Reference](https://docs.github.com/en/graphql/reference/enums#repositorypermission)). + + ``` + (GitHubUser)-[:DIRECT_COLLAB_{ACTION}]->(GitHubRepository) + ``` + - GitHubUsers are members of an organization. In some cases there may be a user who is "unaffiliated" with an org, for example if the user is an enterprise owner, but not member of, the org. [Enterprise owners](https://docs.github.com/en/enterprise-cloud@latest/admin/managing-accounts-and-repositories/managing-users-in-your-enterprise/roles-in-an-enterprise#enterprise-owners) have complete control over the enterprise (i.e. they can manage all enterprise settings, members, and policies) yet may not show up on member lists of the GitHub org.
``` diff --git a/tests/data/github/repos.py b/tests/data/github/repos.py index a362538c5..a04fbd597 100644 --- a/tests/data/github/repos.py +++ b/tests/data/github/repos.py @@ -1,6 +1,10 @@ import textwrap +from typing import Any +from typing import List -GET_REPOS = [ +from cartography.intel.github.repos import UserAffiliationAndRepoPermission + +GET_REPOS: List[dict[str, Any]] = [ { 'name': 'sample_repo', 'nameWithOwner': 'example_org/sample_repo', @@ -32,7 +36,8 @@ 'login': 'example_org', '__typename': 'Organization', }, - 'collaborators': {'edges': [], 'nodes': []}, + 'directCollaborators': {'totalCount': 0}, + 'outsideCollaborators': {'totalCount': 0}, 'requirements': {'text': 'cartography\nhttplib2<0.7.0\njinja2\nlxml\n-e git+https://example.com#egg=foobar\nhttps://example.com/foobar.tar.gz\npip @ https://github.com/pypa/pip/archive/1.3.1.zip#sha1=da9234ee9982d4bbb3c72346a6de940a148ea686\n'}, # noqa 'setupCfg': { 'text': textwrap.dedent(''' @@ -42,7 +47,8 @@ scipy!=1.20.0 # comment '''), }, - }, { + }, + { 'name': 'SampleRepo2', 'nameWithOwner': 'example_org/SampleRepo2', 'primaryLanguage': { @@ -72,7 +78,8 @@ 'url': 'https://github.com/example_org', 'login': 'example_org', '__typename': 'Organization', }, - 'collaborators': None, + 'directCollaborators': {'totalCount': 1}, + 'outsideCollaborators': {'totalCount': 0}, 'requirements': None, 'setupCfg': None, }, @@ -103,52 +110,8 @@ 'login': 'example_org', '__typename': 'Organization', }, - 'collaborators': { - 'edges': [ - {'permission': 'WRITE'}, - {'permission': 'WRITE'}, - {'permission': 'WRITE'}, - {'permission': 'WRITE'}, - {'permission': 'WRITE'}, - ], - 'nodes': [ - { - 'url': 'https://github.com/marco-lancini', - 'login': 'marco-lancini', - 'name': 'Marco Lancini', - 'email': 'm@example.com', - 'company': 'ExampleCo', - }, - { - 'url': 'https://github.com/sachafaust', - 'login': 'sachafaust', - 'name': 'Sacha Faust', - 'email': 's@example.com', - 'company': 'ExampleCo', - }, - { - 'url': 
'https://github.com/SecPrez', - 'login': 'SecPrez', - 'name': 'SecPrez', - 'email': 'sec@example.com', - 'company': 'ExampleCo', - }, - { - 'url': 'https://github.com/ramonpetgrave64', - 'login': 'ramonpetgrave64', - 'name': 'Ramon Petgrave', - 'email': 'r@example.com', - 'company': 'ExampleCo', - }, - { - 'url': 'https://github.com/roshinis78', - 'login': 'roshinis78', - 'name': 'Roshini Saravanakumar', - 'email': 'ro@example.com', - 'company': 'ExampleCo', - }, - ], - }, + 'directCollaborators': {'totalCount': 3}, + 'outsideCollaborators': {'totalCount': 5}, 'requirements': { 'text': 'cartography==0.1.0\nhttplib2>=0.7.0\njinja2\nlxml\n# This is a comment line to be ignored\nokta==0.9.0', # noqa }, @@ -163,3 +126,128 @@ }, }, ] + + +# - This list is not a raw API response, but the lightly processed collected results of all the API calls, for all +# repos that have collaborators. +# - The actual values are mostly arbitrary but the length of the lists is directly tied to the data in GET_REPOS, +# e.g. since GET_REPOS notes that 'sample_repo' has 0 outside collaborators, the 'sample_repo' list below is empty.
+OUTSIDE_COLLABORATORS: dict[str, List[UserAffiliationAndRepoPermission]] = { + GET_REPOS[0]['url']: [], + GET_REPOS[1]['url']: [], + GET_REPOS[2]['url']: [ + UserAffiliationAndRepoPermission( + user={ + 'url': 'https://github.com/marco-lancini', + 'login': 'marco-lancini', + 'name': 'Marco Lancini', + 'email': 'm@example.com', + 'company': 'ExampleCo', + }, + permission='WRITE', + affiliation='OUTSIDE', + ), + UserAffiliationAndRepoPermission( + user={ + 'url': 'https://github.com/sachafaust', + 'login': 'sachafaust', + 'name': 'Sacha Faust', + 'email': 's@example.com', + 'company': 'ExampleCo', + }, + permission='READ', + affiliation='OUTSIDE', + ), + UserAffiliationAndRepoPermission( + user={ + 'url': 'https://github.com/SecPrez', + 'login': 'SecPrez', + 'name': 'SecPrez', + 'email': 'sec@example.com', + 'company': 'ExampleCo', + }, + permission='ADMIN', + affiliation='OUTSIDE', + ), + UserAffiliationAndRepoPermission( + user={ + 'url': 'https://github.com/ramonpetgrave64', + 'login': 'ramonpetgrave64', + 'name': 'Ramon Petgrave', + 'email': 'r@example.com', + 'company': 'ExampleCo', + }, + permission='TRIAGE', + affiliation='OUTSIDE', + ), + UserAffiliationAndRepoPermission( + user={ + 'url': 'https://github.com/roshinis78', + 'login': 'roshinis78', + 'name': 'Roshini Saravanakumar', + 'email': 'ro@example.com', + 'company': 'ExampleCo', + }, + permission='MAINTAIN', + affiliation='OUTSIDE', + ), + ], +} + + +# - All notes for OUTSIDE_COLLABORATORS apply here as well. +# - We also include the lists from OUTSIDE_COLLABORATORS here. Users who are outside collaborators are +# also marked as direct collaborators, by Github, so we mimic that idea in our test data here. 
+DIRECT_COLLABORATORS: dict[str, List[UserAffiliationAndRepoPermission]] = { + GET_REPOS[0]['url']: [], + GET_REPOS[1]['url']: [ + *OUTSIDE_COLLABORATORS[GET_REPOS[1]['url']], + UserAffiliationAndRepoPermission( + user={ + 'url': 'https://github.com/direct_foo', + 'login': 'direct_foo', + 'name': 'Foo User', + 'email': '', + 'company': None, + }, + permission='ADMIN', + affiliation='DIRECT', + ), + ], + GET_REPOS[2]['url']: [ + *OUTSIDE_COLLABORATORS[GET_REPOS[2]['url']], + UserAffiliationAndRepoPermission( + user={ + 'url': 'https://github.com/direct_bar', + 'login': 'direct_bar', + 'name': 'Bar User', + 'email': 'b@sushigrass.com', + 'company': 'sushiGrass', + }, + permission='WRITE', + affiliation='DIRECT', + ), + UserAffiliationAndRepoPermission( + user={ + 'url': 'https://github.com/direct_baz', + 'login': 'direct_baz', + 'name': 'Baz User', + 'email': 'b@testco.com', + 'company': 'TestCo', + }, + permission='READ', + affiliation='DIRECT', + ), + UserAffiliationAndRepoPermission( + user={ + 'url': 'https://github.com/direct_bat', + 'login': 'direct_bat', + 'name': 'Bat User', + 'email': '', + 'company': None, + }, + permission='MAINTAIN', + affiliation='DIRECT', + ), + ], +} diff --git a/tests/integration/cartography/intel/github/test_repos.py b/tests/integration/cartography/intel/github/test_repos.py index e15966d14..7d066a166 100644 --- a/tests/integration/cartography/intel/github/test_repos.py +++ b/tests/integration/cartography/intel/github/test_repos.py @@ -1,5 +1,7 @@ import cartography.intel.github -import tests.data.github.repos +from tests.data.github.repos import DIRECT_COLLABORATORS +from tests.data.github.repos import GET_REPOS +from tests.data.github.repos import OUTSIDE_COLLABORATORS TEST_UPDATE_TAG = 123456789 @@ -8,7 +10,7 @@ def _ensure_local_neo4j_has_test_data(neo4j_session): - repo_data = cartography.intel.github.repos.transform(tests.data.github.repos.GET_REPOS) + repo_data = cartography.intel.github.repos.transform(GET_REPOS, 
DIRECT_COLLABORATORS, OUTSIDE_COLLABORATORS) cartography.intel.github.repos.load( neo4j_session, TEST_JOB_PARAMS, @@ -20,8 +22,7 @@ def test_transform_and_load_repositories(neo4j_session): """ Test that we can correctly transform and load GitHubRepository nodes to Neo4j. """ - repositories_res = tests.data.github.repos.GET_REPOS - repos_data = cartography.intel.github.repos.transform(repositories_res) + repos_data = cartography.intel.github.repos.transform(GET_REPOS, DIRECT_COLLABORATORS, OUTSIDE_COLLABORATORS) cartography.intel.github.repos.load_github_repos( neo4j_session, TEST_UPDATE_TAG, @@ -43,8 +44,7 @@ def test_transform_and_load_repository_owners(neo4j_session): """ Ensure we can transform and load GitHub repository owner nodes. """ - repositories_res = tests.data.github.repos.GET_REPOS - repos_data = cartography.intel.github.repos.transform(repositories_res) + repos_data = cartography.intel.github.repos.transform(GET_REPOS, DIRECT_COLLABORATORS, OUTSIDE_COLLABORATORS) cartography.intel.github.repos.load_github_owners( neo4j_session, TEST_UPDATE_TAG, @@ -64,8 +64,7 @@ def test_transform_and_load_repository_languages(neo4j_session): """ Ensure we can transform and load GitHub repository languages nodes. 
""" - repositories_res = tests.data.github.repos.GET_REPOS - repos_data = cartography.intel.github.repos.transform(repositories_res) + repos_data = cartography.intel.github.repos.transform(GET_REPOS, DIRECT_COLLABORATORS, OUTSIDE_COLLABORATORS) cartography.intel.github.repos.load_github_languages( neo4j_session, TEST_UPDATE_TAG, @@ -179,12 +178,111 @@ def test_repository_to_languages(neo4j_session): def test_repository_to_collaborators(neo4j_session): _ensure_local_neo4j_has_test_data(neo4j_session) + + # Ensure outside collaborators are connected to the expected repos nodes = neo4j_session.run(""" - MATCH (repo:GitHubRepository{name:"cartography"})<-[:OUTSIDE_COLLAB_WRITE]-(user:GitHubUser) - RETURN count(user.username) as collab_count + MATCH (repo:GitHubRepository)<-[rel]-(user:GitHubUser) + WHERE type(rel) STARTS WITH 'OUTSIDE_COLLAB_' + RETURN repo.name, type(rel), user.username """) - actual_nodes = {n['collab_count'] for n in nodes} - expected_nodes = {5} + actual_nodes = { + ( + n['repo.name'], + n['type(rel)'], + n['user.username'], + ) for n in nodes + } + expected_nodes = { + ( + 'cartography', + 'OUTSIDE_COLLAB_WRITE', + 'marco-lancini', + ), + ( + 'cartography', + 'OUTSIDE_COLLAB_READ', + 'sachafaust', + ), + ( + 'cartography', + 'OUTSIDE_COLLAB_ADMIN', + 'SecPrez', + ), + ( + 'cartography', + 'OUTSIDE_COLLAB_TRIAGE', + 'ramonpetgrave64', + ), + ( + 'cartography', + 'OUTSIDE_COLLAB_MAINTAIN', + 'roshinis78', + ), + } + assert actual_nodes == expected_nodes + + # Ensure direct collaborators are connected to the expected repos + # Note how all the folks in the outside collaborators list are also in the direct collaborators list. They + # have both types of relationship. 
+ nodes = neo4j_session.run(""" + MATCH (repo:GitHubRepository)<-[rel]-(user:GitHubUser) + WHERE type(rel) STARTS WITH 'DIRECT_COLLAB_' + RETURN repo.name, type(rel), user.username + """) + actual_nodes = { + ( + n['repo.name'], + n['type(rel)'], + n['user.username'], + ) for n in nodes + } + expected_nodes = { + ( + 'SampleRepo2', + 'DIRECT_COLLAB_ADMIN', + 'direct_foo', + ), + ( + 'cartography', + 'DIRECT_COLLAB_WRITE', + 'marco-lancini', + ), + ( + 'cartography', + 'DIRECT_COLLAB_READ', + 'sachafaust', + ), + ( + 'cartography', + 'DIRECT_COLLAB_ADMIN', + 'SecPrez', + ), + ( + 'cartography', + 'DIRECT_COLLAB_TRIAGE', + 'ramonpetgrave64', + ), + ( + 'cartography', + 'DIRECT_COLLAB_MAINTAIN', + 'roshinis78', + ), + ( + 'cartography', + 'DIRECT_COLLAB_WRITE', + 'direct_bar', + ), + ( + 'cartography', + 'DIRECT_COLLAB_READ', + 'direct_baz', + ), + ( + 'cartography', + 'DIRECT_COLLAB_MAINTAIN', + 'direct_bat', + ), + } assert actual_nodes == expected_nodes