diff --git a/cartography/data/jobs/cleanup/github_users_cleanup.json b/cartography/data/jobs/cleanup/github_org_and_users_cleanup.json similarity index 79% rename from cartography/data/jobs/cleanup/github_users_cleanup.json rename to cartography/data/jobs/cleanup/github_org_and_users_cleanup.json index 4419d8d650..7c7f6ff403 100644 --- a/cartography/data/jobs/cleanup/github_users_cleanup.json +++ b/cartography/data/jobs/cleanup/github_org_and_users_cleanup.json @@ -18,6 +18,11 @@ "query": "MATCH (:GitHubUser)-[r:MEMBER_OF]->(:GitHubOrganization) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)", "iterative": true, "iterationsize": 100 + }, + { + "query": "MATCH (:GitHubUser)-[r:UNAFFILIATED]->(:GitHubOrganization) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)", + "iterative": true, + "iterationsize": 100 }], "name": "cleanup GitHub users data" } diff --git a/cartography/intel/github/users.py b/cartography/intel/github/users.py index 2107b26800..bf26bebe4a 100644 --- a/cartography/intel/github/users.py +++ b/cartography/intel/github/users.py @@ -1,4 +1,5 @@ import logging +from copy import deepcopy from typing import Any from typing import Dict from typing import List @@ -6,7 +7,11 @@ import neo4j +from cartography.client.core.tx import load from cartography.intel.github.util import fetch_all +from cartography.models.github.orgs import GitHubOrganizationSchema +from cartography.models.github.users import GitHubOrganizationUserSchema +from cartography.models.github.users import GitHubUnaffiliatedUserSchema from cartography.stats import get_stats_client from cartography.util import merge_module_sync_metadata from cartography.util import run_cleanup_job @@ -44,17 +49,46 @@ } """ +GITHUB_ENTERPRISE_OWNER_USERS_PAGINATED_GRAPHQL = """ + query($login: String!, $cursor: String) { + organization(login: $login) + { + url + login + enterpriseOwners(first:100, after: $cursor){ + edges { + node { + url + login + name + isSiteAdmin 
+ email + company + } + organizationRole + } + pageInfo{ + endCursor + hasNextPage + } + } + } + } + """ + @timeit -def get(token: str, api_url: str, organization: str) -> Tuple[List[Dict], Dict]: +def get_users(token: str, api_url: str, organization: str) -> Tuple[List[Dict], Dict]: """ Retrieve a list of users from the given GitHub organization as described in https://docs.github.com/en/graphql/reference/objects#organizationmemberedge. :param token: The Github API token as string. :param api_url: The Github v4 API endpoint as string. :param organization: The name of the target Github organization as string. - :return: A 2-tuple containing 1. a list of dicts representing users - see tests.data.github.users.GITHUB_USER_DATA - for shape, and 2. data on the owning GitHub organization - see tests.data.github.users.GITHUB_ORG_DATA for shape. + :return: A 2-tuple containing + 1. a list of dicts representing users and + 2. data on the owning GitHub organization + see tests.data.github.users.GITHUB_USER_DATA for shape of both """ users, org = fetch_all( token, @@ -66,56 +100,139 @@ def get(token: str, api_url: str, organization: str) -> Tuple[List[Dict], Dict]: return users.edges, org +def get_enterprise_owners(token: str, api_url: str, organization: str) -> Tuple[List[Dict], Dict]: + """ + Retrieve a list of enterprise owners from the given GitHub organization as described in + https://docs.github.com/en/graphql/reference/objects#organizationenterpriseowneredge. + :param token: The Github API token as string. + :param api_url: The Github v4 API endpoint as string. + :param organization: The name of the target Github organization as string. + :return: A 2-tuple containing + 1. a list of dicts representing users who are enterprise owners + 3. 
data on the owning GitHub organization + see tests.data.github.users.GITHUB_ENTERPRISE_OWNER_DATA for shape + """ + owners, org = fetch_all( + token, + api_url, + organization, + GITHUB_ENTERPRISE_OWNER_USERS_PAGINATED_GRAPHQL, + 'enterpriseOwners', + ) + return owners.edges, org + + @timeit -def load_organization_users( - neo4j_session: neo4j.Session, user_data: List[Dict], org_data: Dict, +def transform_users(user_data: List[Dict], owners_data: List[Dict], org_data: Dict) -> Tuple[List[Dict], List[Dict]]: + """ + Taking raw user and owner data, return two lists of processed user data: + * organization users aka affiliated users (users directly affiliated with an organization) + * unaffiliated users (user who, for example, are enterprise owners but not members of the target organization). + + :param token: The Github API token as string. + :param api_url: The Github v4 API endpoint as string. + :param organization: The name of the target Github organization as string. + :return: A 2-tuple containing + 1. a list of dicts representing users who are affiliated with the target org + see tests.data.github.users.GITHUB_USER_DATA for shape + 2. a list of dicts representing users who are not affiliated (e.g. enterprise owners who are not also in + the target org) — see tests.data.github.users.GITHUB_ENTERPRISE_OWNER_DATA for shape + 3. 
data on the owning GitHub organization + """ + + users_dict = {} + for user in user_data: + processed_user = deepcopy(user['node']) + processed_user['role'] = user['role'] + processed_user['hasTwoFactorEnabled'] = user['hasTwoFactorEnabled'] + processed_user['MEMBER_OF'] = org_data['url'] + users_dict[processed_user['url']] = processed_user + + owners_dict = {} + for owner in owners_data: + processed_owner = deepcopy(owner['node']) + processed_owner['isEnterpriseOwner'] = True + if owner['organizationRole'] == 'UNAFFILIATED': + processed_owner['UNAFFILIATED'] = org_data['url'] + else: + processed_owner['MEMBER_OF'] = org_data['url'] + owners_dict[processed_owner['url']] = processed_owner + + affiliated_users = [] # users affiliated with the target org + for url, user in users_dict.items(): + user['isEnterpriseOwner'] = url in owners_dict + affiliated_users.append(user) + + unaffiliated_users = [] # users not affiliated with the target org + for url, owner in owners_dict.items(): + if url not in users_dict: + unaffiliated_users.append(owner) + + return affiliated_users, unaffiliated_users + + +@timeit +def load_users( + neo4j_session: neo4j.Session, + node_schema: GitHubOrganizationUserSchema | GitHubUnaffiliatedUserSchema, + user_data: List[Dict], + org_data: Dict, update_tag: int, ) -> None: - query = """ - MERGE (org:GitHubOrganization{id: $OrgUrl}) - ON CREATE SET org.firstseen = timestamp() - SET org.username = $OrgLogin, - org.lastupdated = $UpdateTag - WITH org - - UNWIND $UserData as user - - MERGE (u:GitHubUser{id: user.node.url}) - ON CREATE SET u.firstseen = timestamp() - SET u.fullname = user.node.name, - u.username = user.node.login, - u.has_2fa_enabled = user.hasTwoFactorEnabled, - u.role = user.role, - u.is_site_admin = user.node.isSiteAdmin, - u.email = user.node.email, - u.company = user.node.company, - u.lastupdated = $UpdateTag - - MERGE (u)-[r:MEMBER_OF]->(org) - ON CREATE SET r.firstseen = timestamp() - SET r.lastupdated = $UpdateTag - """ - 
neo4j_session.run( - query, - OrgUrl=org_data['url'], - OrgLogin=org_data['login'], - UserData=user_data, - UpdateTag=update_tag, + logger.info(f"Loading {len(user_data)} GitHub users to the graph") + load( + neo4j_session, + node_schema, + user_data, + lastupdated=update_tag, + org_url=org_data['url'], + ) + + +@timeit +def load_organization( + neo4j_session: neo4j.Session, + node_schema: GitHubOrganizationSchema, + org_data: List[Dict[str, Any]], + update_tag: int, +) -> None: + logger.info(f"Loading {len(org_data)} GitHub organization to the graph") + load( + neo4j_session, + node_schema, + org_data, + lastupdated=update_tag, ) @timeit def sync( neo4j_session: neo4j.Session, - common_job_parameters: Dict[str, Any], + common_job_parameters: Dict, github_api_key: str, github_url: str, organization: str, ) -> None: logger.info("Syncing GitHub users") - user_data, org_data = get(github_api_key, github_url, organization) - load_organization_users(neo4j_session, user_data, org_data, common_job_parameters['UPDATE_TAG']) - run_cleanup_job('github_users_cleanup.json', neo4j_session, common_job_parameters) + user_data, org_data = get_users(github_api_key, github_url, organization) + owners_data, org_data = get_enterprise_owners(github_api_key, github_url, organization) + processed_affiliated_user_data, processed_unaffiliated_user_data = ( + transform_users(user_data, owners_data, org_data) + ) + load_organization( + neo4j_session, GitHubOrganizationSchema(), [org_data], + common_job_parameters['UPDATE_TAG'], + ) + load_users( + neo4j_session, GitHubOrganizationUserSchema(), processed_affiliated_user_data, org_data, + common_job_parameters['UPDATE_TAG'], + ) + load_users( + neo4j_session, GitHubUnaffiliatedUserSchema(), processed_unaffiliated_user_data, org_data, + common_job_parameters['UPDATE_TAG'], + ) + # no automated cleanup job for users because user node has no sub_resource_relationship + run_cleanup_job('github_org_and_users_cleanup.json', neo4j_session, 
common_job_parameters) merge_module_sync_metadata( neo4j_session, group_type='GitHubOrganization', diff --git a/cartography/models/github/orgs.py b/cartography/models/github/orgs.py new file mode 100644 index 0000000000..41ae4f5631 --- /dev/null +++ b/cartography/models/github/orgs.py @@ -0,0 +1,26 @@ +""" +This schema does not handle the org's relationships. Those are handled by other schemas, for example: +* GitHubTeamSchema defines (GitHubOrganization)-[RESOURCE]->(GitHubTeam) +* GitHubUserSchema defines (GitHubUser)-[MEMBER_OF|UNAFFILIATED]->(GitHubOrganization) +(There may be others, these are just two examples.) +""" +from dataclasses import dataclass + +from cartography.models.core.common import PropertyRef +from cartography.models.core.nodes import CartographyNodeProperties +from cartography.models.core.nodes import CartographyNodeSchema + + +@dataclass(frozen=True) +class GitHubOrganizationNodeProperties(CartographyNodeProperties): + id: PropertyRef = PropertyRef('url') + username: PropertyRef = PropertyRef('login', extra_index=True) + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class GitHubOrganizationSchema(CartographyNodeSchema): + label: str = 'GitHubOrganization' + properties: GitHubOrganizationNodeProperties = GitHubOrganizationNodeProperties() + other_relationships = None + sub_resource_relationship = None diff --git a/cartography/models/github/users.py b/cartography/models/github/users.py new file mode 100644 index 0000000000..387652dd90 --- /dev/null +++ b/cartography/models/github/users.py @@ -0,0 +1,119 @@ +""" +RE: Tenant relationship between GitHubUser and GitHubOrganization + +Note this relationship is implemented via 'other_relationships' and not via the 'sub_resource_relationship' +as might be expected. + +The 'sub_resource_relationship' typically describes the relationship of a node to its tenant (the org, project, or +other resource to which other nodes belong). 
An assumption of that relationship is that if the tenant goes +away, all nodes related to it should be cleaned up. + +In GitHub, though the GitHubUser's tenant seems to be GitHubOrganization, users actually exist independently. There +is a concept of 'UNAFFILIATED' users (https://docs.github.com/en/graphql/reference/enums#roleinorganization) like +Enterprise Owners who are related to an org even if they are not direct members of it. You would not want them to be +cleaned up, if an org goes away, and you could want them in your graph even if they are not members of any org in +the enterprise. + +To allow for this in the schema, this relationship is treated as any other node-to-node relationship, via +'other_relationships', instead of as the typical 'sub_resource_relationship'. + +RE: GitHubOrganizationUserSchema vs GitHubUnaffiliatedUserSchema + +As noted above, there are implicitly two types of users, those that are part of, or affiliated, to a target +GitHubOrganization, and those that are not part, or unaffiliated. Both are represented as GitHubUser nodes, +but there are two schemas below to allow for some differences between them, e.g., unaffiliated lack these properties: + * the 'role' property, because unaffiliated have no 'role' in the target org + * the 'has_2fa_enabled' property, because the GitHub api does not return it, for these users +The main importance of having two schemas is to allow the two sets of users to be loaded separately. If we are loading +an unaffiliated user, but the user already exists in the graph (perhaps they are members of another GitHub org for +example), then loading the unaffiliated user will not blank out the 'role' and 'has_2fa_enabled' properties. 
+""" +from dataclasses import dataclass + +from cartography.models.core.common import PropertyRef +from cartography.models.core.nodes import CartographyNodeProperties +from cartography.models.core.nodes import CartographyNodeSchema +from cartography.models.core.relationships import CartographyRelProperties +from cartography.models.core.relationships import CartographyRelSchema +from cartography.models.core.relationships import LinkDirection +from cartography.models.core.relationships import make_target_node_matcher +from cartography.models.core.relationships import OtherRelationships +from cartography.models.core.relationships import TargetNodeMatcher + + +@dataclass(frozen=True) +class BaseGitHubUserNodeProperties(CartographyNodeProperties): + # core properties in all GitHubUser nodes + id: PropertyRef = PropertyRef('url') + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + fullname: PropertyRef = PropertyRef('name') + username: PropertyRef = PropertyRef('login', extra_index=True) + is_site_admin: PropertyRef = PropertyRef('isSiteAdmin') + is_enterprise_owner: PropertyRef = PropertyRef('isEnterpriseOwner') + email: PropertyRef = PropertyRef('email') + company: PropertyRef = PropertyRef('company') + + +@dataclass(frozen=True) +class GitHubOrganizationUserNodeProperties(BaseGitHubUserNodeProperties): + # specified for affiliated users only. The GitHub api does not return this property for unaffiliated users. + has_2fa_enabled: PropertyRef = PropertyRef('hasTwoFactorEnabled') + # specified for affiliated uers only. Unaffiliated users do not have a 'role' in the target organization. 
+ role: PropertyRef = PropertyRef('role') + + +@dataclass(frozen=True) +class GitHubUnaffiliatedUserNodeProperties(BaseGitHubUserNodeProperties): + # No additional properties needed + pass + + +@dataclass(frozen=True) +class GitHubUserToOrganizationRelProperties(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class GitHubUserMemberOfOrganizationRel(CartographyRelSchema): + target_node_label: str = 'GitHubOrganization' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( + {'id': PropertyRef('MEMBER_OF')}, + ) + direction: LinkDirection = LinkDirection.OUTWARD + rel_label: str = "MEMBER_OF" + properties: GitHubUserToOrganizationRelProperties = GitHubUserToOrganizationRelProperties() + + +@dataclass(frozen=True) +class GitHubUserUnaffiliatedOrganizationRel(CartographyRelSchema): + target_node_label: str = 'GitHubOrganization' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( + {'id': PropertyRef('UNAFFILIATED')}, + ) + direction: LinkDirection = LinkDirection.OUTWARD + rel_label: str = "UNAFFILIATED" + properties: GitHubUserToOrganizationRelProperties = GitHubUserToOrganizationRelProperties() + + +@dataclass(frozen=True) +class GitHubOrganizationUserSchema(CartographyNodeSchema): + label: str = 'GitHubUser' + properties: GitHubOrganizationUserNodeProperties = GitHubOrganizationUserNodeProperties() + other_relationships: OtherRelationships = OtherRelationships( + [ + GitHubUserMemberOfOrganizationRel(), + ], + ) + sub_resource_relationship = None + + +@dataclass(frozen=True) +class GitHubUnaffiliatedUserSchema(CartographyNodeSchema): + label: str = 'GitHubUser' + properties: GitHubUnaffiliatedUserNodeProperties = GitHubUnaffiliatedUserNodeProperties() + other_relationships: OtherRelationships = OtherRelationships( + [ + GitHubUserUnaffiliatedOrganizationRel(), + ], + ) + sub_resource_relationship = None diff --git a/docs/root/modules/github/schema.md 
b/docs/root/modules/github/schema.md index 027ac514ce..5a977d3ddf 100644 --- a/docs/root/modules/github/schema.md +++ b/docs/root/modules/github/schema.md @@ -87,6 +87,12 @@ Representation of a single GitHubOrganization [organization object](https://deve (GitHubOrganization)-[RESOURCE]->(GitHubTeam) ``` +- GitHubUsers are members of an organization. In some cases there may be a user who is "unaffiliated" with an org, for example if the user is an enterprise owner but not a member of the org. [Enterprise owners](https://docs.github.com/en/enterprise-cloud@latest/admin/managing-accounts-and-repositories/managing-users-in-your-enterprise/roles-in-an-enterprise#enterprise-owners) have complete control over the enterprise (i.e. they can manage all enterprise settings, members, and policies) yet may not show up on member lists of the GitHub org. + + ``` + (GitHubUser)-[MEMBER_OF|UNAFFILIATED]->(GitHubOrganization) + ``` + ### GitHubTeam @@ -131,10 +137,10 @@ Representation of a single GitHubUser [user object](https://developer.github.com | has_2fa_enabled | Whether the user has 2-factor authentication enabled | | role | Either 'ADMIN' (denoting that the user is an owner of a Github organization) or 'MEMBER' | | is_site_admin | Whether the user is a site admin | -| permission | Only present if the user is an [outside collaborator](https://docs.github.com/en/graphql/reference/objects#repositorycollaboratorconnection) of this repo. -`permission` is either ADMIN, MAINTAIN, READ, TRIAGE, or WRITE ([ref](https://docs.github.com/en/graphql/reference/enums#repositorypermission)). -| email | The user's publicly visible profile email. -| company | The user's public profile company. 
+| is_enterprise_owner | Whether the user is an [enterprise owner](https://docs.github.com/en/enterprise-cloud@latest/admin/managing-accounts-and-repositories/managing-users-in-your-enterprise/roles-in-an-enterprise#enterprise-owners) | +| permission | Only present if the user is an [outside collaborator](https://docs.github.com/en/graphql/reference/objects#repositorycollaboratorconnection) of this repo. `permission` is either ADMIN, MAINTAIN, READ, TRIAGE, or WRITE ([ref](https://docs.github.com/en/graphql/reference/enums#repositorypermission)). | +| email | The user's publicly visible profile email. | +| company | The user's public profile company. | #### Relationships @@ -152,6 +158,12 @@ WRITE, MAINTAIN, TRIAGE, and READ ([Reference](https://docs.github.com/en/graphq (GitHubUser)-[:OUTSIDE_COLLAB_{ACTION}]->(GitHubRepository) ``` +- GitHubUsers are members of an organization. In some cases there may be a user who is "unaffiliated" with an org, for example if the user is an enterprise owner but not a member of the org. [Enterprise owners](https://docs.github.com/en/enterprise-cloud@latest/admin/managing-accounts-and-repositories/managing-users-in-your-enterprise/roles-in-an-enterprise#enterprise-owners) have complete control over the enterprise (i.e. they can manage all enterprise settings, members, and policies) yet may not show up on member lists of the GitHub org. + + ``` + (GitHubUser)-[MEMBER_OF|UNAFFILIATED]->(GitHubOrganization) + ``` + ### GitHubBranch Representation of a single GitHubBranch [ref object](https://developer.github.com/v4/object/ref). This node contains minimal data for a repository branch. 
diff --git a/tests/data/github/users.py b/tests/data/github/users.py index b421bd44e2..c1a1842932 100644 --- a/tests/data/github/users.py +++ b/tests/data/github/users.py @@ -1,30 +1,88 @@ -GITHUB_USER_DATA = [ - { - 'hasTwoFactorEnabled': None, - 'node': { - 'url': 'https://example.com/hjsimpson', - 'login': 'hjsimpson', - 'name': 'Homer Simpson', - 'isSiteAdmin': False, - 'email': 'hjsimpson@example.com', - 'company': 'Springfield Nuclear Power Plant', - }, - 'role': 'MEMBER', - }, { - 'hasTwoFactorEnabled': None, - 'node': { - 'url': 'https://example.com/mbsimpson', - 'login': 'mbsimpson', - 'name': 'Marge Simpson', - 'isSiteAdmin': False, - 'email': 'mbsimpson@example.com', - 'company': 'Simpson Residence', - }, - 'role': 'ADMIN', - }, -] - GITHUB_ORG_DATA = { 'url': 'https://example.com/my_org', 'login': 'my_org', } + + +GITHUB_USER_DATA = ( + [ + { + 'hasTwoFactorEnabled': None, + 'node': { + 'url': 'https://example.com/hjsimpson', + 'login': 'hjsimpson', + 'name': 'Homer Simpson', + 'isSiteAdmin': False, + 'email': 'hjsimpson@example.com', + 'company': 'Springfield Nuclear Power Plant', + }, + 'role': 'MEMBER', + }, { + 'hasTwoFactorEnabled': None, + 'node': { + 'url': 'https://example.com/lmsimpson', + 'login': 'lmsimpson', + 'name': 'Lisa Simpson', + 'isSiteAdmin': False, + 'email': 'lmsimpson@example.com', + 'company': 'Simpson Residence', + }, + 'role': 'MEMBER', + }, { + 'hasTwoFactorEnabled': True, + 'node': { + 'url': 'https://example.com/mbsimpson', + 'login': 'mbsimpson', + 'name': 'Marge Simpson', + 'isSiteAdmin': False, + 'email': 'mbsimpson@example.com', + 'company': 'Simpson Residence', + }, + 'role': 'ADMIN', + }, + ], + GITHUB_ORG_DATA, +) + +# Subtle differences between owner data and user data: +# 1. owner data does not include a `hasTwoFactorEnabled` field (it in unavailable in the GraphQL query for these owners) +# 2. an `organizationRole` field instead of a `role` field. 
In owner data, membership within an org is not assumed, so +# there is an 'UNAFFILIATED' value for owners of an org who are not also members of it. (Otherwise the 'OWNER' +# organizationRole matches the 'ADMIN' role in the user data, and the 'DIRECT_MEMBER' organizationRole matches +# the 'MEMBER' role.) +GITHUB_ENTERPRISE_OWNER_DATA = ( + [ + { + 'node': { + 'url': 'https://example.com/kbroflovski', + 'login': 'kbroflovski', + 'name': 'Kyle Broflovski', + 'isSiteAdmin': False, + 'email': 'kbroflovski@example.com', + 'company': 'South Park Elementary', + }, + 'organizationRole': 'UNAFFILIATED', + }, { + 'node': { + 'url': 'https://example.com/mbsimpson', + 'login': 'mbsimpson', + 'name': 'Marge Simpson', + 'isSiteAdmin': False, + 'email': 'mbsimpson@example.com', + 'company': 'Simpson Residence', + }, + 'organizationRole': 'OWNER', + }, { + 'node': { + 'url': 'https://example.com/lmsimpson', + 'login': 'lmsimpson', + 'name': 'Lisa Simpson', + 'isSiteAdmin': False, + 'email': 'lmsimpson@example.com', + 'company': 'Simpson Residence', + }, + 'organizationRole': 'DIRECT_MEMBER', + }, + ], + GITHUB_ORG_DATA, +) diff --git a/tests/data/graph/querybuilder/sample_data/case_insensitive_prop_ref.py b/tests/data/graph/querybuilder/sample_data/case_insensitive_prop_ref.py index 79ebc78b52..67f05fd52a 100644 --- a/tests/data/graph/querybuilder/sample_data/case_insensitive_prop_ref.py +++ b/tests/data/graph/querybuilder/sample_data/case_insensitive_prop_ref.py @@ -1,33 +1,34 @@ +FAKE_GITHUB_ORG_DATA = { + 'url': 'https://example.com/my_org', + 'login': 'my_org', +} + FAKE_GITHUB_USER_DATA = [ { + 'MEMBER_OF': FAKE_GITHUB_ORG_DATA['url'], 'hasTwoFactorEnabled': None, - 'node': { - 'url': 'https://example.com/hjsimpson', - 'login': 'HjsimPson', # Upper and lowercase - 'name': 'Homer Simpson', - 'isSiteAdmin': False, - 'email': 'hjsimpson@example.com', - 'company': 'Springfield Nuclear Power Plant', - }, + 'url': 'https://example.com/hjsimpson', + 'login': 'HjsimPson', # Upper and 
lowercase + 'name': 'Homer Simpson', + 'isSiteAdmin': False, + 'isEnterpriseOwner': False, + 'email': 'hjsimpson@example.com', + 'company': 'Springfield Nuclear Power Plant', 'role': 'MEMBER', }, { + 'MEMBER_OF': FAKE_GITHUB_ORG_DATA['url'], 'hasTwoFactorEnabled': None, - 'node': { - 'url': 'https://example.com/mbsimpson', - 'login': 'mbsimp-son', # All lowercase - 'name': 'Marge Simpson', - 'isSiteAdmin': False, - 'email': 'mbsimpson@example.com', - 'company': 'Simpson Residence', - }, + 'url': 'https://example.com/mbsimpson', + 'login': 'mbsimp-son', # All lowercase + 'name': 'Marge Simpson', + 'isEnterpriseOwner': True, + 'isSiteAdmin': False, + 'email': 'mbsimpson@example.com', + 'company': 'Simpson Residence', 'role': 'ADMIN', }, ] -FAKE_GITHUB_ORG_DATA = { - 'url': 'https://example.com/my_org', - 'login': 'my_org', -} FAKE_EMPLOYEE_DATA = [ { diff --git a/tests/integration/cartography/graph/test_querybuilder_case_insensitive.py b/tests/integration/cartography/graph/test_querybuilder_case_insensitive.py index 7a47110377..edb9b6c008 100644 --- a/tests/integration/cartography/graph/test_querybuilder_case_insensitive.py +++ b/tests/integration/cartography/graph/test_querybuilder_case_insensitive.py @@ -1,5 +1,6 @@ from cartography.client.core.tx import load -from cartography.intel.github.users import load_organization_users +from cartography.intel.github.users import load_users +from cartography.models.github.users import GitHubOrganizationUserSchema from tests.data.graph.querybuilder.sample_data.case_insensitive_prop_ref import FAKE_EMPLOYEE_DATA from tests.data.graph.querybuilder.sample_data.case_insensitive_prop_ref import FAKE_GITHUB_ORG_DATA from tests.data.graph.querybuilder.sample_data.case_insensitive_prop_ref import FAKE_GITHUB_USER_DATA @@ -11,8 +12,9 @@ def test_load_team_members_data(neo4j_session): # Arrange: Load some fake GitHubUser nodes to the graph - load_organization_users( + load_users( neo4j_session, + GitHubOrganizationUserSchema(), 
FAKE_GITHUB_USER_DATA, FAKE_GITHUB_ORG_DATA, TEST_UPDATE_TAG, diff --git a/tests/integration/cartography/graph/test_querybuilder_fuzzy_case_insensitive.py b/tests/integration/cartography/graph/test_querybuilder_fuzzy_case_insensitive.py index 4d624a2a15..730482074f 100644 --- a/tests/integration/cartography/graph/test_querybuilder_fuzzy_case_insensitive.py +++ b/tests/integration/cartography/graph/test_querybuilder_fuzzy_case_insensitive.py @@ -1,5 +1,6 @@ from cartography.client.core.tx import load -from cartography.intel.github.users import load_organization_users +from cartography.intel.github.users import load_users +from cartography.models.github.users import GitHubOrganizationUserSchema from tests.data.graph.querybuilder.sample_data.case_insensitive_prop_ref import FAKE_EMPLOYEE2_DATA from tests.data.graph.querybuilder.sample_data.case_insensitive_prop_ref import FAKE_GITHUB_ORG_DATA from tests.data.graph.querybuilder.sample_data.case_insensitive_prop_ref import FAKE_GITHUB_USER_DATA @@ -11,8 +12,9 @@ def test_load_team_members_data_fuzzy(neo4j_session): # Arrange: Load some fake GitHubUser nodes to the graph - load_organization_users( + load_users( neo4j_session, + GitHubOrganizationUserSchema(), FAKE_GITHUB_USER_DATA, FAKE_GITHUB_ORG_DATA, TEST_UPDATE_TAG, diff --git a/tests/integration/cartography/intel/github/test_users.py b/tests/integration/cartography/intel/github/test_users.py index 03766700e1..21a0f5be91 100644 --- a/tests/integration/cartography/intel/github/test_users.py +++ b/tests/integration/cartography/intel/github/test_users.py @@ -1,17 +1,34 @@ +from unittest.mock import patch + import cartography.intel.github.users -import tests.data.github.users +from tests.data.github.users import GITHUB_ENTERPRISE_OWNER_DATA +from tests.data.github.users import GITHUB_ORG_DATA +from tests.data.github.users import GITHUB_USER_DATA TEST_UPDATE_TAG = 123456789 +TEST_JOB_PARAMS = {'UPDATE_TAG': TEST_UPDATE_TAG} +TEST_GITHUB_URL = GITHUB_ORG_DATA['url'] 
+TEST_GITHUB_ORG = GITHUB_ORG_DATA['login'] +FAKE_API_KEY = 'asdf' + +@patch.object(cartography.intel.github.users, 'get_users', return_value=GITHUB_USER_DATA) +@patch.object(cartography.intel.github.users, 'get_enterprise_owners', return_value=GITHUB_ENTERPRISE_OWNER_DATA) +def test_sync(mock_owners, mock_users, neo4j_session): + # Arrange + # No need to 'arrange' data here. The patched functions return all the data needed. -def test_load_github_organization_users(neo4j_session): - cartography.intel.github.users.load_organization_users( + # Act + cartography.intel.github.users.sync( neo4j_session, - tests.data.github.users.GITHUB_USER_DATA, - tests.data.github.users.GITHUB_ORG_DATA, - TEST_UPDATE_TAG, + TEST_JOB_PARAMS, + FAKE_API_KEY, + TEST_GITHUB_URL, + TEST_GITHUB_ORG, ) + # Assert + # Ensure users got loaded nodes = neo4j_session.run( """ @@ -20,7 +37,9 @@ def test_load_github_organization_users(neo4j_session): ) expected_nodes = { ("https://example.com/hjsimpson", 'MEMBER'), + ("https://example.com/lmsimpson", 'MEMBER'), ("https://example.com/mbsimpson", 'ADMIN'), + ("https://example.com/kbroflovski", None), } actual_nodes = { ( @@ -33,23 +52,74 @@ def test_load_github_organization_users(neo4j_session): # Ensure users are connected to the expected organization nodes = neo4j_session.run( """ - MATCH(user:GitHubUser)-[:MEMBER_OF]->(org:GitHubOrganization) - RETURN user.id, org.id + MATCH(user:GitHubUser)-[r]->(org:GitHubOrganization) + RETURN user.id, type(r), org.id """, ) actual_nodes = { ( n['user.id'], + n['type(r)'], n['org.id'], ) for n in nodes } expected_nodes = { ( 'https://example.com/hjsimpson', + 'MEMBER_OF', + 'https://example.com/my_org', + ), ( + 'https://example.com/lmsimpson', + 'MEMBER_OF', 'https://example.com/my_org', ), ( 'https://example.com/mbsimpson', + 'MEMBER_OF', + 'https://example.com/my_org', + ), ( + 'https://example.com/kbroflovski', + 'UNAFFILIATED', 'https://example.com/my_org', ), } assert actual_nodes == expected_nodes + + # 
Ensure enterprise owners are identified + nodes = neo4j_session.run( + """ + MATCH (g:GitHubUser) RETURN g.id, g.is_enterprise_owner + """, + ) + expected_nodes = { + ("https://example.com/hjsimpson", False), + ("https://example.com/lmsimpson", True), + ("https://example.com/mbsimpson", True), + ("https://example.com/kbroflovski", True), + } + actual_nodes = { + ( + n['g.id'], + n['g.is_enterprise_owner'], + ) for n in nodes + } + assert actual_nodes == expected_nodes + + # Ensure hasTwoFactorEnabled has not been improperly overwritten for enterprise owners + nodes = neo4j_session.run( + """ + MATCH (g:GitHubUser) RETURN g.id, g.has_2fa_enabled + """, + ) + expected_nodes = { + ("https://example.com/hjsimpson", None), + ("https://example.com/lmsimpson", None), + ("https://example.com/mbsimpson", True), + ("https://example.com/kbroflovski", None), + } + actual_nodes = { + ( + n['g.id'], + n['g.has_2fa_enabled'], + ) for n in nodes + } + assert actual_nodes == expected_nodes