Skip to content

Commit

Permalink
Adding token generation before each service API (#1324)
Browse files Browse the repository at this point in the history
Fix for #1323

Co-authored-by: i_virus <[email protected]>
  • Loading branch information
soumyadipDe and chandanchowdhury authored Oct 11, 2024
1 parent 97d6a39 commit 05916cd
Showing 1 changed file with 110 additions and 23 deletions.
133 changes: 110 additions & 23 deletions cartography/intel/gcp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,11 @@ def _initialize_resources(credentials: GoogleCredentials) -> Resource:
return Resources(
crm_v1=_get_crm_resource_v1(credentials),
crm_v2=_get_crm_resource_v2(credentials),
compute=_get_compute_resource(credentials),
storage=_get_storage_resource(credentials),
container=_get_container_resource(credentials),
serviceusage=_get_serviceusage_resource(credentials),
dns=_get_dns_resource(credentials),
compute=None,
container=None,
dns=None,
storage=None,
)


Expand Down Expand Up @@ -159,12 +159,12 @@ def _services_enabled_on_project(serviceusage: Resource, project_id: str) -> Set
return set()


def _sync_single_project(
def _sync_single_project_compute(
neo4j_session: neo4j.Session, resources: Resource, project_id: str, gcp_update_tag: int,
common_job_parameters: Dict,
) -> None:
"""
Handles graph sync for a single GCP project.
Handles graph sync for a single GCP project on Compute resources.
:param neo4j_session: The Neo4j session
:param resources: namedtuple of the GCP resource objects
:param project_id: The project ID number to sync. See the `projectId` field in
Expand All @@ -175,14 +175,72 @@ def _sync_single_project(
"""
# Determine the resources available on the project.
enabled_services = _services_enabled_on_project(resources.serviceusage, project_id)
compute_cred = _get_compute_resource(get_gcp_credentials())
if service_names.compute in enabled_services:
compute.sync(neo4j_session, resources.compute, project_id, gcp_update_tag, common_job_parameters)
compute.sync(neo4j_session, compute_cred, project_id, gcp_update_tag, common_job_parameters)


def _sync_single_project_storage(
neo4j_session: neo4j.Session, resources: Resource, project_id: str, gcp_update_tag: int,
common_job_parameters: Dict,
) -> None:
"""
Handles graph sync for a single GCP project on Storage resources.
:param neo4j_session: The Neo4j session
:param resources: namedtuple of the GCP resource objects
:param project_id: The project ID number to sync. See the `projectId` field in
https://cloud.google.com/resource-manager/reference/rest/v1/projects
:param gcp_update_tag: The timestamp value to set our new Neo4j nodes with
:param common_job_parameters: Other parameters sent to Neo4j
:return: Nothing
"""
# Determine the resources available on the project.
enabled_services = _services_enabled_on_project(resources.serviceusage, project_id)
storage_cred = _get_storage_resource(get_gcp_credentials())
if service_names.storage in enabled_services:
storage.sync_gcp_buckets(neo4j_session, resources.storage, project_id, gcp_update_tag, common_job_parameters)
storage.sync_gcp_buckets(neo4j_session, storage_cred, project_id, gcp_update_tag, common_job_parameters)


def _sync_single_project_gke(
neo4j_session: neo4j.Session, resources: Resource, project_id: str, gcp_update_tag: int,
common_job_parameters: Dict,
) -> None:
"""
Handles graph sync for a single GCP project GKE resources.
:param neo4j_session: The Neo4j session
:param resources: namedtuple of the GCP resource objects
:param project_id: The project ID number to sync. See the `projectId` field in
https://cloud.google.com/resource-manager/reference/rest/v1/projects
:param gcp_update_tag: The timestamp value to set our new Neo4j nodes with
:param common_job_parameters: Other parameters sent to Neo4j
:return: Nothing
"""
# Determine the resources available on the project.
enabled_services = _services_enabled_on_project(resources.serviceusage, project_id)
container_cred = _get_container_resource(get_gcp_credentials())
if service_names.gke in enabled_services:
gke.sync_gke_clusters(neo4j_session, resources.container, project_id, gcp_update_tag, common_job_parameters)
gke.sync_gke_clusters(neo4j_session, container_cred, project_id, gcp_update_tag, common_job_parameters)


def _sync_single_project_dns(
neo4j_session: neo4j.Session, resources: Resource, project_id: str, gcp_update_tag: int,
common_job_parameters: Dict,
) -> None:
"""
Handles graph sync for a single GCP project DNS resources.
:param neo4j_session: The Neo4j session
:param resources: namedtuple of the GCP resource objects
:param project_id: The project ID number to sync. See the `projectId` field in
https://cloud.google.com/resource-manager/reference/rest/v1/projects
:param gcp_update_tag: The timestamp value to set our new Neo4j nodes with
:param common_job_parameters: Other parameters sent to Neo4j
:return: Nothing
"""
# Determine the resources available on the project.
enabled_services = _services_enabled_on_project(resources.serviceusage, project_id)
dns_cred = _get_dns_resource(get_gcp_credentials())
if service_names.dns in enabled_services:
dns.sync(neo4j_session, resources.dns, project_id, gcp_update_tag, common_job_parameters)
dns.sync(neo4j_session, dns_cred, project_id, gcp_update_tag, common_job_parameters)


def _sync_multiple_projects(
Expand All @@ -203,26 +261,38 @@ def _sync_multiple_projects(
"""
logger.info("Syncing %d GCP projects.", len(projects))
crm.sync_gcp_projects(neo4j_session, projects, gcp_update_tag, common_job_parameters)
# Compute data sync

This comment has been minimized.

Copy link
@achantavy

achantavy Nov 5, 2024

Contributor

@chandanchowdhury I feel we should have kept the previous interface for syncing a single project. After this change, writing a custom sync command that runs only a single project takes extra work, such as building the dict.

I get why we did it, though: the functions are prefixed with _, so they aren't really formal interfaces. It'd be good to revisit which functions should be private and which should not.

cc: @ramonpetgrave64

for project in projects:
project_id = project['projectId']
logger.info("Syncing GCP project %s for Compute.", project_id)
_sync_single_project_compute(neo4j_session, resources, project_id, gcp_update_tag, common_job_parameters)

# Storage data sync
for project in projects:
project_id = project['projectId']
logger.info("Syncing GCP project %s.", project_id)
_sync_single_project(neo4j_session, resources, project_id, gcp_update_tag, common_job_parameters)
logger.info("Syncing GCP project %s for Storage", project_id)
_sync_single_project_storage(neo4j_session, resources, project_id, gcp_update_tag, common_job_parameters)

# GKE data sync
for project in projects:
project_id = project['projectId']
logger.info("Syncing GCP project %s for GKE", project_id)
_sync_single_project_gke(neo4j_session, resources, project_id, gcp_update_tag, common_job_parameters)

# DNS data sync
for project in projects:
project_id = project['projectId']
logger.info("Syncing GCP project %s for DNS", project_id)
_sync_single_project_dns(neo4j_session, resources, project_id, gcp_update_tag, common_job_parameters)


@timeit
def start_gcp_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
def get_gcp_credentials() -> GoogleCredentials:
"""
Starts the GCP ingestion process by initializing Google Application Default Credentials, creating the necessary
resource objects, listing all GCP organizations and projects available to the GCP identity, and supplying that
context to all intel modules.
:param neo4j_session: The Neo4j session
:param config: A `cartography.config` object
:return: Nothing
Gets access tokens for GCP API access.
:param: None
:return: GoogleCredentials
"""
common_job_parameters = {
"UPDATE_TAG": config.update_tag,
}
try:
# Explicitly use Application Default Credentials.
# See https://oauth2client.readthedocs.io/en/latest/source/
Expand All @@ -239,7 +309,24 @@ def start_gcp_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
),
e,
)
return
return credentials


@timeit
def start_gcp_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
"""
Starts the GCP ingestion process by initializing Google Application Default Credentials, creating the necessary
resource objects, listing all GCP organizations and projects available to the GCP identity, and supplying that
context to all intel modules.
:param neo4j_session: The Neo4j session
:param config: A `cartography.config` object
:return: Nothing
"""
common_job_parameters = {
"UPDATE_TAG": config.update_tag,
}

credentials = get_gcp_credentials()

resources = _initialize_resources(credentials)

Expand Down

0 comments on commit 05916cd

Please sign in to comment.