From be818f48738a52449844b3c3ef166b846a21c527 Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Thu, 21 Sep 2023 11:09:22 -0700 Subject: [PATCH] Refactor AWS EKS to new model (#1247) Refactoring EKS module the the new data model since it will make it a lot easier to add in new relationships. Will make a follow-up PR to attach subnets, roles, security groups, VPCs, etc. --- cartography/data/indexes.cypher | 2 - .../jobs/cleanup/aws_import_eks_cleanup.json | 15 --- cartography/intel/aws/eks.py | 100 ++++++++---------- cartography/models/aws/eks/__init__.py | 0 cartography/models/aws/eks/clusters.py | 50 +++++++++ tests/data/aws/eks.py | 8 +- .../cartography/intel/aws/test_eks.py | 89 ++++++---------- 7 files changed, 135 insertions(+), 129 deletions(-) delete mode 100644 cartography/data/jobs/cleanup/aws_import_eks_cleanup.json create mode 100644 cartography/models/aws/eks/__init__.py create mode 100644 cartography/models/aws/eks/clusters.py diff --git a/cartography/data/indexes.cypher b/cartography/data/indexes.cypher index 872dd742d6..a3133f4474 100644 --- a/cartography/data/indexes.cypher +++ b/cartography/data/indexes.cypher @@ -119,8 +119,6 @@ CREATE INDEX IF NOT EXISTS FOR (n:ECSContainerDefinition) ON (n.id); CREATE INDEX IF NOT EXISTS FOR (n:ECSContainerDefinition) ON (n.lastupdated); CREATE INDEX IF NOT EXISTS FOR (n:ECSContainer) ON (n.id); CREATE INDEX IF NOT EXISTS FOR (n:ECSContainer) ON (n.lastupdated); -CREATE INDEX IF NOT EXISTS FOR (n:EKSCluster) ON (n.id); -CREATE INDEX IF NOT EXISTS FOR (n:EKSCluster) ON (n.lastupdated); CREATE INDEX IF NOT EXISTS FOR (n:ElasticacheCluster) ON (n.id); CREATE INDEX IF NOT EXISTS FOR (n:ElasticacheCluster) ON (n.arn); CREATE INDEX IF NOT EXISTS FOR (n:ElasticacheCluster) ON (n.lastupdated); diff --git a/cartography/data/jobs/cleanup/aws_import_eks_cleanup.json b/cartography/data/jobs/cleanup/aws_import_eks_cleanup.json deleted file mode 100644 index 100514025f..0000000000 --- a/cartography/data/jobs/cleanup/aws_import_eks_cleanup.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "statements": [ - { - "query": "MATCH (n:EKSCluster)<-[:RESOURCE]-(:AWSAccount{id: $AWS_ID}) WHERE n.lastupdated <> $UPDATE_TAG WITH n LIMIT $LIMIT_SIZE DETACH DELETE (n)", - "iterative": true, - "iterationsize": 100 - }, - { - "query": "MATCH (:EKSCluster)<-[r:RESOURCE]-(:AWSAccount{id: $AWS_ID}) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)", - "iterative": true, - "iterationsize": 100 - } - ], - "name": "cleanup EKSCluster" -} diff --git a/cartography/intel/aws/eks.py b/cartography/intel/aws/eks.py index 47ee7c27e8..07173eccbc 100644 --- a/cartography/intel/aws/eks.py +++ b/cartography/intel/aws/eks.py @@ -6,8 +6,10 @@ import boto3 import neo4j +from cartography.client.core.tx import load +from cartography.graph.job import GraphJob +from cartography.models.aws.eks.clusters import EKSClusterSchema from cartography.util import aws_handle_regions -from cartography.util import run_cleanup_job from cartography.util import timeit logger = logging.getLogger(__name__) @@ -15,9 +17,9 @@ @timeit @aws_handle_regions -def get_eks_clusters(boto3_session: boto3.session.Session, region: str) -> List[Dict]: +def get_eks_clusters(boto3_session: boto3.session.Session, region: str) -> List[str]: client = boto3_session.client('eks', region_name=region) - clusters: List[Dict] = [] + clusters: List[str] = [] paginator = client.get_paginator('list_clusters') for page in paginator.paginate(): clusters.extend(page['clusters']) @@ -33,49 +35,20 @@ def get_eks_describe_cluster(boto3_session: boto3.session.Session, region: str, @timeit def load_eks_clusters( - neo4j_session: neo4j.Session, cluster_data: Dict, region: str, current_aws_account_id: str, - aws_update_tag: int, + neo4j_session: neo4j.Session, + cluster_data: List[Dict[str, Any]], + region: str, + current_aws_account_id: str, + aws_update_tag: int, ) -> None: - query: str = """ - MERGE (cluster:EKSCluster{id: $ClusterArn}) - ON CREATE SET cluster.firstseen = timestamp(), - cluster.arn = $ClusterArn, - cluster.name = $ClusterName, - cluster.region = $Region, - cluster.created_at = $CreatedAt - SET cluster.lastupdated = $aws_update_tag, - cluster.endpoint = $ClusterEndpoint, - cluster.endpoint_public_access = $ClusterEndointPublic, - cluster.rolearn = $ClusterRoleArn, - cluster.version = $ClusterVersion, - cluster.platform_version = $ClusterPlatformVersion, - cluster.status = $ClusterStatus, - cluster.audit_logging = $ClusterLogging - WITH cluster - MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID}) - MERGE (owner)-[r:RESOURCE]->(cluster) - ON CREATE SET r.firstseen = timestamp() - SET r.lastupdated = $aws_update_tag - """ - - for cd in cluster_data: - cluster = cluster_data[cd] - neo4j_session.run( - query, - ClusterArn=cluster['arn'], - ClusterName=cluster['name'], - ClusterEndpoint=cluster.get('endpoint'), - ClusterEndointPublic=cluster.get('resourcesVpcConfig', {}).get('endpointPublicAccess'), - ClusterRoleArn=cluster.get('roleArn'), - ClusterVersion=cluster.get('version'), - ClusterPlatformVersion=cluster.get('platformVersion'), - ClusterStatus=cluster.get('status'), - CreatedAt=str(cluster.get('createdAt')), - ClusterLogging=_process_logging(cluster), - Region=region, - aws_update_tag=aws_update_tag, - AWS_ACCOUNT_ID=current_aws_account_id, - ) + load( + neo4j_session, + EKSClusterSchema(), + cluster_data, + Region=region, + AWS_ID=current_aws_account_id, + lastupdated=aws_update_tag, + ) def _process_logging(cluster: Dict) -> bool: @@ -91,24 +64,43 @@ def _process_logging(cluster: Dict) -> bool: @timeit -def cleanup(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None: - run_cleanup_job('aws_import_eks_cleanup.json', neo4j_session, common_job_parameters) +def cleanup(neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any]) -> None: + logger.info("Running EKS cluster cleanup") + GraphJob.from_node_schema(EKSClusterSchema(), common_job_parameters).run(neo4j_session) + + +def transform(cluster_data: Dict[str, Any]) -> List[Dict[str, Any]]: + transformed_list = [] + for cluster_name, cluster_dict in cluster_data.items(): + transformed_dict = cluster_dict.copy() + transformed_dict['ClusterLogging'] = _process_logging(transformed_dict) + transformed_dict['ClusterEndpointPublic'] = transformed_dict.get('resourcesVpcConfig', {}).get( + 'endpointPublicAccess', + ) + if 'createdAt' in transformed_dict: + transformed_dict['created_at'] = str(transformed_dict['createdAt']) + transformed_list.append(transformed_dict) + return transformed_list @timeit def sync( - neo4j_session: neo4j.Session, boto3_session: boto3.session.Session, regions: List[str], current_aws_account_id: str, - update_tag: int, common_job_parameters: Dict, + neo4j_session: neo4j.Session, + boto3_session: boto3.session.Session, + regions: List[str], + current_aws_account_id: str, + update_tag: int, + common_job_parameters: Dict[str, Any], ) -> None: for region in regions: logger.info("Syncing EKS for region '%s' in account '%s'.", region, current_aws_account_id) - clusters: List[Dict] = get_eks_clusters(boto3_session, region) - - cluster_data: Dict = {} + clusters: List[str] = get_eks_clusters(boto3_session, region) + cluster_data = {} for cluster_name in clusters: - cluster_data[cluster_name] = get_eks_describe_cluster(boto3_session, region, cluster_name) # type: ignore + cluster_data[cluster_name] = get_eks_describe_cluster(boto3_session, region, cluster_name) + transformed_list = transform(cluster_data) - load_eks_clusters(neo4j_session, cluster_data, region, current_aws_account_id, update_tag) + load_eks_clusters(neo4j_session, transformed_list, region, current_aws_account_id, update_tag) cleanup(neo4j_session, common_job_parameters) diff --git a/cartography/models/aws/eks/__init__.py b/cartography/models/aws/eks/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/cartography/models/aws/eks/clusters.py b/cartography/models/aws/eks/clusters.py new file mode 100644 index 0000000000..38af75547b --- /dev/null +++ b/cartography/models/aws/eks/clusters.py @@ -0,0 +1,50 @@ +from dataclasses import dataclass + +from cartography.models.core.common import PropertyRef +from cartography.models.core.nodes import CartographyNodeProperties +from cartography.models.core.nodes import CartographyNodeSchema +from cartography.models.core.relationships import CartographyRelProperties +from cartography.models.core.relationships import CartographyRelSchema +from cartography.models.core.relationships import LinkDirection +from cartography.models.core.relationships import make_target_node_matcher +from cartography.models.core.relationships import TargetNodeMatcher + + +@dataclass(frozen=True) +class EKSClusterNodeProperties(CartographyNodeProperties): + id: PropertyRef = PropertyRef('arn') + arn: PropertyRef = PropertyRef('arn', extra_index=True) + name: PropertyRef = PropertyRef('name', extra_index=True) + region: PropertyRef = PropertyRef('Region', set_in_kwargs=True) + created_at: PropertyRef = PropertyRef('created_at') + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + endpoint: PropertyRef = PropertyRef('endpoint') + endpoint_public_access: PropertyRef = PropertyRef('ClusterEndpointPublic') + rolearn: PropertyRef = PropertyRef('roleArn') + version: PropertyRef = PropertyRef('version') + platform_version: PropertyRef = PropertyRef('platformVersion') + status: PropertyRef = PropertyRef('status') + audit_logging: PropertyRef = PropertyRef('ClusterLogging') + + +@dataclass(frozen=True) +class EKSClusterToAwsAccountRelProperties(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class EKSClusterToAWSAccount(CartographyRelSchema): + target_node_label: str = 'AWSAccount' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( + {'id': PropertyRef('AWS_ID', set_in_kwargs=True)}, + ) + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "RESOURCE" + properties: EKSClusterToAwsAccountRelProperties = EKSClusterToAwsAccountRelProperties() + + +@dataclass(frozen=True) +class EKSClusterSchema(CartographyNodeSchema): + label: str = 'EKSCluster' + properties: EKSClusterNodeProperties = EKSClusterNodeProperties() + sub_resource_relationship: EKSClusterToAWSAccount = EKSClusterToAWSAccount() diff --git a/tests/data/aws/eks.py b/tests/data/aws/eks.py index 16544394bd..29de1ccaef 100644 --- a/tests/data/aws/eks.py +++ b/tests/data/aws/eks.py @@ -5,8 +5,8 @@ "cluster_2", ] -DESCRIBE_CLUSTERS = { - "cluster_1": { +DESCRIBE_CLUSTERS = [ + { "name": "cluster_1", "arn": "arn:aws:eks:eu-west-1:111111111111:cluster/cluster_1", "createdAt": datetime.datetime(2019, 1, 1, 0, 0, 1), @@ -35,7 +35,7 @@ }, "tags": {}, }, - "cluster_2": { + { "name": "cluster_2", "arn": "arn:aws:eks:eu-west-2:222222222222:cluster/cluster_2", "createdAt": datetime.datetime(2019, 1, 1, 0, 0, 1), @@ -64,4 +64,4 @@ }, "tags": {}, }, -} +] diff --git a/tests/integration/cartography/intel/aws/test_eks.py b/tests/integration/cartography/intel/aws/test_eks.py index a43de2260e..afe4143a9b 100644 --- a/tests/integration/cartography/intel/aws/test_eks.py +++ b/tests/integration/cartography/intel/aws/test_eks.py @@ -1,70 +1,51 @@ +from unittest.mock import MagicMock +from unittest.mock import patch + import cartography.intel.aws.eks -import tests.data.aws.eks +from cartography.intel.aws.eks import sync +from tests.data.aws.eks import DESCRIBE_CLUSTERS +from tests.data.aws.eks import LIST_CLUSTERS +from tests.integration.cartography.intel.aws.common import create_test_account +from tests.integration.util import check_nodes +from tests.integration.util import check_rels TEST_ACCOUNT_ID = '000000000000' TEST_REGION = 'eu-west-1' TEST_UPDATE_TAG = 123456789 -def test_load_eks_clusters_nodes(neo4j_session): - data = tests.data.aws.eks.DESCRIBE_CLUSTERS - cartography.intel.aws.eks.load_eks_clusters( +@patch.object(cartography.intel.aws.eks, 'get_eks_clusters', return_value=LIST_CLUSTERS) +@patch.object(cartography.intel.aws.eks, 'get_eks_describe_cluster', side_effect=DESCRIBE_CLUSTERS) +def test_sync_eks_clusters(mock_describe_clusters, mock_get_clusters, neo4j_session): + # Arrange + create_test_account(neo4j_session, TEST_ACCOUNT_ID, TEST_UPDATE_TAG) + boto3_session = MagicMock() + + # Act + sync( neo4j_session, - data, - TEST_REGION, + boto3_session, + [TEST_REGION], TEST_ACCOUNT_ID, TEST_UPDATE_TAG, + {'UPDATE_TAG': TEST_UPDATE_TAG, 'AWS_ID': TEST_ACCOUNT_ID}, ) - expected_nodes = { - "arn:aws:eks:eu-west-1:111111111111:cluster/cluster_1", - "arn:aws:eks:eu-west-2:222222222222:cluster/cluster_2", + # Assert + assert check_nodes(neo4j_session, 'EKSCluster', ['id', 'platform_version']) == { + ('arn:aws:eks:eu-west-1:111111111111:cluster/cluster_1', 'eks.9'), + ('arn:aws:eks:eu-west-2:222222222222:cluster/cluster_2', 'eks.9'), } - nodes = neo4j_session.run( - """ - MATCH (r:EKSCluster) RETURN r.arn; - """, - ) - actual_nodes = {n['r.arn'] for n in nodes} - - assert actual_nodes == expected_nodes - - -def test_load_eks_clusters_relationships(neo4j_session): - # Create Test AWSAccount - neo4j_session.run( - """ - MERGE (aws:AWSAccount{id: $aws_account_id}) - ON CREATE SET aws.firstseen = timestamp() - SET aws.lastupdated = $aws_update_tag - """, - aws_account_id=TEST_ACCOUNT_ID, - aws_update_tag=TEST_UPDATE_TAG, - ) - - # Load Test EKS Clusters - data = tests.data.aws.eks.DESCRIBE_CLUSTERS - cartography.intel.aws.eks.load_eks_clusters( + assert check_rels( neo4j_session, - data, - TEST_REGION, - TEST_ACCOUNT_ID, - TEST_UPDATE_TAG, - ) - expected = { - (TEST_ACCOUNT_ID, 'arn:aws:eks:eu-west-1:111111111111:cluster/cluster_1'), - (TEST_ACCOUNT_ID, 'arn:aws:eks:eu-west-2:222222222222:cluster/cluster_2'), + 'EKSCluster', + 'id', + 'AWSAccount', + 'id', + 'RESOURCE', + rel_direction_right=False, + ) == { + ('arn:aws:eks:eu-west-1:111111111111:cluster/cluster_1', '000000000000'), + ('arn:aws:eks:eu-west-2:222222222222:cluster/cluster_2', '000000000000'), } - - # Fetch relationships - result = neo4j_session.run( - """ - MATCH (n1:AWSAccount)-[:RESOURCE]->(n2:EKSCluster) RETURN n1.id, n2.arn; - """, - ) - actual = { - (r['n1.id'], r['n2.arn']) for r in result - } - - assert actual == expected