Skip to content

Commit

Permalink
Refactor AWS EKS to new model (#1247)
Browse files Browse the repository at this point in the history
Refactoring EKS module the the new data model since it will make it a
lot easier to add in new relationships.

Will make a follow-up PR to attach subnets, roles, security groups,
VPCs, etc.
  • Loading branch information
achantavy authored Sep 21, 2023
1 parent eedccbb commit ef5cbce
Show file tree
Hide file tree
Showing 7 changed files with 135 additions and 129 deletions.
2 changes: 0 additions & 2 deletions cartography/data/indexes.cypher
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,6 @@ CREATE INDEX IF NOT EXISTS FOR (n:ECSContainerDefinition) ON (n.id);
CREATE INDEX IF NOT EXISTS FOR (n:ECSContainerDefinition) ON (n.lastupdated);
CREATE INDEX IF NOT EXISTS FOR (n:ECSContainer) ON (n.id);
CREATE INDEX IF NOT EXISTS FOR (n:ECSContainer) ON (n.lastupdated);
CREATE INDEX IF NOT EXISTS FOR (n:EKSCluster) ON (n.id);
CREATE INDEX IF NOT EXISTS FOR (n:EKSCluster) ON (n.lastupdated);
CREATE INDEX IF NOT EXISTS FOR (n:ElasticacheCluster) ON (n.id);
CREATE INDEX IF NOT EXISTS FOR (n:ElasticacheCluster) ON (n.arn);
CREATE INDEX IF NOT EXISTS FOR (n:ElasticacheCluster) ON (n.lastupdated);
Expand Down
15 changes: 0 additions & 15 deletions cartography/data/jobs/cleanup/aws_import_eks_cleanup.json

This file was deleted.

100 changes: 46 additions & 54 deletions cartography/intel/aws/eks.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,20 @@
import boto3
import neo4j

from cartography.client.core.tx import load
from cartography.graph.job import GraphJob
from cartography.models.aws.eks.clusters import EKSClusterSchema
from cartography.util import aws_handle_regions
from cartography.util import run_cleanup_job
from cartography.util import timeit

logger = logging.getLogger(__name__)


@timeit
@aws_handle_regions
def get_eks_clusters(boto3_session: boto3.session.Session, region: str) -> List[Dict]:
def get_eks_clusters(boto3_session: boto3.session.Session, region: str) -> List[str]:
client = boto3_session.client('eks', region_name=region)
clusters: List[Dict] = []
clusters: List[str] = []
paginator = client.get_paginator('list_clusters')
for page in paginator.paginate():
clusters.extend(page['clusters'])
Expand All @@ -33,49 +35,20 @@ def get_eks_describe_cluster(boto3_session: boto3.session.Session, region: str,

@timeit
def load_eks_clusters(
neo4j_session: neo4j.Session, cluster_data: Dict, region: str, current_aws_account_id: str,
aws_update_tag: int,
neo4j_session: neo4j.Session,
cluster_data: List[Dict[str, Any]],
region: str,
current_aws_account_id: str,
aws_update_tag: int,
) -> None:
query: str = """
MERGE (cluster:EKSCluster{id: $ClusterArn})
ON CREATE SET cluster.firstseen = timestamp(),
cluster.arn = $ClusterArn,
cluster.name = $ClusterName,
cluster.region = $Region,
cluster.created_at = $CreatedAt
SET cluster.lastupdated = $aws_update_tag,
cluster.endpoint = $ClusterEndpoint,
cluster.endpoint_public_access = $ClusterEndointPublic,
cluster.rolearn = $ClusterRoleArn,
cluster.version = $ClusterVersion,
cluster.platform_version = $ClusterPlatformVersion,
cluster.status = $ClusterStatus,
cluster.audit_logging = $ClusterLogging
WITH cluster
MATCH (owner:AWSAccount{id: $AWS_ACCOUNT_ID})
MERGE (owner)-[r:RESOURCE]->(cluster)
ON CREATE SET r.firstseen = timestamp()
SET r.lastupdated = $aws_update_tag
"""

for cd in cluster_data:
cluster = cluster_data[cd]
neo4j_session.run(
query,
ClusterArn=cluster['arn'],
ClusterName=cluster['name'],
ClusterEndpoint=cluster.get('endpoint'),
ClusterEndointPublic=cluster.get('resourcesVpcConfig', {}).get('endpointPublicAccess'),
ClusterRoleArn=cluster.get('roleArn'),
ClusterVersion=cluster.get('version'),
ClusterPlatformVersion=cluster.get('platformVersion'),
ClusterStatus=cluster.get('status'),
CreatedAt=str(cluster.get('createdAt')),
ClusterLogging=_process_logging(cluster),
Region=region,
aws_update_tag=aws_update_tag,
AWS_ACCOUNT_ID=current_aws_account_id,
)
load(
neo4j_session,
EKSClusterSchema(),
cluster_data,
Region=region,
AWS_ID=current_aws_account_id,
lastupdated=aws_update_tag,
)


def _process_logging(cluster: Dict) -> bool:
Expand All @@ -91,24 +64,43 @@ def _process_logging(cluster: Dict) -> bool:


@timeit
def cleanup(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
run_cleanup_job('aws_import_eks_cleanup.json', neo4j_session, common_job_parameters)
def cleanup(neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any]) -> None:
logger.info("Running EKS cluster cleanup")
GraphJob.from_node_schema(EKSClusterSchema(), common_job_parameters).run(neo4j_session)


def transform(cluster_data: Dict[str, Any]) -> List[Dict[str, Any]]:
transformed_list = []
for cluster_name, cluster_dict in cluster_data.items():
transformed_dict = cluster_dict.copy()
transformed_dict['ClusterLogging'] = _process_logging(transformed_dict)
transformed_dict['ClusterEndpointPublic'] = transformed_dict.get('resourcesVpcConfig', {}).get(
'endpointPublicAccess',
)
if 'createdAt' in transformed_dict:
transformed_dict['created_at'] = str(transformed_dict['createdAt'])
transformed_list.append(transformed_dict)
return transformed_list


@timeit
def sync(
neo4j_session: neo4j.Session, boto3_session: boto3.session.Session, regions: List[str], current_aws_account_id: str,
update_tag: int, common_job_parameters: Dict,
neo4j_session: neo4j.Session,
boto3_session: boto3.session.Session,
regions: List[str],
current_aws_account_id: str,
update_tag: int,
common_job_parameters: Dict[str, Any],
) -> None:
for region in regions:
logger.info("Syncing EKS for region '%s' in account '%s'.", region, current_aws_account_id)

clusters: List[Dict] = get_eks_clusters(boto3_session, region)

cluster_data: Dict = {}
clusters: List[str] = get_eks_clusters(boto3_session, region)
cluster_data = {}
for cluster_name in clusters:
cluster_data[cluster_name] = get_eks_describe_cluster(boto3_session, region, cluster_name) # type: ignore
cluster_data[cluster_name] = get_eks_describe_cluster(boto3_session, region, cluster_name)
transformed_list = transform(cluster_data)

load_eks_clusters(neo4j_session, cluster_data, region, current_aws_account_id, update_tag)
load_eks_clusters(neo4j_session, transformed_list, region, current_aws_account_id, update_tag)

cleanup(neo4j_session, common_job_parameters)
Empty file.
50 changes: 50 additions & 0 deletions cartography/models/aws/eks/clusters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from dataclasses import dataclass

from cartography.models.core.common import PropertyRef
from cartography.models.core.nodes import CartographyNodeProperties
from cartography.models.core.nodes import CartographyNodeSchema
from cartography.models.core.relationships import CartographyRelProperties
from cartography.models.core.relationships import CartographyRelSchema
from cartography.models.core.relationships import LinkDirection
from cartography.models.core.relationships import make_target_node_matcher
from cartography.models.core.relationships import TargetNodeMatcher


@dataclass(frozen=True)
class EKSClusterNodeProperties(CartographyNodeProperties):
id: PropertyRef = PropertyRef('arn')
arn: PropertyRef = PropertyRef('arn', extra_index=True)
name: PropertyRef = PropertyRef('name', extra_index=True)
region: PropertyRef = PropertyRef('Region', set_in_kwargs=True)
created_at: PropertyRef = PropertyRef('created_at')
lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)
endpoint: PropertyRef = PropertyRef('endpoint')
endpoint_public_access: PropertyRef = PropertyRef('ClusterEndpointPublic')
rolearn: PropertyRef = PropertyRef('roleArn')
version: PropertyRef = PropertyRef('version')
platform_version: PropertyRef = PropertyRef('platformVersion')
status: PropertyRef = PropertyRef('status')
audit_logging: PropertyRef = PropertyRef('ClusterLogging')


@dataclass(frozen=True)
class EKSClusterToAwsAccountRelProperties(CartographyRelProperties):
lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)


@dataclass(frozen=True)
class EKSClusterToAWSAccount(CartographyRelSchema):
target_node_label: str = 'AWSAccount'
target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
{'id': PropertyRef('AWS_ID', set_in_kwargs=True)},
)
direction: LinkDirection = LinkDirection.INWARD
rel_label: str = "RESOURCE"
properties: EKSClusterToAwsAccountRelProperties = EKSClusterToAwsAccountRelProperties()


@dataclass(frozen=True)
class EKSClusterSchema(CartographyNodeSchema):
label: str = 'EKSCluster'
properties: EKSClusterNodeProperties = EKSClusterNodeProperties()
sub_resource_relationship: EKSClusterToAWSAccount = EKSClusterToAWSAccount()
8 changes: 4 additions & 4 deletions tests/data/aws/eks.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
"cluster_2",
]

DESCRIBE_CLUSTERS = {
"cluster_1": {
DESCRIBE_CLUSTERS = [
{
"name": "cluster_1",
"arn": "arn:aws:eks:eu-west-1:111111111111:cluster/cluster_1",
"createdAt": datetime.datetime(2019, 1, 1, 0, 0, 1),
Expand Down Expand Up @@ -35,7 +35,7 @@
},
"tags": {},
},
"cluster_2": {
{
"name": "cluster_2",
"arn": "arn:aws:eks:eu-west-2:222222222222:cluster/cluster_2",
"createdAt": datetime.datetime(2019, 1, 1, 0, 0, 1),
Expand Down Expand Up @@ -64,4 +64,4 @@
},
"tags": {},
},
}
]
89 changes: 35 additions & 54 deletions tests/integration/cartography/intel/aws/test_eks.py
Original file line number Diff line number Diff line change
@@ -1,70 +1,51 @@
from unittest.mock import MagicMock
from unittest.mock import patch

import cartography.intel.aws.eks
import tests.data.aws.eks
from cartography.intel.aws.eks import sync
from tests.data.aws.eks import DESCRIBE_CLUSTERS
from tests.data.aws.eks import LIST_CLUSTERS
from tests.integration.cartography.intel.aws.common import create_test_account
from tests.integration.util import check_nodes
from tests.integration.util import check_rels

TEST_ACCOUNT_ID = '000000000000'
TEST_REGION = 'eu-west-1'
TEST_UPDATE_TAG = 123456789


def test_load_eks_clusters_nodes(neo4j_session):
data = tests.data.aws.eks.DESCRIBE_CLUSTERS
cartography.intel.aws.eks.load_eks_clusters(
@patch.object(cartography.intel.aws.eks, 'get_eks_clusters', return_value=LIST_CLUSTERS)
@patch.object(cartography.intel.aws.eks, 'get_eks_describe_cluster', side_effect=DESCRIBE_CLUSTERS)
def test_sync_eks_clusters(mock_describe_clusters, mock_get_clusters, neo4j_session):
# Arrange
create_test_account(neo4j_session, TEST_ACCOUNT_ID, TEST_UPDATE_TAG)
boto3_session = MagicMock()

# Act
sync(
neo4j_session,
data,
TEST_REGION,
boto3_session,
[TEST_REGION],
TEST_ACCOUNT_ID,
TEST_UPDATE_TAG,
{'UPDATE_TAG': TEST_UPDATE_TAG, 'AWS_ID': TEST_ACCOUNT_ID},
)

expected_nodes = {
"arn:aws:eks:eu-west-1:111111111111:cluster/cluster_1",
"arn:aws:eks:eu-west-2:222222222222:cluster/cluster_2",
# Assert
assert check_nodes(neo4j_session, 'EKSCluster', ['id', 'platform_version']) == {
('arn:aws:eks:eu-west-1:111111111111:cluster/cluster_1', 'eks.9'),
('arn:aws:eks:eu-west-2:222222222222:cluster/cluster_2', 'eks.9'),
}

nodes = neo4j_session.run(
"""
MATCH (r:EKSCluster) RETURN r.arn;
""",
)
actual_nodes = {n['r.arn'] for n in nodes}

assert actual_nodes == expected_nodes


def test_load_eks_clusters_relationships(neo4j_session):
# Create Test AWSAccount
neo4j_session.run(
"""
MERGE (aws:AWSAccount{id: $aws_account_id})
ON CREATE SET aws.firstseen = timestamp()
SET aws.lastupdated = $aws_update_tag
""",
aws_account_id=TEST_ACCOUNT_ID,
aws_update_tag=TEST_UPDATE_TAG,
)

# Load Test EKS Clusters
data = tests.data.aws.eks.DESCRIBE_CLUSTERS
cartography.intel.aws.eks.load_eks_clusters(
assert check_rels(
neo4j_session,
data,
TEST_REGION,
TEST_ACCOUNT_ID,
TEST_UPDATE_TAG,
)
expected = {
(TEST_ACCOUNT_ID, 'arn:aws:eks:eu-west-1:111111111111:cluster/cluster_1'),
(TEST_ACCOUNT_ID, 'arn:aws:eks:eu-west-2:222222222222:cluster/cluster_2'),
'EKSCluster',
'id',
'AWSAccount',
'id',
'RESOURCE',
rel_direction_right=False,
) == {
('arn:aws:eks:eu-west-1:111111111111:cluster/cluster_1', '000000000000'),
('arn:aws:eks:eu-west-2:222222222222:cluster/cluster_2', '000000000000'),
}

# Fetch relationships
result = neo4j_session.run(
"""
MATCH (n1:AWSAccount)-[:RESOURCE]->(n2:EKSCluster) RETURN n1.id, n2.arn;
""",
)
actual = {
(r['n1.id'], r['n2.arn']) for r in result
}

assert actual == expected

0 comments on commit ef5cbce

Please sign in to comment.