Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#1210: EBSVolume => new data model, Allow node attr updates from multiple intel modules #1214

Merged
merged 4 commits into from
Jul 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cartography/intel/aws/ec2/instances.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from cartography.models.aws.ec2.reservations import EC2ReservationSchema
from cartography.models.aws.ec2.securitygroups import EC2SecurityGroupSchema
from cartography.models.aws.ec2.subnets import EC2SubnetSchema
from cartography.models.aws.ec2.volumes import EBSVolumeSchema
from cartography.models.aws.ec2.volumes import EBSVolumeInstanceSchema
from cartography.util import aws_handle_regions
from cartography.util import timeit

Expand Down Expand Up @@ -273,7 +273,7 @@ def load_ec2_instance_ebs_volumes(
) -> None:
load(
neo4j_session,
EBSVolumeSchema(),
EBSVolumeInstanceSchema(),
ebs_data,
Region=region,
AWS_ID=current_aws_account_id,
Expand Down
118 changes: 53 additions & 65 deletions cartography/intel/aws/ec2/volumes.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
import boto3
import neo4j

from cartography.client.core.tx import load
from cartography.graph.job import GraphJob
from cartography.intel.aws.util.arns import build_arn
from cartography.models.aws.ec2.volumes import EBSVolumeSchema
from cartography.util import aws_handle_regions
from cartography.util import timeit
Expand All @@ -16,7 +18,7 @@

@timeit
@aws_handle_regions
def get_volumes(boto3_session: boto3.session.Session, region: str) -> List[Dict]:
def get_volumes(boto3_session: boto3.session.Session, region: str) -> List[Dict[str, Any]]:
client = boto3_session.client('ec2', region_name=region)
paginator = client.get_paginator('describe_volumes')
volumes: List[Dict] = []
Expand All @@ -26,90 +28,76 @@ def get_volumes(boto3_session: boto3.session.Session, region: str) -> List[Dict]


def transform_volumes(volumes: List[Dict[str, Any]], region: str, current_aws_account_id: str) -> List[Dict[str, Any]]:
result = []
for volume in volumes:
volume['VolumeArn'] = f"arn:aws:ec2:{region}:{current_aws_account_id}:volume/{volume['VolumeId']}"
volume['CreateTime'] = str(volume['CreateTime'])
return volumes
attachments = volume.get('Attachments', [])
active_attachments = [a for a in attachments if a['State'] == 'attached']

volume_id = volume['VolumeId']
raw_vol = ({
'Arn': build_arn('ec2', current_aws_account_id, 'volume', volume_id, region),
'AvailabilityZone': volume['AvailabilityZone'],
'CreateTime': volume['CreateTime'],
'Encrypted': volume['Encrypted'],
'Size': volume['Size'],
'State': volume['State'],
'OutpostArn': volume['OutpostArn'],
'SnapshotId': volume['SnapshotId'],
'Iops': volume['Iops'],
'FastRestored': volume['FastRestored'],
'MultiAttachEnabled': volume['MultiAttachEnabled'],
'VolumeType': volume['VolumeType'],
'VolumeId': volume_id,
'KmsKeyId': volume['KmsKeyId'],
})

if not active_attachments:
result.append(raw_vol)
continue

for attachment in active_attachments:
vol_with_attachment = raw_vol.copy()
vol_with_attachment['InstanceId'] = attachment['InstanceId']
result.append(vol_with_attachment)

return result


@timeit
def load_volumes(
neo4j_session: neo4j.Session, data: List[Dict], region: str, current_aws_account_id: str, update_tag: int,
neo4j_session: neo4j.Session,
ebs_data: List[Dict[str, Any]],
region: str,
current_aws_account_id: str,
update_tag: int,
) -> None:
ingest_volumes = """
UNWIND $volumes_list as volume
MERGE (vol:EBSVolume{id: volume.VolumeId})
ON CREATE SET vol.firstseen = timestamp()
SET vol.arn = volume.VolumeArn,
vol.lastupdated = $update_tag,
vol.availabilityzone = volume.AvailabilityZone,
vol.createtime = volume.CreateTime,
vol.encrypted = volume.Encrypted,
vol.size = volume.Size,
vol.state = volume.State,
vol.outpostarn = volume.OutpostArn,
vol.snapshotid = volume.SnapshotId,
vol.iops = volume.Iops,
vol.fastrestored = volume.FastRestored,
vol.multiattachenabled = volume.MultiAttachEnabled,
vol.type = volume.VolumeType,
vol.kmskeyid = volume.KmsKeyId,
vol.region=$Region
WITH vol
MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
MERGE (aa)-[r:RESOURCE]->(vol)
ON CREATE SET r.firstseen = timestamp()
SET r.lastupdated = $update_tag
"""

neo4j_session.run(
ingest_volumes,
volumes_list=data,
AWS_ACCOUNT_ID=current_aws_account_id,
load(
neo4j_session,
EBSVolumeSchema(),
ebs_data,
Region=region,
update_tag=update_tag,
AWS_ID=current_aws_account_id,
lastupdated=update_tag,
)


def load_volume_relationships(
neo4j_session: neo4j.Session,
volumes: List[Dict[str, Any]],
aws_update_tag: int,
) -> None:
add_relationship_query = """
MATCH (volume:EBSVolume{arn: $VolumeArn})
WITH volume
MATCH (instance:EC2Instance{instanceid: $InstanceId})
MERGE (volume)-[r:ATTACHED_TO_EC2_INSTANCE]->(instance)
ON CREATE SET r.firstseen = timestamp()
SET r.lastupdated = $aws_update_tag
"""
for volume in volumes:
for attachment in volume.get('Attachments', []):
if attachment['State'] != 'attached':
continue
neo4j_session.run(
add_relationship_query,
VolumeArn=volume['VolumeArn'],
InstanceId=attachment['InstanceId'],
aws_update_tag=aws_update_tag,
)


@timeit
def cleanup_volumes(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
def cleanup_volumes(neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any]) -> None:
GraphJob.from_node_schema(EBSVolumeSchema(), common_job_parameters).run(neo4j_session)


@timeit
def sync_ebs_volumes(
neo4j_session: neo4j.Session, boto3_session: boto3.session.Session, regions: List[str],
current_aws_account_id: str, update_tag: int, common_job_parameters: Dict,
neo4j_session: neo4j.Session,
boto3_session: boto3.session.Session,
regions: List[str],
current_aws_account_id: str,
update_tag: int,
common_job_parameters: Dict[str, Any],
) -> None:
for region in regions:
logger.debug("Syncing volumes for region '%s' in account '%s'.", region, current_aws_account_id)
data = get_volumes(boto3_session, region)
transformed_data = transform_volumes(data, region, current_aws_account_id)
load_volumes(neo4j_session, transformed_data, region, current_aws_account_id, update_tag)
load_volume_relationships(neo4j_session, transformed_data, update_tag)
cleanup_volumes(neo4j_session, common_job_parameters)
18 changes: 18 additions & 0 deletions cartography/intel/aws/util/arns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from typing import Optional


def build_arn(
resource: str,
account: str,
typename: str,
name: str,
region: Optional[str] = None,
partition: Optional[str] = None,
) -> str:
if not partition:
# TODO: support partitions from others. Please file an issue on this if needed, would love to hear from you
partition = 'aws'
if not region:
# Some resources are present in all regions, e.g. IAM policies
region = ""
return f"arn:{partition}:{resource}:{region}:{account}:{typename}/{name}"
46 changes: 45 additions & 1 deletion cartography/models/aws/ec2/volumes.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,23 @@

@dataclass(frozen=True)
class EBSVolumeNodeProperties(CartographyNodeProperties):
arn: PropertyRef = PropertyRef('Arn', extra_index=True)
id: PropertyRef = PropertyRef('VolumeId')
volumeid: PropertyRef = PropertyRef('VolumeId', extra_index=True)
region: PropertyRef = PropertyRef('Region', set_in_kwargs=True)
lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)
deleteontermination: PropertyRef = PropertyRef('DeleteOnTermination')
availabilityzone: PropertyRef = PropertyRef('AvailabilityZone')
createtime: PropertyRef = PropertyRef('CreateTime')
encrypted: PropertyRef = PropertyRef('Encrypted')
size: PropertyRef = PropertyRef('Size')
state: PropertyRef = PropertyRef('State')
outpostarn: PropertyRef = PropertyRef('OutpostArn')
snapshotid: PropertyRef = PropertyRef('SnapshotId')
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@serge-wq [comment 2 of 2] - .. but the EBSVolume by itself doesn't have that field. It also has fields that are not known by the EBSVolumeInstance.

The description of this PR shows more details on this decision.

iops: PropertyRef = PropertyRef('Iops')
fastrestored: PropertyRef = PropertyRef('FastRestored')
multiattachenabled: PropertyRef = PropertyRef('MultiAttachEnabled')
type: PropertyRef = PropertyRef('VolumeType')
kmskeyid: PropertyRef = PropertyRef('KmsKeyId')


@dataclass(frozen=True)
Expand Down Expand Up @@ -53,6 +66,9 @@ class EBSVolumeToEC2Instance(CartographyRelSchema):

@dataclass(frozen=True)
class EBSVolumeSchema(CartographyNodeSchema):
"""
EBS Volume properties as returned from the EBS Volume API response
"""
label: str = 'EBSVolume'
properties: EBSVolumeNodeProperties = EBSVolumeNodeProperties()
sub_resource_relationship: EBSVolumeToAWSAccount = EBSVolumeToAWSAccount()
Expand All @@ -61,3 +77,31 @@ class EBSVolumeSchema(CartographyNodeSchema):
EBSVolumeToEC2Instance(),
],
)


@dataclass(frozen=True)
class EBSVolumeInstanceProperties(CartographyNodeProperties):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at the sample DESCRIBE_INSTANCES, the only properties about Volumes that are returned are just volumeid and deleteontermination, right?
https://github.com/lyft/cartography/blob/facb63bcbac6b68eec0fd2ea3f6b0550ac40eb10/tests/data/aws/ec2/instances.py#L390-L395

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, will update

"""
EBS Volume properties as known by an EC2 instance.
The EC2 instance API response includes a `deleteontermination` field and the volume id.
"""
arn: PropertyRef = PropertyRef('Arn', extra_index=True)
id: PropertyRef = PropertyRef('VolumeId')
volumeid: PropertyRef = PropertyRef('VolumeId', extra_index=True)
lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)
deleteontermination: PropertyRef = PropertyRef('DeleteOnTermination')
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@serge-wq [comment 1 of 2] - ah here we go. Here an EBS volume as known by an EC2 instance has a deleteontermination field...



@dataclass(frozen=True)
class EBSVolumeInstanceSchema(CartographyNodeSchema):
"""
EBS Volume from EC2 Instance API response. This is separate from `EBSVolumeSchema` to prevent issue #1210.
"""
label: str = 'EBSVolume'
properties: EBSVolumeInstanceProperties = EBSVolumeInstanceProperties()
sub_resource_relationship: EBSVolumeToAWSAccount = EBSVolumeToAWSAccount()
other_relationships: OtherRelationships = OtherRelationships(
[
EBSVolumeToEC2Instance(),
],
)
4 changes: 2 additions & 2 deletions tests/data/aws/ec2/volumes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
'Size': 123,
'SnapshotId': 'sn-01',
'State': 'available',
'VolumeId': 'v-01',
'VolumeId': 'vol-0df',
'Iops': 123,
'VolumeType': 'standard',
'FastRestored': True,
Expand All @@ -33,7 +33,7 @@
'OutpostArn': 'arn1',
'Size': 123,
'State': 'available',
'VolumeId': 'v-02',
'VolumeId': 'vol-03',
'Iops': 123,
'SnapshotId': 'sn-02',
'VolumeType': 'standard',
Expand Down
Loading