From 9fdad68aa091d03dd0ed4dbfbcbd8a46662474ba Mon Sep 17 00:00:00 2001
From: Alex Chantavy
Date: Mon, 9 Dec 2024 23:05:19 -0800
Subject: [PATCH] initial ideas

---
 cartography/graph/cleanupbuilder.py           | 50 +++++++++++++++--
 .../node_without_sub_resource.py              | 43 +++++++++++++++
 tests/data/util/__init__.py                   |  0
 tests/data/util/fake_data.py                  | 54 +++++++++++++++++++
 .../test_cleanupbuilder_no_sub_resource.py    | 49 +++++++++++++++++
 .../cartography/graph/test_cleanupbuilder.py  | 29 +++++++---
 6 files changed, 213 insertions(+), 12 deletions(-)
 create mode 100644 tests/data/graph/querybuilder/sample_models/node_without_sub_resource.py
 create mode 100644 tests/data/util/__init__.py
 create mode 100644 tests/data/util/fake_data.py
 create mode 100644 tests/integration/cartography/graph/test_cleanupbuilder_no_sub_resource.py

diff --git a/cartography/graph/cleanupbuilder.py b/cartography/graph/cleanupbuilder.py
index bee0fa6519..f74b108e95 100644
--- a/cartography/graph/cleanupbuilder.py
+++ b/cartography/graph/cleanupbuilder.py
@@ -16,7 +16,17 @@ def build_cleanup_queries(node_schema: CartographyNodeSchema) -> List[str]:
     """
     Generates queries to clean up stale nodes and relationships from the given CartographyNodeSchema.
     Note that auto-cleanups for a node with no relationships is not currently supported.
+
     Algorithm:
+    1. If node_schema has no relationships at all, return empty.
+
+    Otherwise,
+
+    1. If node_schema doesn't have a sub_resource relationship, generate queries only to clean up its other
+    relationships. No nodes will be cleaned up.
+
+    Otherwise,
+
     1. First delete all stale nodes attached to the node_schema's sub resource
     2. Delete all stale node to sub resource relationships
         - We don't expect this to be very common (never for AWS resources, at least), but in case it is possible for an
@@ -25,11 +35,15 @@ def build_cleanup_queries(node_schema: CartographyNodeSchema) -> List[str]:
     :param node_schema: The given CartographyNodeSchema
     :return: A list of Neo4j queries to clean up nodes and relationships.
     """
+    if not node_schema.sub_resource_relationship and not node_schema.other_relationships:
+        return []
+
     if not node_schema.sub_resource_relationship:
-        raise ValueError(
-            "Auto-creating a cleanup job for a node_schema without a sub resource relationship is not supported. "
-            f'Please check the class definition of "{node_schema.__class__.__name__}".',
-        )
+        queries = []
+        for rel in node_schema.other_relationships.rels:
+            query = _build_cleanup_rel_query_no_sub_resource(node_schema, rel)
+            queries.append(query)
+        return queries
 
     result = _build_cleanup_node_and_rel_queries(node_schema, node_schema.sub_resource_relationship)
     if node_schema.other_relationships:
@@ -41,6 +55,34 @@ def build_cleanup_queries(node_schema: CartographyNodeSchema) -> List[str]:
     return result
 
 
+def _build_cleanup_rel_query_no_sub_resource(
+        node_schema: CartographyNodeSchema,
+        selected_relationship: CartographyRelSchema,
+) -> str:
+    """
+    Helper function to delete stale relationships for node_schemas that have no sub resource relationship defined.
+    """
+    if node_schema.sub_resource_relationship:
+        raise ValueError(
+            f'Expected {node_schema.label} to not have a sub_resource_relationship. '
+            'This function is intended for node_schemas without sub_resource_relationships.',
+        )
+    # Delete only the stale selected relationship; the nodes on both ends are left in place.
+    query_template = Template(
+        """
+        MATCH (n:$node_label)
+        $selected_rel_clause
+        WHERE r.lastupdated <> $UPDATE_TAG
+        WITH r LIMIT $LIMIT_SIZE
+        DELETE r;
+        """,
+    )
+    return query_template.safe_substitute(
+        node_label=node_schema.label,
+        selected_rel_clause=_build_selected_rel_clause(selected_relationship),
+    )
+
+
 def _build_cleanup_node_and_rel_queries(
         node_schema: CartographyNodeSchema,
         selected_relationship: CartographyRelSchema,
diff --git a/tests/data/graph/querybuilder/sample_models/node_without_sub_resource.py b/tests/data/graph/querybuilder/sample_models/node_without_sub_resource.py
new file mode 100644
index 0000000000..5bd90b32ff
--- /dev/null
+++ b/tests/data/graph/querybuilder/sample_models/node_without_sub_resource.py
@@ -0,0 +1,43 @@
+from dataclasses import dataclass
+
+from cartography.models.core.common import PropertyRef
+from cartography.models.core.nodes import CartographyNodeSchema, CartographyNodeProperties
+from cartography.models.core.relationships import CartographyRelProperties, CartographyRelSchema, TargetNodeMatcher, \
+    LinkDirection, make_target_node_matcher, OtherRelationships
+
+
+# Test defining a simple node with no sub resource relationship.
+@dataclass(frozen=True)
+class NodeAProperties(CartographyNodeProperties):
+    id: PropertyRef = PropertyRef('Id')
+    lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)
+    property1: PropertyRef = PropertyRef('property1')
+    property2: PropertyRef = PropertyRef('property2')
+
+
+# Test defining a simple node attached to another node
+@dataclass(frozen=True)
+class NodeAToNodeBProps(CartographyRelProperties):
+    lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True)
+
+
+@dataclass(frozen=True)
+class NodeAToNodeB(CartographyRelSchema):
+    target_node_label: str = 'SimpleNode'
+    target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
+        {'id': PropertyRef('sub_resource_id', set_in_kwargs=True)},
+    )
+    direction: LinkDirection = LinkDirection.INWARD
+    rel_label: str = "POINTS_TO"
+    properties: NodeAToNodeBProps = NodeAToNodeBProps()
+
+
+@dataclass(frozen=True)
+class NodeA(CartographyNodeSchema):
+    label: str = 'NodeA'
+    properties: NodeAProperties = NodeAProperties()
+    other_relationships: OtherRelationships = OtherRelationships(
+        [
+            NodeAToNodeB(),
+        ],
+    )
diff --git a/tests/data/util/__init__.py b/tests/data/util/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/data/util/fake_data.py b/tests/data/util/fake_data.py
new file mode 100644
index 0000000000..ab2a18ab3f
--- /dev/null
+++ b/tests/data/util/fake_data.py
@@ -0,0 +1,54 @@
+from dataclasses import fields
+from typing import Any
+
+from cartography.models.core.common import PropertyRef
+from cartography.models.core.nodes import CartographyNodeProperties, CartographyNodeSchema
+from cartography.models.core.relationships import OtherRelationships, TargetNodeMatcher, CartographyRelSchema
+
+
+def _get_propref_keys_from_node_props(node_props: CartographyNodeProperties) -> list[str]:
+    """Return a key for every PropertyRef on node_props whose set_in_kwargs is False."""
+    result = []
+    for field in fields(node_props):
+        prop_ref: PropertyRef = field.default
+        if prop_ref and prop_ref.set_in_kwargs is False:
+            result.append(
+                str(prop_ref).split('.')[1],
+            )
+    return result
+
+
+def _get_propref_keys_from_rel(rel: CartographyRelSchema) -> list[str]:
+    """Return a key for every PropertyRef on rel's target node matcher whose set_in_kwargs is False."""
+    result = []
+    tgm: TargetNodeMatcher = rel.target_node_matcher
+    for field in fields(tgm):
+        prop_ref: PropertyRef = field.default
+        if prop_ref and prop_ref.set_in_kwargs is False:
+            result.append(
+                str(prop_ref).split('.')[1],
+            )
+    return result
+
+
+def generate_fake_data(count: int, node_schema: CartographyNodeSchema) -> list[dict[str, Any]]:
+    """
+    Generate `count` fake data dicts for the given node_schema instance.
+
+    Each dict has one key per non-kwargs PropertyRef found on the node's properties and on the target node
+    matchers of its other relationships. Every value in the i-th dict is str(i).
+    """
+    fake_data = []
+    node_props = node_schema.properties
+    props = _get_propref_keys_from_node_props(node_props)
+
+    other_rels: OtherRelationships = node_schema.other_relationships
+    if other_rels:
+        for rel in other_rels.rels:
+            props.extend(_get_propref_keys_from_rel(rel))
+
+    for i in range(count):
+        fake_data.append(
+            {prop: str(i) for prop in props},
+        )
+    return fake_data
diff --git a/tests/integration/cartography/graph/test_cleanupbuilder_no_sub_resource.py b/tests/integration/cartography/graph/test_cleanupbuilder_no_sub_resource.py
new file mode 100644
index 0000000000..6096834b23
--- /dev/null
+++ b/tests/integration/cartography/graph/test_cleanupbuilder_no_sub_resource.py
@@ -0,0 +1,49 @@
+from cartography.client.core.tx import load_graph_data
+from cartography.graph.cleanupbuilder import build_cleanup_queries
+from cartography.graph.job import GraphJob
+from cartography.graph.querybuilder import build_ingestion_query
+from tests.data.graph.querybuilder.sample_models.node_without_sub_resource import NodeA
+from tests.data.graph.querybuilder.sample_models.simple_node import SimpleNodeSchema
+from tests.data.util.fake_data import generate_fake_data
+from tests.unit.cartography.graph.helpers import clean_query_list
+
+
+def test_build_cleanup_queries_no_sub_resource(neo4j_session):
+    # Arrange
+    data = generate_fake_data(10, SimpleNodeSchema())
+    query = build_ingestion_query(SimpleNodeSchema())
+    load_graph_data(
+        neo4j_session,
+        query,
+        data,
+        lastupdated=1,
+    )
+
+    data = generate_fake_data(10, NodeA())
+    query = build_ingestion_query(NodeA())
+    load_graph_data(
+        neo4j_session,
+        query,
+        data,
+        lastupdated=1,
+        sub_resource_id='3',
+    )
+
+    # Act
+    common_job_parameters = {'UPDATE_TAG': 1}
+    cleanup_job = GraphJob.from_node_schema(NodeA(), common_job_parameters)
+    cleanup_job.run(neo4j_session)
+    actual_queries = build_cleanup_queries(NodeA())
+
+    # Assert
+    expected_queries = [
+        """
+        MATCH (n:NodeA)
+        MATCH (n)<-[r:POINTS_TO]-(:SimpleNode)
+        WHERE r.lastupdated <> $UPDATE_TAG
+        WITH r LIMIT $LIMIT_SIZE
+        DELETE r;
+        """,
+    ]
+
+    assert clean_query_list(actual_queries) == clean_query_list(expected_queries)
diff --git a/tests/unit/cartography/graph/test_cleanupbuilder.py b/tests/unit/cartography/graph/test_cleanupbuilder.py
index 3d9a01dd6d..5ec968f436 100644
--- a/tests/unit/cartography/graph/test_cleanupbuilder.py
+++ b/tests/unit/cartography/graph/test_cleanupbuilder.py
@@ -11,6 +11,7 @@
 from tests.data.graph.querybuilder.sample_models.interesting_asset import InterestingAssetSchema
 from tests.data.graph.querybuilder.sample_models.interesting_asset import InterestingAssetToHelloAssetRel
 from tests.data.graph.querybuilder.sample_models.interesting_asset import InterestingAssetToSubResourceRel
+from tests.data.graph.querybuilder.sample_models.node_without_sub_resource import NodeA
 from tests.data.graph.querybuilder.sample_models.simple_node import SimpleNodeSchema
 from tests.unit.cartography.graph.helpers import clean_query_list
 
@@ -120,14 +121,26 @@ def test_get_params_from_queries():
     assert set(get_parameters(queries)) == {'UPDATE_TAG', 'sub_resource_id', 'LIMIT_SIZE'}
 
 
-def test_build_cleanup_queries_selected_rels_no_sub_res_raises_exc():
-    """
-    Test that not specifying the sub resource rel as a selected_relationship in build_cleanup_queries raises exception
-    """
-    with pytest.raises(ValueError, match='node_schema without a sub resource relationship is not supported'):
-        build_cleanup_queries(SimpleNodeSchema())
-
-
 def test_build_cleanup_node_and_rel_queries_sub_res_tgm_not_validated_raises_exc():
     with pytest.raises(ValueError, match='must have set_in_kwargs=True'):
         _build_cleanup_node_and_rel_queries(FakeEC2InstanceSchema(), FakeEC2InstanceToAWSAccount())
+
+
+def test_build_cleanup_queries_no_sub_resource():
+    actual_queries: list[str] = build_cleanup_queries(NodeA())
+    expected_queries = [
+        """
+        MATCH (n:NodeA)
+        MATCH (n)<-[r:POINTS_TO]-(:SimpleNode)
+        WHERE r.lastupdated <> $UPDATE_TAG
+        WITH r LIMIT $LIMIT_SIZE
+        DELETE r;
+        """,
+    ]
+    assert clean_query_list(actual_queries) == clean_query_list(expected_queries)
+
+
+def test_build_cleanup_queries_no_rels():
+    actual_queries: list[str] = build_cleanup_queries(SimpleNodeSchema())
+    expected_queries = []
+    assert clean_query_list(actual_queries) == clean_query_list(expected_queries)