Skip to content

Commit

Permalink
Skip database calls in core.tx.load() if data list is empty (#1433)
Browse files Browse the repository at this point in the history
### Summary
> Describe your changes.

If the list passed to load() is empty, we now return early and skip
checking indexes, generating a query string, and talking to the
database.

Also adds the same early return to the AWS resourcegroupstaggingapi sync.

Since many cartography users likely have no resources in many regions, we
might as well save ourselves some network and database calls.


### Checklist

Provide proof that this works (this makes reviews move faster). Please
perform one or more of the following:
- [x] Update/add unit or integration tests.
- [ ] Include a screenshot showing what the graph looked like before and
after your changes.
- [ ] Include console log trace showing what happened before and after
your changes.

If you are changing a node or relationship:
- [ ] Update the
[schema](https://github.com/lyft/cartography/tree/master/docs/root/modules)
and
[readme](https://github.com/lyft/cartography/blob/master/docs/schema/README.md).

If you are implementing a new intel module:
- [ ] Use the NodeSchema [data
model](https://cartography-cncf.github.io/cartography/dev/writing-intel-modules.html#defining-a-node).

---------

Signed-off-by: Alex Chantavy <[email protected]>
  • Loading branch information
achantavy authored Jan 5, 2025
1 parent 8a19c13 commit 689768b
Show file tree
Hide file tree
Showing 6 changed files with 51 additions and 0 deletions.
3 changes: 3 additions & 0 deletions cartography/client/core/tx.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,9 @@ def load(
:param kwargs: Allows additional keyword args to be supplied to the Neo4j query.
:return: None
"""
if len(dict_list) == 0:
# If there is no data to load, save some time.
return
ensure_indexes(neo4j_session, node_schema)
ingestion_query = build_ingestion_query(node_schema)
load_graph_data(neo4j_session, ingestion_query, dict_list, **kwargs)
3 changes: 3 additions & 0 deletions cartography/intel/aws/resourcegroupstaggingapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,9 @@ def load_tags(
current_aws_account_id: str,
aws_update_tag: int,
) -> None:
if len(tag_data) == 0:
# If there is no data to load, save some time.
return
for tag_data_batch in batch(tag_data, size=100):
neo4j_session.write_transaction(
_load_tags_tx,
Expand Down
Empty file.
Empty file.
19 changes: 19 additions & 0 deletions tests/unit/cartography/client/test_core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from unittest.mock import MagicMock

from cartography.client.core.tx import load
from cartography.models.core.nodes import CartographyNodeSchema


def test_load_empty_dict_list():
    """load() given an empty data list must not run queries or write transactions."""
    # Arrange: a stand-in session, and a schema mock restricted to the
    # CartographyNodeSchema attribute surface.
    session = MagicMock()
    node_schema = MagicMock(spec=CartographyNodeSchema)

    # Act: hand load() an empty list of records.
    load(session, node_schema, [])

    # Assert: the session was never asked to open a write transaction ...
    session.write_transaction.assert_not_called()
    # ... nor to run a query directly.
    session.run.assert_not_called()
26 changes: 26 additions & 0 deletions tests/unit/cartography/intel/aws/test_resourcegroupstaggingapi.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import copy
from unittest.mock import MagicMock

import cartography.intel.aws.resourcegroupstaggingapi as rgta
import tests.data.aws.resourcegroupstaggingapi as test_data
Expand Down Expand Up @@ -44,3 +45,28 @@ def test_transform_tags():
assert 'resource_id' not in get_resources_response[0]
rgta.transform_tags(get_resources_response, 'ec2:instance')
assert 'resource_id' in get_resources_response[0]


def test_load_tags_empty_data():
    """
    load_tags must not open a write transaction when tag_data is empty.
    """
    # Arrange: a mock session plus the keyword arguments for the call.
    session = MagicMock()
    call_kwargs = {
        'neo4j_session': session,
        # NOTE(review): an empty dict is used as the "no data" value here;
        # confirm against load_tags' signature whether an empty list is the
        # more representative input.
        'tag_data': {},
        'resource_type': 'ec2:instance',
        'region': 'us-east-1',
        'current_aws_account_id': '123456789012',
        'aws_update_tag': 123456789,
    }

    # Act
    rgta.load_tags(**call_kwargs)

    # Assert: nothing was written through the session.
    session.write_transaction.assert_not_called()

0 comments on commit 689768b

Please sign in to comment.