Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Actually execute TTL delete operations #86

Merged
merged 2 commits into from
Sep 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions nodestream/databases/neo4j/ingest_query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
TO_NODE_REF_NAME = "to_node"
RELATIONSHIP_REF_NAME = "rel"
# Matches a double-quoted `params.__<word>` token and captures the unquoted inner text.
PARAMETER_CORRECTION_REGEX = re.compile(r"\"(params.__\w+)\"")
# Delete statements used as the batched half of a TTL query. The TTL match query
# returns `id(<ref>) as id`; these statements compare against that `id` value.
# NOTE(review): relies on the batching procedure exposing the returned `id`
# column to the batched statement by name -- confirm against the APOC docs.
DELETE_NODE_QUERY = "MATCH (n) WHERE id(n) = id DETACH DELETE n"
DELETE_REL_QUERY = "MATCH ()-[r]->() WHERE id(r) = id DELETE r"


def correct_parameters(f):
Expand Down Expand Up @@ -213,9 +215,7 @@ def generate_batch_update_relationship_query_batch(
]
return QueryBatch(query, params)

def generate_ttl_query_from_configuration(
self, config: TimeToLiveConfiguration
) -> Query:
def generate_ttl_match_query(self, config: TimeToLiveConfiguration) -> Query:
earliest_allowed_time = datetime.utcnow() - timedelta(
hours=config.expiry_in_hours
)
Expand Down Expand Up @@ -243,3 +243,14 @@ def generate_ttl_query_from_configuration(
).return_literal(f"id({ref_name}) as id")

return Query(str(query_builder), params)

def generate_ttl_query_from_configuration(
    self, config: TimeToLiveConfiguration
) -> Query:
    """Build the query that deletes the graph objects expired under ``config``.

    The match query produced by ``generate_ttl_match_query`` selects the
    objects to remove; it is then fed into a batched delete statement.
    ``DELETE_NODE_QUERY`` is used when the configuration targets nodes, and
    ``DELETE_REL_QUERY`` is used for every other graph object type.
    """
    match_query = self.generate_ttl_match_query(config)
    if config.graph_object_type == GraphObjectType.NODE:
        delete_statement = DELETE_NODE_QUERY
    else:
        delete_statement = DELETE_REL_QUERY
    return match_query.feed_batched_query(delete_statement)
19 changes: 16 additions & 3 deletions nodestream/databases/neo4j/query.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from dataclasses import dataclass
from typing import Any, Dict, List

UNWIND_QUERY = "UNWIND $batched_parameter_sets as params RETURN params"
COMMIT_QUERY = """
CALL apoc.periodic.iterate(
"UNWIND $batched_parameter_sets as params RETURN params",
$iterable_query,
$batched_query,
{batchsize: 1000, parallel: false, retries: 3, params: {batched_parameter_sets: $batched_parameter_sets}}
{batchsize: 1000, parallel: false, retries: 3, params: $iterate_params}
)
YIELD batches, committedOperations, failedOperations, errorMessages
RETURN batches, committedOperations, failedOperations, errorMessages
Expand All @@ -21,6 +22,17 @@ class Query:
def from_statement(cls, statement: str):
return cls(query_statement=statement, parameters={})

def feed_batched_query(self, batched_query: str) -> "Query":
    """Feed the results of this query into another query executed in batches.

    Returns a new ``Query`` built on ``COMMIT_QUERY`` in which this query's
    statement is the ``iterable_query``, this query's parameters are passed
    as ``iterate_params``, and ``batched_query`` is the statement run for
    the results.
    """
    commit_parameters = {
        "iterate_params": self.parameters,
        "batched_query": batched_query,
        "iterable_query": self.query_statement,
    }
    return Query(COMMIT_QUERY, commit_parameters)


@dataclass(slots=True, frozen=True)
class QueryBatch:
Expand All @@ -31,7 +43,8 @@ def as_query(self) -> Query:
return Query(
COMMIT_QUERY,
{
"batched_parameter_sets": self.batched_parameter_sets,
"iterate_params": self.batched_parameter_sets,
"batched_query": self.query_statement,
"iterable_query": UNWIND_QUERY,
},
)
41 changes: 31 additions & 10 deletions tests/unit/databases/neo4j/test_ingest_query_buiilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,12 @@
from freezegun import freeze_time
from hamcrest import assert_that, equal_to, equal_to_ignoring_whitespace

from nodestream.databases.neo4j.ingest_query_builder import Neo4jIngestQueryBuilder
from nodestream.databases.neo4j.query import Query, QueryBatch
from nodestream.databases.neo4j.ingest_query_builder import (
DELETE_NODE_QUERY,
DELETE_REL_QUERY,
Neo4jIngestQueryBuilder,
)
from nodestream.databases.neo4j.query import COMMIT_QUERY, Query, QueryBatch
from nodestream.databases.query_executor import (
OperationOnNodeIdentity,
OperationOnRelationshipIdentity,
Expand Down Expand Up @@ -33,8 +37,12 @@ def query_builder():
expiry_in_hours=10,
)
BASIC_NODE_TTL_EXPECTED_QUERY = Query(
"MATCH (x: TestNodeType) WHERE x.last_ingested_at <= $earliest_allowed_time RETURN id(x) as id",
{"earliest_allowed_time": GREATEST_DAY},
COMMIT_QUERY,
{
"iterate_params": {"earliest_allowed_time": GREATEST_DAY},
"batched_query": DELETE_NODE_QUERY,
"iterable_query": "MATCH (x: TestNodeType) WHERE x.last_ingested_at <= $earliest_allowed_time RETURN id(x) as id",
},
)

NODE_TTL_WITH_CUSTOM_QUERY = TimeToLiveConfiguration(
Expand All @@ -44,7 +52,12 @@ def query_builder():
expiry_in_hours=10,
)
NODE_TTL_WITH_CUSTOM_QUERY_EXPECTED_QUERY = Query(
"MATCH (n:TestNodeType) RETURN n", {"earliest_allowed_time": GREATEST_DAY}
COMMIT_QUERY,
{
"iterate_params": {"earliest_allowed_time": GREATEST_DAY},
"batched_query": DELETE_NODE_QUERY,
"iterable_query": NODE_TTL_WITH_CUSTOM_QUERY.custom_query,
},
)

BASIC_REL_TTL = TimeToLiveConfiguration(
Expand All @@ -53,19 +66,27 @@ def query_builder():
expiry_in_hours=10,
)
BASIC_REL_TTL_EXPECTED_QUERY = Query(
"MATCH ()-[x: IS_RELATED_TO]->() WHERE x.last_ingested_at <= $earliest_allowed_time RETURN id(x) as id",
{"earliest_allowed_time": GREATEST_DAY},
COMMIT_QUERY,
{
"iterate_params": {"earliest_allowed_time": GREATEST_DAY},
"iterable_query": "MATCH ()-[x: IS_RELATED_TO]->() WHERE x.last_ingested_at <= $earliest_allowed_time RETURN id(x) as id",
"batched_query": DELETE_REL_QUERY,
},
)

REL_TTL_WITH_CUSTOM_QUERY = TimeToLiveConfiguration(
graph_object_type=GraphObjectType.RELATIONSHIP,
object_type="IS_RELATED_TO",
custom_query="MATCH ()-[x: IS_RELATED_TO]->() RETURN x",
custom_query="MATCH ()-[x: IS_RELATED_TO]->() RETURN id(x) as id",
expiry_in_hours=10,
)
REL_TTL_WITH_CUSTOM_QUERY_EXPECTED_QUERY = Query(
"MATCH ()-[x: IS_RELATED_TO]->() RETURN x",
{"earliest_allowed_time": GREATEST_DAY},
COMMIT_QUERY,
{
"iterate_params": {"earliest_allowed_time": GREATEST_DAY},
"iterable_query": REL_TTL_WITH_CUSTOM_QUERY.custom_query,
"batched_query": DELETE_REL_QUERY,
},
)


Expand Down