From 6af86391ffdc5f7ef5ab2662caf2984d072fc910 Mon Sep 17 00:00:00 2001 From: aneeshdurg Date: Thu, 15 Feb 2024 17:52:50 -0600 Subject: [PATCH] Implement length/path in filter --- .gitignore | 1 + README.md | 7 ++++--- pyproject.toml | 4 ++-- spycy/dfsmatcher.py | 14 ++++++++++++-- spycy/functions/scalar_fns.py | 23 +++++++++++++++++++++-- test/tck/.gitignore | 1 + test/tck/expected_failures.txt | 7 ++----- 7 files changed, 43 insertions(+), 14 deletions(-) create mode 100644 test/tck/.gitignore diff --git a/.gitignore b/.gitignore index 0f50067..b10bec9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ __pycache__/ dist/ build/ +venv/ diff --git a/README.md b/README.md index 338d224..37bf94e 100644 --- a/README.md +++ b/README.md @@ -29,9 +29,10 @@ pip install spycy_aneeshdurg To build it yourself, from the root of this repo, run: ```bash -python3 -m build -cd dist/ -pip install *.whl +# Optional - create and enter a venv +# python3 -m venv venv +# source ./venv/bin/activate # this command varies from shell to shell +pip install . ``` ## Usage: diff --git a/pyproject.toml b/pyproject.toml index 4fa1333..dc03f1c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,13 +4,13 @@ build-backend = "hatchling.build" [project] name = "spycy_aneeshdurg" -version = "0.0.2" +version = "0.0.3" authors = [ { name="Aneesh Durg", email="aneeshdurg17@gmail.com" }, ] description = "openCypher implemented in python" readme = "README.md" -requires-python = ">=3.8" +requires-python = ">=3.10" classifiers = [ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License", diff --git a/spycy/dfsmatcher.py b/spycy/dfsmatcher.py index a4e713d..f5be814 100644 --- a/spycy/dfsmatcher.py +++ b/spycy/dfsmatcher.py @@ -9,7 +9,7 @@ from spycy.gen.CypherParser import CypherParser from spycy.graph import EdgeType, Graph, NodeType from spycy.matcher import MatchedEdge, Matcher, MatchResult, MatchResultSet -from spycy.types import Edge, Node +from spycy.types import Edge, Node, Path @dataclass @@ -458,6 +458,7 @@ def match( for i in range(len(results)): tmp_vars = variables.copy() current_row = MatchResult() + # TODO - only bind names that are actually needed for the filter for nodeid, node in pgraph.nodes.items(): datanode = results.node_ids_to_data_ids[nodeid][i] current_row.node_ids_to_data_ids[nodeid] = datanode @@ -472,7 +473,16 @@ def match( else: wrapped_edge = Edge[EdgeType](dataedge) tmp_vars[edge.name] = wrapped_edge - # TODO - bind in path names + for path_name, path in pgraph.paths.items(): + p = Path[NodeType, EdgeType]([], []) + for nodeid in path.nodes: + node = current_row.node_ids_to_data_ids[nodeid] + p.nodes.append(node) + for edgeid in path.edges: + edge = current_row.edge_ids_to_data_ids[edgeid] + p.edges.append(edge) + tmp_vars[path_name] = p + filter_passed = matcher.expr_eval.evaluate( pd.DataFrame([tmp_vars]), graph, diff --git a/spycy/functions/scalar_fns.py b/spycy/functions/scalar_fns.py index 3ad4e80..7baaf3c 100644 --- a/spycy/functions/scalar_fns.py +++ b/spycy/functions/scalar_fns.py @@ -4,7 +4,7 @@ import pandas as pd from spycy.errors import ExecutionError -from spycy.types import Edge, FunctionContext, Node +from spycy.types import Edge, FunctionContext, Node, Path def coalesce(params: List[pd.Series], fnctx: FunctionContext) -> pd.Series: @@ -51,7 +51,26 @@ def last(params: List[pd.Series], fnctx: FunctionContext) -> pd.Series: def length(params: List[pd.Series], fnctx: FunctionContext) -> pd.Series: - raise AssertionError("length unimplemented") + if len(params) > 1: + raise ExecutionError("Invalid number of arguments to length") + + output = [] + for el in params[0]: + if el is pd.NA: + output.append(pd.NA) + elif isinstance(el, list): + output.append(len(el)) + elif isinstance(el, Path): + length_ = 0 + for edge in el.edges: + if isinstance(edge, list): + length_ += len(edge) + else: + length_ += 1 + output.append(length_) + else: + raise ExecutionError(f"TypeError::length expected list, got {type(el)}") + return pd.Series(output, dtype="Int64") def properties(params: List[pd.Series], fnctx: FunctionContext) -> pd.Series: diff --git a/test/tck/.gitignore b/test/tck/.gitignore new file mode 100644 index 0000000..92bdd18 --- /dev/null +++ b/test/tck/.gitignore @@ -0,0 +1 @@ +behave/ diff --git a/test/tck/expected_failures.txt b/test/tck/expected_failures.txt index d833771..053eee3 100644 --- a/test/tck/expected_failures.txt +++ b/test/tck/expected_failures.txt @@ -57,8 +57,6 @@ TESTS-clauses.delete.Delete5.xml:[5] Detach delete nodes from nested map/list TESTS-clauses.delete.Delete5.xml:[6] Delete relationships from nested map/list TESTS-clauses.delete.Delete5.xml:[7] Delete paths from nested map/list TESTS-clauses.delete.Delete5.xml:[9] Failing when deleting an integer expression -TESTS-clauses.match-where.MatchWhere1.xml:[12] Filter path with path length predicate on multi variables with one binding -TESTS-clauses.match-where.MatchWhere1.xml:[13] Filter path with false path length predicate on multi variables with one binding TESTS-clauses.match-where.MatchWhere1.xml:[15] Fail on aggregation in WHERE TESTS-clauses.match-where.MatchWhere2.xml:[2] Filter node with conjunctive multi-part property predicates on multi variables with multiple bindings TESTS-clauses.match-where.MatchWhere4.xml:[2] Join with disjunctive multi-part predicates including patterns @@ -156,14 +154,12 @@ TESTS-clauses.return.Return5.xml:[1] DISTINCT inside aggregation should work wit TESTS-clauses.return.Return5.xml:[3] DISTINCT inside aggregation should work with nested lists in maps TESTS-clauses.return.Return5.xml:[4] DISTINCT inside aggregation should work with nested lists of maps in maps TESTS-clauses.return.Return5.xml:[5] Aggregate on list values -TESTS-clauses.return.Return6.xml:[13] Returning the minimum length of paths TESTS-clauses.return.Return6.xml:[15] Using `rand()` in aggregations TESTS-clauses.return.Return6.xml:[16] Aggregation on complex expressions TESTS-clauses.return.Return6.xml:[17] Handle constants and parameters inside an expression which contains an aggregation expression TESTS-clauses.return.Return6.xml:[20] Fail if not returned variables are used inside an expression which contains an aggregation expression TESTS-clauses.return.Return6.xml:[21] Fail if more complex expressions, even if returned, are used inside expression which contains an aggregation expression TESTS-clauses.return.Return6.xml:[4] Support multiple divisions in aggregate function -TESTS-clauses.return.Return6.xml:[8] Handle aggregation on functions TESTS-clauses.set.Set1.xml:[10] Failing when setting a list of maps as a property TESTS-clauses.set.Set1.xml:[8] Ignore null when setting property TESTS-clauses.set.Set2.xml:[1] Setting a node property to null removes the existing property @@ -362,7 +358,8 @@ TESTS-expressions.path.Path1.xml:[1] `nodes()` on null path TESTS-expressions.path.Path2.xml:[1] Return relationships by fetching them from the path TESTS-expressions.path.Path2.xml:[2] Return relationships by fetching them from the path - starting from the end TESTS-expressions.path.Path2.xml:[3] `relationships()` on null path -TESTS-expressions.path.Path3.xml:[1] Return a var length path of length zero +TESTS-expressions.path.Path3.xml:[2] Failing when using `length()` on a node +TESTS-expressions.path.Path3.xml:[3] Failing when using `length()` on a relationship TESTS-expressions.pattern.Pattern1.xml:[10] Fail on introducing unbounded variables in pattern -- @1.1 TESTS-expressions.pattern.Pattern1.xml:[10] Fail on introducing unbounded variables in pattern -- @1.10 TESTS-expressions.pattern.Pattern1.xml:[10] Fail on introducing unbounded variables in pattern -- @1.11