-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathqueries.py
104 lines (75 loc) · 3.33 KB
/
queries.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
from neo4j import GraphDatabase
from os import environ
# Connection settings are read from the environment so that credentials do not
# have to live in source control. Each fallback reproduces the value that was
# previously hard-coded, so an unconfigured environment behaves as before.
_uri = environ.get('BOLT_URI', 'bolt://localhost:7778')
_user = environ.get('NEO4J_USER', 'neo4j')
_password = environ.get('NEO4J_PASSWORD', 'root123')

# Single module-level driver shared by every query helper in this file.
driver = GraphDatabase.driver(_uri, auth=(_user, _password))
def _generic_get(tx, query, args=None):
    """Run ``query`` on the transaction ``tx`` and return what ``tx.run`` returns.

    When ``args`` is truthy, its items are forwarded to ``tx.run`` as keyword
    arguments; otherwise the query is run with no extra arguments.
    """
    # An empty mapping expands to no keyword arguments at all, which is
    # the same call as ``tx.run(query)``.
    params = args if args else {}
    return tx.run(query, **params)
def get_number_entities():
    """Return the number of nodes matched by ``MATCH (n)`` (the ``count`` column).

    The single result row is read inside the transaction function, so no live
    result object is used after the managed transaction has finished.
    """
    query = """
MATCH (n) RETURN COUNT(n) as count
"""

    def _count(tx):
        # Consume the result while the transaction is still open.
        return _generic_get(tx, query).single()['count']

    with driver.session() as session:
        return session.read_transaction(_count)
def get_counts():
    """Return the ``counts`` map built from ``apoc.meta.stats()``.

    The query projects the ``Person``, ``Category``, ``Decade``, ``Company``
    and ``Movie`` entries out of the ``labels`` value yielded by the procedure.
    The first (and only) column of the single result row is returned.

    The row is read inside the transaction function, so no live result object
    is used after the managed transaction has finished.
    """
    query = """
CALL apoc.meta.stats() YIELD labels
RETURN labels {.Person, .Category, .Decade, .Company, .Movie} AS counts
"""

    def _counts(tx):
        # Index 0 is the only returned column, ``counts``.
        return _generic_get(tx, query).single()[0]

    with driver.session() as session:
        return session.read_transaction(_counts)
def get_entities():
    """Return one record per node with the fields ``uri``, ``name`` and ``labels``.

    The records are materialised into a list inside the transaction function,
    so the returned list stays usable after the transaction and session close.
    """
    query = """
MATCH (n) RETURN n.uri AS uri, n.name AS name, LABELS(n) AS labels
"""

    def _entities(tx):
        # Drain the result while the transaction is still open.
        return list(_generic_get(tx, query))

    with driver.session() as session:
        return session.read_transaction(_entities)
def get_triples():
    """Return one record per directed relationship in the graph.

    Each record has the fields ``head_uri`` (uri of the start node),
    ``relation`` (the relationship type) and ``tail_uri`` (uri of the end node).

    The records are materialised into a list inside the transaction function,
    so the returned list stays usable after the transaction and session close.
    """
    query = """
MATCH (h)-[r]->(t) RETURN h.uri AS head_uri, TYPE(r) AS relation, t.uri AS tail_uri
"""

    def _triples(tx):
        # Drain the result while the transaction is still open.
        return list(_generic_get(tx, query))

    with driver.session() as session:
        return session.read_transaction(_triples)
def get_last_batch(source_uris, seen):
    """Return up to ten scored ``Movie`` nodes reached from ``source_uris``.

    Nodes whose ``uri`` is in ``source_uris`` are collected and handed to the
    ``particlefiltering`` procedure. From the node ids it yields, only
    ``Movie`` nodes whose ``uri`` is not in ``seen`` are kept, ordered by
    descending score and limited to 10.

    NOTE(review): ``particlefiltering`` is not defined in this file, and the
    meaning of its ``0`` and ``100`` arguments cannot be told from here.

    Args:
        source_uris: uris of the nodes passed to the procedure (``$uris``).
        seen: uris to exclude from the result (``$seen``).

    Returns:
        A list of ``{'uri': ..., 'score': ...}`` dicts in query order.
    """
    query = """
MATCH (n) WHERE n.uri IN $uris WITH COLLECT(n) AS nLst
CALL particlefiltering(nLst, 0, 100) YIELD nodeId, score
MATCH (n) WHERE n:Movie AND id(n) = nodeId AND NOT n.uri IN $seen RETURN n.uri AS uri, score
ORDER BY score DESC
LIMIT 10
"""
    args = {'uris': source_uris, 'seen': seen}

    def _top_movies(tx):
        # Convert to plain dicts while the transaction is still open, so
        # nothing returned depends on a live result object.
        return [
            {'uri': record['uri'], 'score': record['score']}
            for record in _generic_get(tx, query, args)
        ]

    with driver.session() as session:
        return session.read_transaction(_top_movies)
def get_relevant_neighbors(uri_list, seen_uri_list, k=25):
    """Return scored nodes reached from ``uri_list``, with example movies.

    What the query does, step by step:

    1. Collects the nodes whose ``uri`` is in ``uri_list`` and passes them to
       the ``particlefiltering`` procedure, which yields ``nodeId``/``score``.
    2. Drops nodes whose ``uri`` is in ``seen_uri_list``.
    3. Orders by descending score, groups by each node's label list and keeps
       at most ``k`` entries per distinct label list.
    4. For every kept node that is not a ``Movie``, optionally matches
       ``Movie`` nodes with a relationship pointing at it, orders by
       ``m.weight`` descending and keeps up to five distinct movies.

    NOTE(review): ``particlefiltering`` and ``algo.asNode`` are not defined in
    this file, and the meaning of the ``0`` and ``100`` arguments cannot be
    told from here.

    Args:
        uri_list: uris of the nodes passed to the procedure (``$uris``).
        seen_uri_list: uris to exclude from the result (``$seen``).
        k: maximum number of nodes kept per distinct label list (``$k``).

    Returns:
        A list of records with the boolean label flags ``director``,
        ``actor``, ``subject``, ``movie``, ``company``, ``decade``, ``genre``,
        ``person`` and ``category``, the properties ``imdb``, ``uri``,
        ``name``, ``image`` and ``year``, plus ``movies`` and ``score``.
    """
    query = """
MATCH (n) WHERE n.uri IN $uris WITH COLLECT(n) AS nLst
CALL particlefiltering(nLst, 0, 100) YIELD nodeId, score
MATCH (n) WHERE id(n) = nodeId AND NOT n.uri IN $seen
WITH DISTINCT id(n) AS id, score, n.name AS name, labels(n) AS l
ORDER BY score DESC
WITH DISTINCT l, collect({id: id, s: score, n: name})[..$k] AS topk
UNWIND topk AS t
WITH t.id AS id, t.s AS score, t.n AS name
OPTIONAL MATCH (r)<--(m:Movie) WHERE id(r) = id AND NOT r:Movie
WITH algo.asNode(id) AS r, m, score
ORDER BY m.weight DESC
WITH r, collect(DISTINCT m)[..5] as movies, score
RETURN r:Director AS director, r:Actor AS actor, r.imdb AS imdb, r:Subject AS subject, r:Movie as movie,
r:Company AS company, r:Decade AS decade, r.uri AS uri, r.name AS name, r:Genre as genre,
r:Person as person, r:Category as category, r.image AS image, r.year AS year, movies, score
"""
    args = {'uris': uri_list, 'seen': seen_uri_list, 'k': k}

    def _neighbors(tx):
        # Drain the result while the transaction is still open, so the
        # returned list does not depend on a live result object.
        return list(_generic_get(tx, query, args))

    with driver.session() as session:
        return session.read_transaction(_neighbors)