Skip to content

Commit

Permalink
modify statements query to not filter on item type, filter afterwards…
Browse files Browse the repository at this point in the history
…. this is significantly faster for some unknown reason
  • Loading branch information
stuppie committed Oct 4, 2017
1 parent 4b6da86 commit 0a7b5e8
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 6 deletions.
11 changes: 6 additions & 5 deletions lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,6 @@ def get_reverse_items(qids):
WHERE {
values ?value {{values}}
?item ?propertyclaim ?id .
?property wikibase:propertyType wikibase:WikibaseItem .
?property wikibase:claim ?propertyclaim .
?id ?b ?value .
FILTER(regex(str(?b), "http://www.wikidata.org/prop/statement" ))
Expand All @@ -293,6 +292,7 @@ def get_reverse_items(qids):
result['property'] = result['property'].replace("http://www.wikidata.org/entity/", "wd:")
result['value'] = result['value'].replace("http://www.wikidata.org/entity/", "wd:")
result['id'] = result['id'].replace("http://www.wikidata.org/entity/statement/", "wds:").replace("-", "$", 1)
results = [x for x in results if x['id'].startswith("wds:Q")]
return results


Expand All @@ -316,19 +316,20 @@ def get_forward_items(qids):
WHERE {
values ?item {{values}}
?item ?propertyclaim ?id .
?property wikibase:propertyType wikibase:WikibaseItem .
?property wikibase:claim ?propertyclaim .
?id ?b ?value .
FILTER(regex(str(?b), "http://www.wikidata.org/prop/statement" ))
SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
}""".replace("{values}", values)
d = execute_sparql_query(query_str)['results']['bindings']
results = [{k: v['value'] for k, v in item.items()} for item in d]
results = [x for x in results if x['value'].startswith("http://www.wikidata.org/entity/")]
for result in results:
result['item'] = result['item'].replace("http://www.wikidata.org/entity/", "wd:")
result['property'] = result['property'].replace("http://www.wikidata.org/entity/", "wd:")
result['value'] = result['value'].replace("http://www.wikidata.org/entity/", "wd:")
result['id'] = result['id'].replace("http://www.wikidata.org/entity/statement/", "wds:").replace("-", "$", 1)
results = [x for x in results if x['id'].startswith("wds:Q")]
return results


Expand Down Expand Up @@ -366,11 +367,11 @@ def filter_statements(datapage, keywords=None, types=None):
datapage = datapage2

if types:
concepts = getConcepts(
frozenset([x['subject']['id'] for x in datapage] + [x['object']['id'] for x in datapage]))
all_qids = frozenset([x['subject']['id'] for x in datapage] + [x['object']['id'] for x in datapage])
concepts = getConcepts(all_qids)
type_map = {k: v['semanticGroup'] for k, v in concepts.items()}
datapage = [x for x in datapage if
any(t in [type_map[x['subject']['id']], type_map[x['object']['id']]] for t in types)]
any(t in [type_map.get(x['subject']['id'],''), type_map.get(x['object']['id'],'')] for t in types)]

return datapage

Expand Down
2 changes: 1 addition & 1 deletion utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def always_curie(s):


def always_qid(s):
    """Return *s* as a bare QID string, without the ``wd:`` prefix.

    *s* must start with either ``"Q"`` or ``"wd:"``. A string starting
    with ``"wd:"`` has every ``"wd:"`` occurrence removed; any other
    accepted string is returned unchanged.

    Raises:
        AssertionError: if *s* starts with neither ``"Q"`` nor ``"wd:"``.
            The offending value is attached as the assertion message so
            the failure shows what was actually passed in.
    """
    # NOTE: assert statements are stripped under ``python -O``, so this
    # check is a development-time guard rather than hard validation.
    assert s.startswith("Q") or s.startswith("wd:"), s
    return s.replace("wd:", "") if s.startswith("wd:") else s

# For future reference : https://github.com/monarch-initiative/SciGraph-docker-monarch-data/blob/master/src/main/resources/monarchLoadConfiguration.yaml.tmpl#L74
Expand Down

0 comments on commit 0a7b5e8

Please sign in to comment.