-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathbibdb_datasets.py
72 lines (56 loc) · 2.06 KB
/
bibdb_datasets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import datetime
from lxltools.datacompiler import Compiler
import os
# Directory holding this script. `os.path.dirname(__file__)` is an empty
# string when the script is invoked by bare filename, hence the '.' fallback.
SCRIPT_DIR = os.path.dirname(__file__) or '.'

# Root IRI from which the dataset identifiers in this module are built.
BASE = 'https://libris.kb.se/'

# Module-wide compiler instance. The dataset functions in this file are
# decorated with `@compiler.dataset`, and the script entry point calls
# `compiler.main()`.
compiler = Compiler(
    base_dir=SCRIPT_DIR,
    dataset_id=BASE + 'dataset/bibdb',
    context='sys/context/base.jsonld',
    record_thing_link='mainEntity',
    system_base_iri="",
    union='libraries.jsonld.lines',
    created="2019-03-14T15:00:00.000Z",
)
@compiler.dataset
def libraries():
    """Build the dataset of bibdb organisations of type ``library``.

    Only entries matching ``holdings=True`` are requested from the bibdb API.

    Returns a 3-tuple ``("/library", "2019-03-14T15:31:17.000Z", graph)``.
    NOTE(review): the first two items are presumably the dataset's base path
    and its creation timestamp as expected by ``compiler.dataset`` -- confirm
    against lxltools.
    """
    query = 'sigel=*&holdings=True&org_type=library'
    return "/library", "2019-03-14T15:31:17.000Z", _construct_bibdb_data(query)
@compiler.dataset
def bibliographies():
    """Build the dataset of bibdb organisations of type ``bibliography``.

    Returns a 3-tuple ``("/library", "2019-03-14T19:32:20.000Z", graph)``.
    NOTE(review): the first item is "/library", the same value that
    ``libraries()`` returns; this looks deliberate but should be confirmed.
    """
    query = 'sigel=*&org_type=bibliography'
    return "/library", "2019-03-14T19:32:20.000Z", _construct_bibdb_data(query)
def _construct_bibdb_data(query):
    """Fetch bibdb records for *query* and run them through the compiler.

    *query* is the raw query string appended to the bibdb API URL. All
    matching records are fetched first, then handed to ``compiler.construct``
    together with the bibdb JSON-LD context and the
    ``source/construct-libraries.rq`` query file. The value returned by
    ``compiler.construct`` is passed through unchanged.
    """
    records = _fetch_libraries(f'https://bibdb.libris.kb.se/api?{query}')

    bibdb_context_url = 'https://bibdb.libris.kb.se/libdb/static/meta/context.jsonld'
    remote_context = compiler.load_json(compiler.cache_url(bibdb_context_url))

    # Local additions layered on top of the remote context.
    local_context = {
        "@base": "http://bibdb.libris.kb.se/",
        # TODO: these are dropped in the source context; fix that and drop this
        "date_created": "http://libris.kb.se/def/lib#date_created",
        "date_modified": "http://libris.kb.se/def/lib#date_modified",
    }

    source = {
        "dataset": BASE + "dataset/libraries",
        "data": records,
        "context": [remote_context, local_context],
    }
    return compiler.construct(
        sources=[source],
        query="source/construct-libraries.rq",
    )
def _fetch_libraries(start_url):
    """Collect every entry under the ``libraries`` key, following pagination.

    The first request goes to *start_url* as given. Each later request
    appends ``&start=<offset>``, with the offset growing by 200 per page.
    Paging stops at the first response whose ``libraries`` value is empty.

    Returns a list with the entries of all pages, in the order received.
    Raises ``KeyError`` if a response has no ``libraries`` key.
    """
    page_size = 200
    offset = 0
    collected = []
    page_url = start_url
    while True:
        payload = compiler.load_json(compiler.cache_url(page_url))
        page = payload['libraries']
        if not page:
            # An empty page marks the end of the result set.
            return collected
        collected += page
        offset += page_size
        page_url = f'{start_url}&start={offset}'
# Script entry point: hand control to the compiler's own main routine.
if __name__ == "__main__":
    compiler.main()