-
Notifications
You must be signed in to change notification settings - Fork 0
/
check_es.py
43 lines (35 loc) · 1.45 KB
/
check_es.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import os
from nuxeo.client import Nuxeo
import json
# Connection settings passed to the Nuxeo client: site root and REST API path.
API_BASE = 'https://nuxeo.cdlib.org/Nuxeo/site'
API_PATH = 'api/v1'
# Password is read from the environment; it is None when NUXEO_PASS is unset.
NUXEO_PASSWORD = os.getenv('NUXEO_PASS')
'''
Reads in a JSON file such as the one created by list_missing_paths.py and
outputs a JSON file, `still_missing.json`, that lists the docs still missing
from the Elasticsearch index.
'''
# Load the list of documents previously reported as missing. Each entry is
# later read through its 'uid' and 'path' keys.
# Decode explicitly as UTF-8 (the JSON interchange encoding) rather than
# relying on the platform's locale default.
with open('all_missing_20220211.json', 'r', encoding='utf-8') as f:
    missing = json.load(f)
#trashed = [m for m in missing if 'trashed' in m['path']]
# Client used for all queries below; authenticates as the 'Administrator'
# user with the password taken from the environment.
nuxeo = Nuxeo(
    host=API_BASE,
    api_path=API_PATH,
    auth=('Administrator', NUXEO_PASSWORD),
)
# Documents whose path starts with one of these prefixes are never reported
# as still missing.
EXCLUDED_PATH_PREFIXES = (
    '/default-domain/workspaces/templatesamples/',
    '/asset-library/UCOP/Aggie',
    '/asset-library/workspaces/Nuxeo Marketing Content',
    '/asset-library/UCM/Aggie',
    '/asset-library/UCSB/Aggie',
)
# This one path is excluded only on an exact match (not as a prefix).
EXCLUDED_PATH = '/default-domain/UserWorkspaces/barrett-ucsc-edu'

# Collect every candidate document that is not excluded by path and for which
# a lookup by uuid returns no entries.
still_missing = []
for doc in missing:
    path = doc['path']

    # Apply the cheap path filters first so that excluded documents do not
    # cost a network round trip to the server.
    if (
        path.startswith(EXCLUDED_PATH_PREFIXES)
        or path == EXCLUDED_PATH
        or 'trashed' in path
    ):
        continue

    NXQL = f"SELECT * FROM Document WHERE ecm:uuid = '{doc['uid']}'"
    response = nuxeo.documents.query(opts={'query': NXQL})

    # An empty result set means the document is treated as still missing.
    if not response['entries']:
        still_missing.append(doc)
# Persist the surviving entries as a JSON array, then report how many remain.
with open('still_missing.json', 'w') as report:
    serialized = json.dumps(still_missing)
    report.write(serialized)

print(f"total still missing from elasticsearch: {len(still_missing)}")