forked from jstraumann/panoptikum
-
Notifications
You must be signed in to change notification settings - Fork 0
/
collect.py
97 lines (74 loc) · 2.94 KB
/
collect.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import os, csv
from csv import DictWriter
from stats import *
def list_files(dir):
    """Index the image files below *dir* by their leading work number.

    Walks *dir* recursively with ``os.walk``. Any directory whose path
    contains the text ``thumb`` is skipped. For every remaining file the
    work number is taken from the file name without its extension: the
    text before the first ``_``, then before the first space, then before
    the first ``,``, with any leading/trailing ``.``, ``+``, ``j``, ``p``
    or ``g`` characters stripped, converted with ``int``.

    Files whose name does not yield an integer are reported on stdout
    ("Invalid file: ...") and skipped.

    Args:
        dir: Root directory to scan. (The name shadows the builtin but is
            kept because it is part of the function's signature.)

    Returns:
        dict mapping work number (int) to a dict with the keys ``path``
        (the file's joined path) and ``thumb`` (the same file name inside
        a ``thumb`` sub-directory of the file's folder). Both values have
        leading and trailing ``.`` and ``/`` characters stripped. If
        several files share a number, the one visited last wins.
    """
    r = {}
    for root, dirs, files in os.walk(dir):
        if 'thumb' in root:
            continue
        # Sorting in place makes os.walk descend into sub-directories in a
        # stable, alphabetical order.
        dirs.sort()
        for name in files:
            fullpath = os.path.join(root, name)
            stem = os.path.splitext(name)[0]
            token = stem.split('_')[0].split(' ')[0].split(',')[0]
            try:
                # NOTE: str.strip() takes a *set of characters*, not a suffix.
                wn = int(token.strip('.+jpg'))
            except ValueError:
                # Only a failed int() conversion is expected here; anything
                # else (e.g. KeyboardInterrupt) must propagate.
                print('Invalid file: %s' % fullpath)
                continue
            r[wn] = {
                'path': fullpath.strip('./'),
                'thumb': os.path.join(root, 'thumb', name).strip('./'),
            }
    return r
def flatten(xss):
    """Concatenate the items of every iterable in *xss* into one new list."""
    flat = []
    for inner in xss:
        flat.extend(inner)
    return flat
def update_files(lf, filename='WERKVERZEICHNIS.csv', outputfile='images.csv'):
    """Merge image paths into the catalogue CSV and write the result.

    Reads ``data/<filename>`` and, for every row whose ``Nummer`` value
    converts to an int that is a key of *lf*, writes an augmented row to
    ``data/<outputfile>``. Rows without a matching image are not written;
    their ``Nummer`` is printed on stdout instead.

    Each written row is changed as follows:

    * ``path`` / ``thumb`` are copied from the matching *lf* entry.
    * ``Techniken`` is the space-joined ``Technik`` .. ``Technik IV``
      columns (built *before* the FoD/FoP merge below).
    * ``Technik`` has ``FoD`` and ``FoP`` replaced by ``Fo``.
    * ``Motiven`` is the space-joined parts of ``Motiv I`` .. ``Motiv IV``,
      each split on ``", "``.
    * ``Darstellungsformen`` is the space-joined ``Darstellungsform`` and
      ``Darstellungsform I`` columns.
    * ``Jahr`` is stripped of surrounding whitespace and then of any
      leading/trailing ``a`` characters.

    Args:
        lf: Mapping of work number (int) to a dict with ``path`` and
            ``thumb`` keys, as produced by ``list_files``.
        filename: Input CSV file name inside the ``data`` directory.
        outputfile: Output CSV file name inside the ``data`` directory.
    """
    inputpath = os.path.join('data', filename)
    outputpath = os.path.join('data', outputfile)

    technik_cols = ('Technik', 'Technik I', 'Technik II',
                    'Technik III', 'Technik IV')
    motiv_cols = ('Motiv I', 'Motiv II', 'Motiv III', 'Motiv IV')
    form_cols = ('Darstellungsform', 'Darstellungsform I')
    extra_cols = ('path', 'thumb', 'Techniken', 'Motiven',
                  'Darstellungsformen')

    # newline='' is what the csv module asks for on both reading and
    # writing, so it can handle line endings and embedded newlines itself.
    with open(inputpath, 'r', newline='') as csvin:
        reader = csv.DictReader(csvin)
        fieldnames = reader.fieldnames
        # Add each derived column only once, so an input that already has
        # one of them does not end up with duplicate header entries.
        for col in extra_cols:
            if col not in fieldnames:
                fieldnames.append(col)

        print("Writing to %s" % outputpath)
        with open(outputpath, 'w', newline='') as csvout:
            writer = csv.DictWriter(csvout, fieldnames=fieldnames,
                                    delimiter=',', quotechar='"',
                                    quoting=csv.QUOTE_MINIMAL)
            writer.writeheader()

            print('Scanning images, showing any missing below:')
            for r in reader:
                nummer = r['Nummer']
                try:
                    imagerow = lf[int(nummer)]
                except (KeyError, ValueError, TypeError):
                    # No image for this number, or the number is blank,
                    # non-numeric or missing (None) in a short row.
                    print(nummer, end=' ')
                    continue

                r['path'] = imagerow['path']
                r['thumb'] = imagerow['thumb']
                # TODO: load image to determine dimensions

                r['Techniken'] = ' '.join(r[col] for col in technik_cols)

                # Combine FoD and FoP into Fo
                r['Technik'] = r['Technik'].replace('FoD', 'Fo')
                r['Technik'] = r['Technik'].replace('FoP', 'Fo')

                r['Motiven'] = ' '.join(
                    part
                    for col in motiv_cols
                    for part in r[col].split(", ")
                )

                r['Darstellungsformen'] = ' '.join(r[col] for col in form_cols)

                r['Jahr'] = r['Jahr'].strip().strip('a')

                writer.writerow(r)

    print("--- Done.")
if __name__ == '__main__':
    # Script entry point: index the files under images/, rebuild the merged
    # CSV from that index, then call update_stats() (star-imported from
    # the stats module).
    update_files(list_files('images'))
    update_stats()