forked from sunziping2016/THUCourseSpider
-
Notifications
You must be signed in to change notification settings - Fork 0
/
divide-unsegmented-captcha.py
executable file
·44 lines (38 loc) · 1.38 KB
/
divide-unsegmented-captcha.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/bin/env python3
import argparse
import os
# Name of the file, inside the source directory, that lists already-solved
# files (one per line; the text before the first ':' is the filename).
SEGMENTATION_FILE_NAME = "segmentation.txt"


def _parse_target(spec):
    """Split a ``PATH:COUNT`` target argument into ``(path, count)``.

    The split happens on the *last* colon so that PATH may itself contain
    colons.

    Raises:
        ValueError: if the colon is missing, PATH is empty, or COUNT is not
            a non-negative integer.
    """
    path, sep, count_text = spec.rpartition(':')
    if not sep or not path:
        raise ValueError(f'invalid target {spec!r}: expected PATH:COUNT')
    try:
        count = int(count_text)
    except ValueError:
        raise ValueError(
            f'invalid target {spec!r}: COUNT must be an integer') from None
    if count < 0:
        # A negative count would silently turn into an "all but the last N"
        # slice further down, which is never what the user meant.
        raise ValueError(
            f'invalid target {spec!r}: COUNT must not be negative')
    return path, count


def _load_solved(source):
    """Return the set of filenames listed in the segmentation file of *source*.

    A missing segmentation file simply means nothing has been solved yet.
    """
    solved = set()
    try:
        with open(os.path.join(source, SEGMENTATION_FILE_NAME)) as f:
            for line in f:
                solved.add(line.strip().split(':')[0])
    except FileNotFoundError:
        pass
    return solved


def _list_unsolved(source, solved):
    """Return the sorted filenames in *source* that still need solving.

    A file qualifies when it has a ``.jpeg`` extension, the rest of its name
    consists of exactly two dot-separated parts, the first part is 32
    characters long, and the filename is not in *solved*.
    """
    unsolved = []
    for filename in sorted(os.listdir(source)):
        stem, ext = os.path.splitext(filename)
        parts = stem.split('.')
        if (ext == '.jpeg' and len(parts) == 2 and len(parts[0]) == 32
                and filename not in solved):
            unsolved.append(filename)
    return unsolved


def main():
    """Distribute the unsolved files of a source directory among targets.

    Command line: ``source [PATH:COUNT ...]``.

    With no targets, only the number of unsolved files is printed.  With
    targets, the COUNT values must add up to exactly the number of unsolved
    files; the files are then moved, in sorted order, into the target
    directories (created on demand), COUNT files per target.

    Invalid target arguments and a count mismatch terminate the program
    through ``parser.error`` (usage message on stderr, exit status 2)
    before any file is touched.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('source', help='source directory')
    parser.add_argument('targets', nargs='*', help='target directories')
    args = parser.parse_args()

    targets = []
    for spec in args.targets:
        try:
            targets.append(_parse_target(spec))
        except ValueError as exc:
            parser.error(str(exc))

    unsolved = _list_unsolved(args.source, _load_solved(args.source))

    if not targets:
        print(f'{len(unsolved)} unsolved.')
        return

    # Explicit check instead of `assert`: assertions are removed under
    # `python -O`, which would let a mismatched split move files anyway.
    requested = sum(num for _, num in targets)
    if requested != len(unsolved):
        parser.error(
            f'size mismatch: targets request {requested} file(s) '
            f'but {len(unsolved)} are unsolved')

    start = 0
    for path, num in targets:
        os.makedirs(path, exist_ok=True)
        for filename in unsolved[start:start + num]:
            os.rename(os.path.join(args.source, filename),
                      os.path.join(path, filename))
        start += num
# Run the command-line entry point only when executed as a script,
# not when this file is imported as a module.
if __name__ == "__main__":
    main()