forked from BradfordBach/NMSLocator
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathocr.py
82 lines (71 loc) · 3.53 KB
/
ocr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
from PIL import Image
from pytesseract import image_to_string, pytesseract
import os
def ocr_many():
    """Run OCR on every image under ``cropped`` and print the fields found.

    Walks the ``cropped`` directory tree. For each file (other than the
    ``.log`` files that ``ocr_screenshot`` writes into the same directory)
    the path is printed, the image is passed to ``image_to_string`` and each
    line of the recognised text is inspected:

    * lines containing ``"System"``: the text before ``"System"`` is printed,
      with ``'-l'`` rewritten to ``'-I'`` and ``'|'`` rewritten to ``'I'``;
    * lines containing ``"REGION"``: the text between the first and second
      ``':'`` is printed (nothing is printed when the line has no ``':'``);
    * lines containing ``"Sell:"``: the text after the last ``'//'`` is
      printed.

    Returns:
        None. All results are written to standard output.
    """
    for dirpath, _dirnames, filenames in os.walk("cropped"):
        for file in filenames:
            # ocr_screenshot stores its raw OCR text as <name>.log in this
            # same directory; those are not images and cannot be opened.
            if file.endswith('.log'):
                continue

            path = os.path.join(dirpath, file)
            print(path)

            # Close the image file as soon as the OCR pass is finished.
            with Image.open(path) as image:
                screenshot_text = image_to_string(image)

            for line in screenshot_text.split('\n'):
                if "System" in line:
                    # Apply the corrections to the name string itself; the
                    # result of split() is a list and has no replace().
                    system_name = line.split('System')[0]
                    system_name = system_name.replace('-l', '-I')
                    system_name = system_name.replace('|', 'I')
                    print(system_name.strip())
                if "REGION" in line:
                    region_parts = line.split(':')
                    # Noisy OCR can drop the colon; skip rather than crash.
                    if len(region_parts) > 1:
                        print(region_parts[1].strip())
                if "Sell:" in line:
                    print(line.split('//')[-1].strip())
def ocr_screenshot(file, tesseract):
    """OCR a single screenshot and extract system information from its text.

    The raw OCR text is also written to ``cropped/<basename>.log``, where
    ``<basename>`` is the file name of ``file`` without its extension.

    Args:
        file: Path of the image to read.
        tesseract: Value assigned to ``pytesseract.tesseract_cmd`` before the
            OCR call (presumably the path of the Tesseract executable).

    Returns:
        A dict with the keys ``'system'``, ``'region'``, ``'econ'`` and
        ``'life'`` when both a system name and a region were found; ``'econ'``
        and ``'life'`` stay ``None`` when they were not recognised.
        ``None`` when the system name or the region is missing (a message is
        printed in that case).
    """
    pytesseract.tesseract_cmd = tesseract

    # Close the image file as soon as the OCR pass is finished.
    with Image.open(file) as image:
        screenshot_text = image_to_string(image)

    filename = os.path.splitext(os.path.basename(file))[0]
    # OCR output can contain characters outside the platform's default
    # encoding, so the log is always written as UTF-8.
    with open("cropped" + os.sep + filename + '.log', "w", encoding="utf-8") as ocr_log:
        ocr_log.write(screenshot_text)

    econ_values = (
        "Declining", "Destitute", "Failing", "Fledgling", "Low Supply",
        "Struggling", "Unpromising", "Unsuccessful",
        "Adequate", "Balanced", "Comfortable", "Developing", "Medium Supply",
        "Promising", "Satisfactory", "Sustainable",
        "Advanced", "Affluent", "Booming", "Flourishing", "High Supply",
        "Opulent", "Prosperous", "Wealthy",
    )
    system_info = {'system': None, 'region': None, 'econ': None, 'life': None}

    # Empty OCR text yields a single empty line, which matches nothing and
    # falls through to the "skipping" branch below.
    for line in screenshot_text.split('\n'):
        if "System" in line:
            system_name = line.split('System')[0].strip()
            system_info['system'] = fix_common_ocr_issues(system_name)
        if "REGION" in line:
            region_parts = line.split(':')
            # Noisy OCR can drop the colon; leave the region unset instead
            # of raising IndexError.
            if len(region_parts) > 1:
                system_info['region'] = fix_common_ocr_issues(region_parts[1].strip())
        if "Sell:" in line:
            econ_text = line.split('//')[-1].strip()
            if econ_text in econ_values:
                system_info['econ'] = econ_text
            elif "Med" in econ_text:
                # Any unrecognised value containing "Med" is taken to be a
                # misread "Medium Supply".
                system_info['econ'] = 'Medium Supply'
        if "Gek" in line:
            system_info['life'] = "Gek"
        if "Korvax" in line:
            system_info['life'] = "Korvax"
        if "Vy'keen" in line:
            system_info['life'] = "Vy'keen"

    if not system_info['system'] or not system_info['region']:
        print('Skipping latest screenshot, no system or region info found.')
        return None
    return system_info
def fix_common_ocr_issues(text):
    """Return ``text`` with common OCR misreadings corrected.

    A leading lowercase ``'l'`` is replaced with ``'I'``, then a fixed list
    of substring substitutions is applied in order. Most of them repair a
    lowercase ``'l'`` that was read in place of an uppercase ``'I'`` in
    Roman numerals or after a hyphen.

    Args:
        text: The string to correct. An empty string is returned unchanged.

    Returns:
        The corrected string.
    """
    # Order matters: a longer pattern must come before any pattern that is a
    # prefix of it (' XIll' before ' XIl', ' VIll' before ' VIl'), otherwise
    # the shorter rule rewrites part of the match and the longer rule can
    # never apply (" XIll" would end up as " XIIl").
    common_problems = (
        ('-l', '-I'),
        ('-k', '-K'),
        ('|', 'I'),
        (' l ', ' I '),
        (' Ill', ' III'),
        (' lV', ' IV'),
        (' XVIll', ' XVIII'),
        (' XIll', ' XIII'),
        (' XIl', ' XII'),
        (' VIll', ' VIII'),
        (' VIl', ' VII'),
        (' Il', ' II'),
        (' l', ' I'),
    )

    if text[:1] == 'l':
        text = 'I' + text[1:]

    for error, fix in common_problems:
        text = text.replace(error, fix)
    return text