-
Notifications
You must be signed in to change notification settings - Fork 0
/
punchcard.py
342 lines (309 loc) · 15.9 KB
/
punchcard.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
#!/usr/bin/env python
#
# punchcard.py
#
# Copyright (C) 2011: Michael Hamilton
# The code is GPL 3.0(GNU General Public License) ( http://www.gnu.org/copyleft/gpl.html )
#
from PIL import Image
import sys
from optparse import OptionParser
import logging
# Card format handled by this module (only one for now).
SPEC_IBM_MODEL_029 = "IBM Model 029 Punch Card"

# Logical layout of a card.
CARD_COLUMNS = 80
CARD_ROWS = 12

# Physical measurements, all in inches.
# Source: http://www.quadibloc.com/comp/cardint.htm
CARD_WIDTH = 7.375            # 7 3/8
CARD_HEIGHT = 3.25
CARD_COL_WIDTH = 0.087
CARD_HOLE_WIDTH = 0.055       # IBM value; Control Data used 0.056
CARD_ROW_HEIGHT = 0.25
CARD_HOLE_HEIGHT = 0.125
CARD_TOPBOT_MARGIN = 0.1875   # 3/16, at top and at bottom
CARD_SIDE_MARGIN = 0.2235     # on each side

# The same measurements expressed as proportions of the card, so they can
# be scaled to however many pixels the card occupies in an image.
CARD_SIDE_MARGIN_RATIO = CARD_SIDE_MARGIN/CARD_WIDTH    # side margin / card width
CARD_TOP_MARGIN_RATIO = CARD_TOPBOT_MARGIN/CARD_HEIGHT  # top margin / card height
CARD_ROW_HEIGHT_RATIO = CARD_ROW_HEIGHT/CARD_HEIGHT     # row pitch / card height
CARD_COL_WIDTH_RATIO = CARD_COL_WIDTH/CARD_WIDTH        # column pitch / card width
CARD_HOLE_HEIGHT_RATIO = CARD_HOLE_HEIGHT/CARD_HEIGHT   # hole height / card height
CARD_HOLE_WIDTH_RATIO = CARD_HOLE_WIDTH/CARD_WIDTH      # hole width / card width

# Default upper bound for the hole/card cut-off, as a pixel brightness
# value (i.e. (R+G+B)/3).
BRIGHTNESS_THRESHOLD = 200
# Represents a punchcard image plus scanned data
class PunchCard(object):
    """A scanned punch-card image together with the data decoded from it.

    Constructing an instance runs the whole scan.  Afterwards:

    * ``text``      -- one decoded character per card column ('@' where the
      punch pattern is not in the keypunch table),
    * ``decoded``   -- per column, a 12-tuple of 'O' / ' ' in the row order
      12, 11, 0, 1, ..., 9,
    * ``surface``   -- per column, the measured hole length in pixels for
      each row (0 where no hole was recorded),
    * ``threshold`` -- the brightness cut-off that was used.

    Pixels at or above the threshold are treated as holes; the first pixel
    below it, searching inwards from the crop boundary, is treated as the
    card edge.
    """

    # Shared by every instance: turning debug on for one card turns it on
    # for all of them.
    logger = logging.getLogger('punchcard')

    # Keypunch code table.  The first line lists the characters; each of the
    # following twelve lines is one card row, with an 'O' under every
    # character whose code punches that row.  Characters start at string
    # index 5 and the final '|' is a border.  The spacing is significant:
    # create_card_map() reads this table column by column.
    IBM_MODEL_029_KEYPUNCH = """
    /&-0123456789ABCDEFGHIJKLMNOPQR/STUVWXYZ:#@'="`.<(+|!$*);^~,%_>? |
12 / O           OOOOOOOOO                        OOOOOO             |
11|   O                   OOOOOOOOO                     OOOOOO       |
 0|    O                           OOOOOOOOO                  OOOOOO |
 1|     O        O        O        O                                 |
 2|      O        O        O        O       O     O     O     O      |
 3|       O        O        O        O       O     O     O     O     |
 4|        O        O        O        O       O     O     O     O    |
 5|         O        O        O        O       O     O     O     O   |
 6|          O        O        O        O       O     O     O     O  |
 7|           O        O        O        O       O     O     O     O |
 8|            O        O        O        O OOOOOOOOOOOOOOOOOOOOOOOO |
 9|             O        O        O        O                         |
  |__________________________________________________________________|"""

    # Punch pattern -> character lookup; built by create_card_map().
    translate = None

    def create_card_map(self):
        """Build ``self.translate`` from the keypunch table, if not built yet.

        The ASCII art is turned sideways so that each table column becomes
        a 12-tuple of 'O'/' ' keyed to its character, for example:
            (O, , ,O, , , , , , , , ):A
            (O, , , ,O, , , , , , , ):B
            (O, , , , ,O, , , , , , ):C
        Also stores the table's lines in ``self.rows``.
        """
        if self.translate is not None:
            return
        self.translate = {}
        # [1:] drops the newline that follows the opening quotes.
        self.rows = self.IBM_MODEL_029_KEYPUNCH[1:].split('\n')
        table = self.rows[0:13]                 # character line + 12 punch rows
        last_col = len(self.rows[0]) - 1        # index of the closing '|'
        for i in range(5, last_col):            # characters start at index 5
            column = [line[i] for line in table]
            self.translate[tuple(column[1:])] = column[0]

    # generate a range of floats
    def drange(self, start, stop, step=1.0):
        """Yield start, start+step, ... while still short of ``stop``.

        Works upwards for ``step >= 0`` and downwards for ``step < 0``.
        A step of 0 with ``start < stop`` never terminates.
        """
        r = start
        while (step >= 0.0 and r < stop) or (step < 0.0 and r > stop):
            yield r
            r += step

    def __init__(self, image, bright=-1, debug=False, xstart=0, xstop=0, ystart=0, ystop=0, xadjust=0):
        """Scan ``image`` and decode it.

        image   -- object providing ``size``, ``load()`` and ``copy()``
                   (main() passes the result of PIL ``Image.open``).
        bright  -- brightness threshold; values <= 0 mean "work it out".
        debug   -- if true, annotate a copy of the image, show it and wait
                   for Enter.
        xstart, xstop, ystart, ystop -- crop box (pixels) inside which the
                   card edges are searched for; a stop of 0 means the image
                   edge.
        xadjust -- offset (pixels) added to the x position used for the
                   vertical edge search.
        """
        self.create_card_map()
        self.text = ''
        self.decoded = []
        self.surface = []
        self.threshold = 0
        self.ymin = ystart
        self.ymax = ystop
        self.xmin = xstart
        self.xmax = xstop
        self.xadjust = xadjust
        self.image = image
        self.pix = image.load()
        self._crop()
        self.logger.setLevel(logging.DEBUG if debug else logging.WARNING)
        self._scan(bright)

    # Brightness is the average of RGB values
    def _brightness(self, pixel):
        """Return the brightness of one pixel value.

        A plain number (single-band image) is returned unchanged.  A tuple
        is reduced to the mean of its first three bands, i.e. (R+G+B)/3;
        a tuple shorter than three bands contributes its first band only.
        """
        if isinstance(pixel, tuple):
            bands = pixel[:3] if len(pixel) >= 3 else pixel[:1]
            return sum(bands) / float(len(bands))
        return pixel

    # For highlighting on the debug dump
    def _flip(self, pixel):
        """Return the value written back over a detected hole pixel.

        Currently the pixel is returned unchanged, so the highlight is a
        no-op.  NOTE(review): presumably meant to invert or brighten the
        pixel -- confirm the intended behaviour before changing it.
        """
        return pixel

    # The search is started from the "crop" edges.
    # Either use crop boundary of the image size or the values supplied
    # by the command line args
    def _crop(self):
        """Resolve the crop box and the probe lines through its middle."""
        self.xsize, self.ysize = self.image.size
        if self.xmax == 0:
            self.xmax = self.xsize
        if self.ymax == 0:
            self.ymax = self.ysize
        # Integer division: these are used as pixel indices and range()
        # arguments.
        self.midx = self.xmin + (self.xmax - self.xmin) // 2 + self.xadjust
        self.midy = self.ymin + (self.ymax - self.ymin) // 2

    # heuristic for finding a reasonable cutoff brightness
    def _find_threshold_brightness(self):
        """Pick a threshold from the two crop-edge pixels on the middle row.

        Returns 10 below the darkest of: the leftmost pixel, the rightmost
        pixel, and the module default BRIGHTNESS_THRESHOLD.
        """
        left = self._brightness(self.pix[self.xmin, self.midy])
        right = self._brightness(self.pix[self.xmax - 1, self.midy])
        return min(left, right, BRIGHTNESS_THRESHOLD) - 10

    # Find the left and right edges of the data area at probe_y and from that
    # figure out the column and hole horizontal dimensions at probe_y.
    def _find_data_horiz_dimensions(self, probe_y):
        """Return (data_left_x, data_right_x, col_width, hole_width).

        The card edges are the first pixels darker than the threshold when
        walking inwards along row ``probe_y`` from each side of the crop
        box; if none is found the crop boundary is used.  The data area and
        the column/hole widths are then scaled from the card width.  The x
        positions are ints, the widths are floats.
        """
        left_border, right_border = self.xmin, self.xmax - 1
        for x in range(int(self.xmin), int(self.midx)):
            if self._brightness(self.pix[x, probe_y]) < self.threshold:
                left_border = x
                break
        for x in range(int(self.xmax) - 1, int(self.midx), -1):
            if self._brightness(self.pix[x, probe_y]) < self.threshold:
                right_border = x
                break
        width = right_border - left_border
        card_side_margin_width = int(width * CARD_SIDE_MARGIN_RATIO)
        data_left_x = left_border + card_side_margin_width
        # The right edge is derived from 80 column pitches, not from the
        # right-hand margin.
        data_right_x = data_left_x + int((CARD_COLUMNS * width) * CARD_COL_WIDTH/CARD_WIDTH)
        col_width = width * CARD_COL_WIDTH_RATIO
        hole_width = width * CARD_HOLE_WIDTH_RATIO
        if self.logger.isEnabledFor(logging.DEBUG):
            # mark left and right edges on the copy
            tick = self.ysize // 100
            left_mark = max(left_border, 0)
            right_mark = min(right_border, self.xmax - 1)
            for y in range(int(probe_y - tick), int(probe_y + tick)):
                self.debug_pix[left_mark, y] = 255
                self.debug_pix[right_mark, y] = 255
            for x in range(1, (self.xmax - self.xmin) // 200):
                self.debug_pix[left_border + x, probe_y] = 255
                self.debug_pix[right_border - x, probe_y] = 255
        return data_left_x, data_right_x, col_width, hole_width

    # find the top and bottom of the data area and from that the
    # row and hole vertical dimensions
    def _find_data_vert_dimensions(self):
        """Return (data_top_y, data_bottom_y, row_height, hole_height).

        The card's top and bottom edges are the first pixels darker than
        the threshold when walking inwards along column ``midx``; if none
        is found the crop boundary is used.  ``data_top_y`` is the vertical
        centre of the first row of holes and ``data_bottom_y`` that of the
        last.  All four values are ints.
        """
        top_border, bottom_border = self.ymin, self.ymax
        for y in range(int(self.ymin), int(self.midy)):
            if self._brightness(self.pix[self.midx, y]) < self.threshold:
                top_border = y
                break
        for y in range(int(self.ymax) - 1, int(self.midy), -1):
            if self._brightness(self.pix[self.midx, y]) < self.threshold:
                bottom_border = y
                break
        card_height = bottom_border - top_border
        card_top_margin = int(card_height * CARD_TOP_MARGIN_RATIO)
        data_begins = top_border + card_top_margin
        hole_height = int(card_height * CARD_HOLE_HEIGHT_RATIO)
        data_top_y = data_begins + hole_height // 2
        col_height = int(card_height * CARD_ROW_HEIGHT_RATIO)
        if self.logger.isEnabledFor(logging.DEBUG):
            # mark up the copy with the edges; when no bottom edge was found
            # bottom_border is ymax, one past the last row, so clamp it for
            # drawing only.
            bottom_mark = min(bottom_border, self.ysize - 1)
            for x in range(self.xmin, self.xmax - 1):
                self.debug_pix[x, top_border] = 255
                self.debug_pix[x, bottom_mark] = 255
            # mark search parameters
            reach = self.xsize // 20
            for x in range(self.midx - reach, self.midx + reach):
                self.debug_pix[x, self.ymin] = 255
                self.debug_pix[x, self.ymax - 1] = 255
            for y in range(0, self.ymin):
                self.debug_pix[self.midx, y] = 255
            for y in range(self.ymax - 1, self.ysize - 1):
                self.debug_pix[self.midx, y] = 255
        return data_top_y, data_top_y + col_height * (CARD_ROWS - 1), col_height, hole_height

    def _debug_plot_row(self, y, x_data_left, x_data_right, col_width, hole_width, hole_height):
        """Outline, in blue on the debug copy, where holes are expected on row ``y``."""
        expected_top_edge = y - hole_height // 2
        expected_bottom_edge = y + hole_height // 2
        blue = 255 * 256 * 256
        for expected_left_edge in self.drange(x_data_left, x_data_right - 1, col_width):
            # Column positions are fractional; pixel indices must be ints.
            left = int(expected_left_edge)
            right = int(expected_left_edge + hole_width)
            for y_plot in self.drange(expected_top_edge, expected_bottom_edge, 2):
                self.debug_pix[left, int(y_plot)] = blue
                self.debug_pix[right, int(y_plot)] = blue
            for x_plot in self.drange(expected_left_edge, expected_left_edge + hole_width):
                self.debug_pix[int(x_plot), expected_top_edge] = blue
                self.debug_pix[int(x_plot), expected_bottom_edge] = blue

    def _scan(self, bright=-1):
        """Locate the holes and fill in text, decoded and surface.

        ``bright`` is used as the threshold when positive; otherwise one is
        derived from the image.  Returns ``self``.
        """
        debugging = self.logger.isEnabledFor(logging.DEBUG)
        if debugging:
            # if debugging make a copy we can draw on
            self.debug_image = self.image.copy()
            self.debug_pix = self.debug_image.load()

        self.threshold = bright if bright > 0 else self._find_threshold_brightness()
        y_data_pos, _, col_height, hole_height = self._find_data_vert_dimensions()
        data = {}   # (column, row) -> hole length in pixels

        # Holes are found by walking along the vertical centre of each row
        # and measuring runs of bright pixels.  The card edges are located
        # afresh for every row, which should be forgiving if the image is
        # slightly wonky.
        y = y_data_pos
        for row_num in range(CARD_ROWS):
            # Line 0 has a corner missing, so its edges are probed one row
            # further in; the last row is likewise probed one row up.
            if row_num == 0:
                probe_y = y + col_height
            elif row_num == CARD_ROWS - 1:
                probe_y = y - col_height
            else:
                probe_y = y
            x_data_left, x_data_right, col_width, hole_width = self._find_data_horiz_dimensions(probe_y)
            left_edge = -1  # of a punch-hole; -1 while not inside one
            for x in range(x_data_left, x_data_right):
                # Holes are tall so we can be sure if we probe around the
                # middle of their height
                if self._brightness(self.pix[x, y]) >= self.threshold:
                    if left_edge == -1:
                        left_edge = x
                    if debugging:
                        self.debug_pix[x, y] = self._flip(self.pix[x, y])
                elif left_edge > -1:
                    hole_length = x - left_edge
                    # Runs narrower than 3/4 of a hole are ignored.
                    if hole_length >= hole_width * 0.75:
                        col_num = int((left_edge + hole_length / 2.0 - x_data_left) / col_width + 0.25)
                        data[(col_num, row_num)] = hole_length
                    left_edge = -1
            if debugging:
                # Plot where holes might be on this row
                self._debug_plot_row(y, x_data_left, x_data_right, col_width, hole_width, hole_height)
            y += col_height

        if debugging:
            self.debug_image.show()
            # prevent run-a-way debug shows causing my desktop to run out of memory
            input("Press Enter to continue...")

        # Start from a clean slate so the three results always agree.
        self.decoded = []
        self.surface = []
        chars = []
        for col in range(0, CARD_COLUMNS):
            col_pattern = []
            col_surface = []
            for row in range(CARD_ROWS):
                hole_length = data.get((col, row), 0)
                col_pattern.append('O' if (col, row) in data else ' ')
                col_surface.append(hole_length)
            tval = tuple(col_pattern)
            chars.append(self.translate.get(tval, '@'))
            self.decoded.append(tval)
            self.surface.append(col_surface)
        self.text = ''.join(chars)
        return self

    # ASCII art image of card
    def dump(self, id, raw_data=False):
        """Print an ASCII-art picture of the decoded card to stdout.

        id       -- label printed in the heading (main() passes the file name).
        raw_data -- if true, print each hole's measured length as "(n)"
                    in place of an 'O'.
        """
        out = sys.stdout.write
        ruler = ' ' + '123456789-' * int(CARD_COLUMNS/10)
        print(' Card Dump of Image file:', id, 'Format', 'Raw' if raw_data else 'Dump', 'threshold=', self.threshold)
        print(ruler)
        print(' ' + '_' * CARD_COLUMNS + ' ')
        print('/' + self.text + '_' * (CARD_COLUMNS - len(self.text)) + '|')
        for rnum in range(len(self.decoded[0])):
            out('|')
            if raw_data:
                for val in self.surface:
                    out(("(%d)" % val[rnum]) if val[rnum] != 0 else '.')
            else:
                for col in self.decoded:
                    out(col[rnum] if col[rnum] == 'O' else '.')
            print('|')
        print('`' + '-' * CARD_COLUMNS + "'")
        print(ruler)
        print('')
def main():
    """Command-line entry point.

    Decodes every image named on the command line, prints the decoded
    text of each card, and optionally prints ASCII-art dumps of it.
    """
    usage = "usage: %prog [options] image [image...]\ndecode punch card image into ASCII."
    parser = OptionParser(usage)
    parser.add_option('-b', '--bright-threshold', type='int', dest='bright', default=-1, help='Brightness (R+G+B)/3, e.g. 127.')
    # NOTE(review): side_margin_ratio is parsed but never passed on to
    # PunchCard, so this option currently has no effect.
    parser.add_option('-s', '--side-margin-ratio', type='float', dest='side_margin_ratio', default=CARD_SIDE_MARGIN_RATIO, help='Manually set side margin ratio (sideMargin/cardWidth).')
    parser.add_option('-d', '--dump', action='store_true', dest='dump', help='Output an ASCII-art version of the card.')
    parser.add_option('-i', '--display-image', action='store_true', dest='display', help='Display an anotated version of the image.')
    parser.add_option('-r', '--dump-raw', action='store_true', dest='dumpraw', help='Output ASCII-art with raw row/column accumulator values.')
    parser.add_option('-x', '--x-start', type='int', dest='xstart', default=0, help='Start looking for a card edge at x position (pixels)')
    parser.add_option('-X', '--x-stop', type='int', dest='xstop', default=0, help='Stop looking for a card edge at x position')
    parser.add_option('-y', '--y-start', type='int', dest='ystart', default=0, help='Start looking for a card edge at y position')
    parser.add_option('-Y', '--y-stop', type='int', dest='ystop', default=0, help='Stop looking for a card edge at y position')
    parser.add_option('-a', '--adjust-x', type='int', dest='xadjust', default=0, help='Adjust middle edge detect location (pixels)')
    (options, args) = parser.parse_args()

    for arg in args:
        image = Image.open(arg)
        # --display-image doubles as the debug switch of PunchCard.
        card = PunchCard(
            image,
            bright=options.bright,
            debug=options.display,
            xstart=options.xstart,
            xstop=options.xstop,
            ystart=options.ystart,
            ystop=options.ystop,
            xadjust=options.xadjust,
        )
        print(card.text)
        if options.dump:
            card.dump(arg)
        if options.dumpraw:
            card.dump(arg, raw_data=True)


# The guard comes after the definition: calling main() before its ``def``
# has executed raises NameError.
if __name__ == '__main__':
    main()