-
Notifications
You must be signed in to change notification settings - Fork 0
/
segmentation.py
108 lines (73 loc) · 2.74 KB
/
segmentation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import numpy as np
import cv2 as cv
#from preprocessing import binary_otsus, deskew
from utilities import projection, save_image
from glob import glob
# def preprocess(image):
# # Maybe we end up using only gray level image.
# gray_img = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
# gray_img = cv.bitwise_not(gray_img)
# binary_img = binary_otsus(gray_img, 0)
# # cv.imwrite('origin.png', gray_img)
# # deskewed_img = deskew(binary_img)
# deskewed_img = deskew(binary_img)
# # cv.imwrite('output.png', deskewed_img)
# # binary_img = binary_otsus(deskewed_img, 0)
# # breakpoint()
# # Visualize
# # breakpoint()
# return deskewed_img
def projection_segmentation(clean_img, axis, cut=3):
    """Split an image into segments separated by runs of empty projection bins.

    The projection of ``clean_img`` along ``axis`` is scanned bin by bin. A
    segment opens at the first non-zero bin and closes once ``cut``
    consecutive zero bins have been seen after it. A segment that is still
    open when the bins run out (content touching the image border, or
    followed by fewer than ``cut`` empty bins) is emitted as well instead of
    being silently dropped.

    Args:
        clean_img: Image array indexable as ``clean_img[rows, cols]``.
            NOTE(review): presumably a binarized image whose background
            projects to 0 -- confirm against ``utilities.projection``.
        axis: ``'horizontal'`` crops along the first (row) index,
            ``'vertical'`` crops along the second (column) index. For any
            other value nothing is appended to the result.
        cut: Number of consecutive empty bins required to close a segment.

    Returns:
        List of crops of ``clean_img`` in scan order. Each crop starts one
        bin before the first non-empty bin (clamped to 0) and therefore
        includes at most ``cut - 1`` trailing empty bins.
    """
    segments = []

    def _emit(first, stop):
        # Keep one bin of margin before the segment, clamped to the image.
        lo = max(first - 1, 0)
        if axis == 'horizontal':
            segments.append(clean_img[lo:stop, :])
        elif axis == 'vertical':
            segments.append(clean_img[:, lo:stop])

    start = -1  # index of the first non-empty bin of the open segment
    cnt = 0     # consecutive empty bins seen since the last non-empty bin
    idx = -1    # last bin index visited; stays -1 if there are no bins

    projection_bins = projection(clean_img, axis)
    for idx, projection_bin in enumerate(projection_bins):
        if projection_bin != 0:
            # Content: any pending gap is cancelled; open a segment if needed.
            cnt = 0
            if start == -1:
                start = idx
        elif start != -1:
            # Gap inside an open segment: close it once the gap is long enough.
            cnt += 1
            if cnt >= cut:
                _emit(start, idx)
                cnt = 0
                start = -1

    # Flush a segment that reaches the end of the image without being
    # followed by a full `cut`-long gap.
    if start != -1:
        _emit(start, idx + 1)

    return segments
# Line Segmentation
#----------------------------------------------------------------------------------------
def line_horizontal_projection(image, cut=3):
    """Segment ``image`` along the 'horizontal' axis.

    The image is passed through unchanged; the preprocessing step that used
    to run here is currently disabled.

    Args:
        image: Image to segment.
        cut: Forwarded to ``projection_segmentation`` as its ``cut`` argument.

    Returns:
        The list of segments produced by ``projection_segmentation``.
    """
    return projection_segmentation(image, 'horizontal', cut)
# Word Segmentation
#----------------------------------------------------------------------------------------
def word_vertical_projection(line_image, cut=3):
    """Segment ``line_image`` along the 'vertical' axis, in reversed order.

    Args:
        line_image: Image of a single line to segment.
        cut: Forwarded to ``projection_segmentation`` as its ``cut`` argument.

    Returns:
        The segments from ``projection_segmentation`` with their order
        reversed, so the last segment found comes first.
        NOTE(review): presumably this yields right-to-left reading order --
        confirm against the caller.
    """
    found = projection_segmentation(line_image, 'vertical', cut)
    # A reversed copy is equivalent to reversing the fresh list in place.
    return found[::-1]
# def extract_words(img, visual=0):
# lines = line_horizontal_projection(img)
# words = []
# for idx, line in enumerate(lines):
# if visual:
# save_image(line, 'lines', f'line{idx}')
# line_words = word_vertical_projection(line)
# for w in line_words:
# # if len(words) == 585:
# # print(idx)
# words.append((w, line))
# # words.extend(line_words)
# # breakpoint()
# if visual:
# for idx, word in enumerate(words):
# save_image(word[0], 'words', f'word{idx}')
# return words
# if __name__ == "__main__":
# img = cv.imread('../Dataset/scanned/capr196.png')
# extract_words(img, 1)