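"""mmocr_sam.py

Text detection, recognition and segmentation demo that chains MMOCR and SAM:

1. MMOCR detects text instances and recognizes their content.
2. Each detected polygon is converted to an axis-aligned bounding box.
3. The boxes are fed to SAM (Segment Anything) as prompts to obtain
   pixel-level text masks.
4. Masks, polygons and recognized texts are overlaid with matplotlib and
   saved to ``--outdir``.
"""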
import os
from argparse import ArgumentParser

import cv2
import numpy as np
import torch
from matplotlib import pyplot as plt

# MMOCR
from mmocr.apis.inferencers import MMOCRInferencer
from mmocr.utils import poly2bbox

# SAM
from segment_anything import SamPredictor, sam_model_registry


def parse_args():
    parser = ArgumentParser()
    parser.add_argument(
        '--inputs',
        type=str,
        default=
        '/media/jiangqing/jqssd/projects/research/UniText/datasets/TextSeg_Ori/train/ori_img',
        help='Input image file or folder path.')
    parser.add_argument(
        '--outdir',
        type=str,
        default='results/',
        help='Output directory of results.')
    # MMOCR parser
    parser.add_argument(
        '--det',
        type=str,
        default=
        'mmocr_dev/configs/textdet/dbnetpp/dbnetpp_swinv2_base_w16_in21k.py',
        help='Pretrained text detection algorithm. It\'s the path to the '
        'config file or the model name defined in metafile.')
    parser.add_argument(
        '--det-weights',
        type=str,
        # required=True,
        default='checkpoints/mmocr/db_swin_mix_pretrain.pth',
        help='Path to the custom checkpoint file of the selected det model.')
    parser.add_argument(
        '--rec',
        type=str,
        default='mmocr_dev/configs/textrecog/abinet/abinet_20e_st-an_mj.py',
        help='Pretrained text recognition algorithm. It\'s the path to the '
        'config file or the model name defined in metafile.')
    parser.add_argument(
        '--rec-weights',
        type=str,
        # required=True,
        default=
        'checkpoints/mmocr/abinet_20e_st-an_mj_20221005_012617-ead8c139.pth',
        help='Path to the custom checkpoint file of the selected recog model.')
    parser.add_argument(
        '--device',
        type=str,
        default='cuda',
        help='Device used for inference. '
        'If not specified, the available device will be automatically used.')
    # SAM Parser
    parser.add_argument(
        '--sam_checkpoint',
        type=str,
        # required=True,
        default='checkpoints/sam/sam_vit_h_4b8939.pth',
        help='Path to the SAM checkpoint file.')
    parser.add_argument(
        '--sam_type',
        type=str,
        default='vit_h',
        help='SAM model type: one of vit_h, vit_l or vit_b.')
    parser.add_argument(
        '--show', action='store_true', help='Whether to show the result.')
    args = parser.parse_args()
    return args


def show_mask(mask, ax, random_color=False):
    """Overlay a binary mask on a matplotlib axis as a translucent RGBA image."""
    if random_color:
        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
    else:
        color = np.array([30 / 255, 144 / 255, 255 / 255, 0.6])
    h, w = mask.shape[-2:]
    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
    ax.imshow(mask_image)
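
# Note: SamPredictor.predict_torch returns masks of shape (num_boxes, 1, H, W)
# when multimask_output=False; show_mask only uses the trailing (H, W)
# dimensions, so each per-box mask can be passed in directly.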


if __name__ == '__main__':
    # Build MMOCR
    args = parse_args()
    mmocr_inferencer = MMOCRInferencer(
        args.det,
        args.det_weights,
        args.rec,
        args.rec_weights,
        device=args.device)

    # Build SAM
    sam = sam_model_registry[args.sam_type](checkpoint=args.sam_checkpoint)
    sam.to(device=args.device)
    sam_predictor = SamPredictor(sam)

    # Run
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    ori_inputs = mmocr_inferencer._inputs_to_list(args.inputs)
    for i, ori_input in enumerate(ori_inputs):
        print(f'Processing {ori_input} [{i + 1}/{len(ori_inputs)}]')
        img = cv2.imread(ori_input)

        # MMOCR inference
        result = mmocr_inferencer(
            img, save_vis=True, out_dir=args.outdir)['predictions'][0]
        rec_texts = result['rec_texts']
        det_polygons = result['det_polygons']
        if len(det_polygons) == 0:
            print(f'No text detected in {ori_input}, skipping.')
            continue
        # Convert detected polygons to axis-aligned boxes for SAM prompting.
        det_bboxes = torch.tensor(
            np.array([poly2bbox(poly) for poly in det_polygons]),
            device=sam_predictor.device)
        transformed_boxes = sam_predictor.transform.apply_boxes_torch(
            det_bboxes, img.shape[:2])

        # SAM inference: one mask per detected text box.
        sam_predictor.set_image(img, image_format='BGR')
        masks, _, _ = sam_predictor.predict_torch(
            point_coords=None,
            point_labels=None,
            boxes=transformed_boxes,
            multimask_output=False,
        )

        # Show results
        plt.figure(figsize=(10, 10))
        # Convert img to RGB for matplotlib.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        plt.imshow(img)
        for mask, rec_text, polygon in zip(masks, rec_texts, det_polygons):
            show_mask(mask.cpu(), plt.gca(), random_color=True)
            polygon = np.array(polygon).reshape(-1, 2)
            # Close the polygon before plotting its outline.
            polygon = np.concatenate([polygon, polygon[:1]], axis=0)
            plt.plot(polygon[:, 0], polygon[:, 1], color='r', linewidth=2)
            plt.text(polygon[0, 0], polygon[0, 1], rec_text, color='r')
        # Save before showing: some backends clear the figure on show().
        plt.savefig(os.path.join(args.outdir, f'{i}.png'))
        if args.show:
            plt.show()
        plt.close()
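
# Example invocation (the input path is illustrative; the weight paths below
# are simply the script defaults, adjust them to your local checkpoints):
#
#   python mmocr_sam.py \
#       --inputs path/to/images \
#       --outdir results/ \
#       --det-weights checkpoints/mmocr/db_swin_mix_pretrain.pth \
#       --rec-weights checkpoints/mmocr/abinet_20e_st-an_mj_20221005_012617-ead8c139.pth \
#       --sam_checkpoint checkpoints/sam/sam_vit_h_4b8939.pth \
#       --sam_type vit_h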