-
Notifications
You must be signed in to change notification settings - Fork 0
/
detector.cpp
400 lines (381 loc) · 16.5 KB
/
detector.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
#include "detector.h"
#define IMAGE_LEN 960
#define IMAGE_LEN_F 960.0f
bool Detector::parseYolov5(const Blob::Ptr &blob, float cof_threshold,
std::vector<Rect> &o_rect, std::vector<float> &o_rect_cof,
std::vector<int> &classId)
{
//如果输入图像大小为[1,3,640,640],那么三个输出的尺寸为3*80*80*(4+1)*80 | 40*40 | 20*20
//利用blob->getTensorDesc().getDims()获取输出头的参数,包括w,h,c,batch_size,class_num
const int net_grid_h = static_cast<int>(blob->getTensorDesc().getDims()[2]); // 80/40/20
const int net_grid_w = static_cast<int>(blob->getTensorDesc().getDims()[3]); // 80/40/20
const int batch_size = static_cast<int>(blob->getTensorDesc().getDims()[0]); // batch_size
const int anchor_num = static_cast<int>(blob->getTensorDesc().getDims()[1]); // anchor_num,3
// item_size为类别个数,对于COCO而言,输出为80
const int item_size = static_cast<int>(blob->getTensorDesc().getDims()[4]); // (4+1+class_num)
//cout << "anchor scale: " << net_grid_h << endl;
//cout << "item_size: " << item_size << endl;
//根据输出的尺度,利用get_anchors方法获得相应的锚框
std::vector<int> anchors = get_anchors(net_grid_h);
//输出分类内存分配
InferenceEngine::LockedMemory<const void> blobMapped = InferenceEngine::as<InferenceEngine::MemoryBlob>(blob)->rmap();
const float *output_blob = blobMapped.as<float *>();
int net_grid = net_grid_h;
std::size_t gi = net_grid * item_size;
std::size_t ggi = net_grid * gi;
std::size_t anchor_n = anchor_num;
//输出解析与预测框解码部分
for (int n = 0; n < anchor_num; ++n)
for (int i = 0; i < net_grid; ++i)
for (int j = 0; j < net_grid; ++j) {
//获取输出置信度
double box_prob = sigmoid(output_blob[n*ggi + i * gi + j * item_size + 4]);
if (box_prob < cof_threshold) continue;
double obj_prob = sigmoid(output_blob[n*ggi + i * gi + j * item_size + 5]);
// 获取输出目标框
//注意此处输出为中心点坐标,需要转化为角点坐标
double x = output_blob[n*ggi + i * gi + j * item_size + 0];
double y = output_blob[n*ggi + i * gi + j * item_size + 1];
double w = output_blob[n*ggi + i * gi + j * item_size + 2];
double h = output_blob[n*ggi + i * gi + j * item_size + 3];
//获取输出类别索引
double max_prob = 0;
int idx = 0;
for (int t = 5; t < item_size; ++t) {
double tp = sigmoid(output_blob[n*net_grid*net_grid*item_size + i * net_grid*item_size + j * item_size + t]);
if (tp > max_prob) {
max_prob = tp;
idx = t - 5;
}
}
float cof = box_prob * max_prob;
//对于边框置信度小于阈值的边框,不关心其他数值,不进行计算减少计算量
if (cof < cof_threshold) continue;
//预测框解码
x = (sigmoid(x) * 2 - 0.5 + j)* IMAGE_LEN_F / net_grid;
y = (sigmoid(y) * 2 - 0.5 + i)* IMAGE_LEN_F / net_grid;
w = pow(sigmoid(w) * 2, 2) * anchors[n * 2];
h = pow(sigmoid(h) * 2, 2) * anchors[n * 2 + 1];
double r_x = x - w / 2;
double r_y = y - h / 2;
cv::Rect rect = cv::Rect(round(r_x), round(r_y), round(w), round(h));
//将结果保存在vector中
o_rect.push_back(rect);
o_rect_cof.push_back(cof);
classId.push_back(idx);
}
if (o_rect.size() == 0) return false;
else return true;
}
bool Detector::init(std::string xml_path) {
try {
_xml_path = xml_path;
// 加载推理引擎Core,该引擎需要从当前路径加载plugins.xml文件
InferenceEngine::Core ie;
//查询支持硬件设备
std::vector<std::string> availableDev = ie.GetAvailableDevices();
for (int i = 0; i < availableDev.size(); i++) {
cout << "supported device name: " << availableDev[i].c_str() << endl;
}
//从IR加载检测模型
InferenceEngine::CNNNetwork cnnNetwork = ie.ReadNetwork(_xml_path);
cnnNetwork.setBatchSize(1);
// 网络输入头参数设置
InferenceEngine::InputsDataMap inputInfo(cnnNetwork.getInputsInfo());
InferenceEngine::InputInfo::Ptr input = inputInfo.begin()->second;
_input_name = inputInfo.begin()->first;//输入头的名称
input->setPrecision(InferenceEngine::Precision::FP32);
input->getInputData()->setLayout(InferenceEngine::Layout::NCHW);
InferenceEngine::ICNNNetwork::InputShapes inputShapes = cnnNetwork.getInputShapes();
InferenceEngine::SizeVector& inSizeVector = inputShapes.begin()->second;
cnnNetwork.reshape(inputShapes);
//网络输出设置,YOLOV5包含三个输出
_outputinfo = InferenceEngine::OutputsDataMap(cnnNetwork.getOutputsInfo());
for (auto& output : _outputinfo) {
//cout << output.first << endl;
output.second->setPrecision(InferenceEngine::Precision::FP32);
}
//获取可执行网络
_network = ie.LoadNetwork(cnnNetwork, "CPU");
return true;
}
catch (std::exception & e) {
return false;
}
}
bool Detector::process_frame(Mat &curFrame, std::vector<Object> &detectedObj)
{
cv::Mat resize_img = letterBox(curFrame);
cout << "input image_size: " << resize_img.size() << endl;
std::size_t img_size = IMAGE_LEN * IMAGE_LEN;
//建立推理请求
InferenceEngine::InferRequest infer_request = _network.CreateInferRequest();
InferenceEngine::Blob::Ptr frameBlob = infer_request.GetBlob(_input_name);
InferenceEngine::LockedMemory<void> blobMapped = InferenceEngine::as<InferenceEngine::MemoryBlob>(frameBlob)->wmap();
float* blob_data = blobMapped.as<float*>();
//nchw
//将输入图像关联到Blob
for (std::size_t row = 0; row < IMAGE_LEN; row++) {
for (std::size_t col = 0; col < IMAGE_LEN; col++) {
for (std::size_t ch = 0; ch < 3; ch++) {
//遍历输入图像的每个像素保存到blob_data,目前不理解blob_data的索引值
blob_data[img_size*ch + row * IMAGE_LEN + col] = float(resize_img.at<cv::Vec3b>(row, col)[ch] / 255.0f);
}
}
}
//执行推理
infer_request.Infer();
//获取各层结果,保存到origin_rect,origin_rect_cof,classId
std::vector<Rect> origin_rect;
std::vector<float> origin_rect_cof;
std::vector<int> classId;
//大规模计算之前先收集指针
std::vector<InferenceEngine::Blob::Ptr> blobs;
//blobs保存推理结果,用于输入到parseYolov5方法进行解析
for (auto &output : _outputinfo) {
auto output_name = output.first;
InferenceEngine::Blob::Ptr blob = infer_request.GetBlob(output_name);
blobs.push_back(blob);
}
//cout << "blobs.size: " << blobs.size() << endl;
//blob的大小为3
for (int i = 0; i < 3; i++) {
float th = 0.5;
//TODO:根据网格大小使用不同阈值,可自己调配
if (i == 0) { th = 0.55; } //小目标严格要求
if (i == 1) { th = 0.45; } //大目标放宽要求
if (i == 2) { th = 0.40; }
//用于保存解析结果的临时vector
std::vector<cv::Rect> origin_rect_temp;
std::vector<float> origin_rect_cof_temp;
//解析blobs
parseYolov5(blobs[i], th, origin_rect_temp, origin_rect_cof_temp, classId);
origin_rect.insert(origin_rect.end(), origin_rect_temp.begin(), origin_rect_temp.end());
origin_rect_cof.insert(origin_rect_cof.end(), origin_rect_cof_temp.begin(), origin_rect_cof_temp.end());
}
//后处理操作获得最终结果
std::vector<int> final_id;
//TODO:此处的阈值需要调整
//调用opencv dnn模块中的NMSBoxes进行非极大抑制处理
cv::dnn::NMSBoxes(origin_rect, origin_rect_cof, _conf_threshold, _nms_area_threshold, final_id);
//根据final_id获取最终结果
for (int i = 0; i < final_id.size(); ++i) {
cv::Rect resize_rect = origin_rect[final_id[i]];
//调用detect2origin方法将框映射到原图
cv::Rect rawrect = detect2origin(resize_rect, ratio, topPad, leftPad);
//结果以数据结构Object保存到vector
//cout << "is: " << final_id[i] << endl;
cout << "is: " << classId[final_id[i]] << endl;
detectedObj.push_back(Object{
origin_rect_cof[final_id[i]],
className[classId[final_id[i]]],rawrect,
});
}
return true;
}
bool Detector::slice_process_frame(Mat &curFrame, std::vector<Object> &detectedObj)
{
//获取最后结果,保存到summary_rect,summary_rect_cof,summary_classId
std::vector<Rect> summary_rect;
std::vector<float> summary_rect_cof;
std::vector<int> summary_classId;
vector<vector<int>> slice_bboxes = getboxs(curFrame.rows,curFrame.cols, _slice_height, _slice_width, _overlap_height_ratio, _overlap_width_ratio);
cout << "input split number: " << slice_bboxes.size() << endl;
for (auto slice_bbox : slice_bboxes) {
Mat image_pil_slice;
curFrame(Rect(slice_bbox[0],slice_bbox[1],slice_bbox[2]-slice_bbox[0],slice_bbox[3]-slice_bbox[1])).copyTo(image_pil_slice);
cv::Mat resize_img = letterBox(image_pil_slice);
//cout << "input image_size: " << resize_img.size() << endl;
std::size_t img_size = IMAGE_LEN * IMAGE_LEN;
//建立推理请求
InferenceEngine::InferRequest infer_request = _network.CreateInferRequest();
InferenceEngine::Blob::Ptr frameBlob = infer_request.GetBlob(_input_name);
InferenceEngine::LockedMemory<void> blobMapped = InferenceEngine::as<InferenceEngine::MemoryBlob>(frameBlob)->wmap();
float* blob_data = blobMapped.as<float*>();
//nchw
//将输入图像关联到Blob
for (std::size_t row = 0; row < IMAGE_LEN; row++) {
for (std::size_t col = 0; col < IMAGE_LEN; col++) {
for (std::size_t ch = 0; ch < 3; ch++) {
//遍历输入图像的每个像素保存到blob_data,目前不理解blob_data的索引值
blob_data[img_size*ch + row * IMAGE_LEN + col] = float(resize_img.at<cv::Vec3b>(row, col)[ch] / 255.0f);
}
}
}
//执行推理
infer_request.Infer();
//获取各层结果,保存到origin_rect,origin_rect_cof,classId
std::vector<Rect> origin_rect;
std::vector<float> origin_rect_cof;
std::vector<int> classId;
//大规模计算之前先收集指针
std::vector<InferenceEngine::Blob::Ptr> blobs;
//blobs保存推理结果,用于输入到parseYolov5方法进行解析
for (auto &output : _outputinfo) {
auto output_name = output.first;
InferenceEngine::Blob::Ptr blob = infer_request.GetBlob(output_name);
blobs.push_back(blob);
}
//cout << "blobs.size: " << blobs.size() << endl;
//blob的大小为3
for (int i = 0; i < 3; i++) {
float th = 0.5;
//TODO:根据网格大小使用不同阈值,可自己调配
if (i == 0) { th = 0.55; } //小目标严格要求
if (i == 1) { th = 0.45; } //大目标放宽要求
if (i == 2) { th = 0.40; }
//用于保存解析结果的临时vector
std::vector<cv::Rect> origin_rect_temp;
std::vector<float> origin_rect_cof_temp;
std::vector<int> classId_temp;
//解析blobs
parseYolov5(blobs[i], th, origin_rect_temp, origin_rect_cof_temp, classId_temp);
origin_rect.insert(origin_rect.end(), origin_rect_temp.begin(), origin_rect_temp.end());
origin_rect_cof.insert(origin_rect_cof.end(), origin_rect_cof_temp.begin(), origin_rect_cof_temp.end());
classId.insert(classId.end(), classId_temp.begin(), classId_temp.end());
}
//后处理操作获得最终结果
std::vector<int> final_id;
//TODO:此处的阈值需要调整
//调用opencv dnn模块中的NMSBoxes进行非极大抑制处理
cv::dnn::NMSBoxes(origin_rect, origin_rect_cof, _conf_threshold, _nms_area_threshold, final_id);
//cout << "origin_rect: " << origin_rect.size() << " _conf_threshold: " << _conf_threshold << " _nms_area_threshold: " << _nms_area_threshold << " final_id: " << final_id.size() << endl;
//根据final_id获取最终结果
for (int i = 0; i < final_id.size(); ++i) {
cv::Rect resize_rect = origin_rect[final_id[i]];
//调用detect2origin方法将框映射到原图
cv::Rect raw_rect = detect2origin(resize_rect, ratio, topPad, leftPad);
cv::Rect os_rect = Rect(raw_rect.x + slice_bbox[0], raw_rect.y + slice_bbox[1], raw_rect.width, raw_rect.height);
summary_rect.push_back(os_rect);
summary_rect_cof.push_back(origin_rect_cof[final_id[i]]);
summary_classId.push_back(classId[final_id[i]]);
//结果以数据结构Object保存到vector
//cout << "is: " << final_id[i] << endl;
}
}
//最后nms操作
std::vector<int> end_id;
cv::dnn::NMSBoxes(summary_rect, summary_rect_cof, _conf_threshold, _nms_area_threshold, end_id);
//cout << "summary_rect: " << summary_rect.size() << " _conf_threshold: " << _conf_threshold << " _nms_area_threshold: " << _nms_area_threshold << " end_id: " << end_id.size() << endl;
for (int i = 0; i < end_id.size(); ++i) {
//结果以数据结构Object保存到vector
//cout << "is: " << classId[end_id[i]] << endl;
detectedObj.push_back(Object{
summary_rect_cof[end_id[i]],
className[summary_classId[end_id[i]]],summary_rect[end_id[i]],
});
}
return true;
}
vector<vector<int>> Detector::getboxs(int image_height,int image_width,int slice_height,int slice_width,float overlap_height_ratio, float overlap_width_ratio) {
vector<vector<int>> slice_bboxes;
int y_max = 0;
int y_min = 0;
int y_overlap = std::round(overlap_height_ratio * slice_height);
int x_overlap = std::round(overlap_width_ratio * slice_width);
while (y_max < image_height)
{
int x_min = 0;
int x_max = 0;
y_max = y_min + slice_height;
while (x_max < image_width)
{
x_max = x_min + slice_width;
if (y_max > image_height or x_max > image_width) {
int xmax = std::min(image_width, x_max);
int ymax = std::min(image_height, y_max);
int xmin = std::max(0, xmax - slice_width);
int ymin = std::max(0, ymax - slice_height);
vector<int> temp = { xmin, ymin, xmax, ymax };
slice_bboxes.push_back(temp);
}
else {
vector<int> temp = { x_min, y_min, x_max, y_max };
slice_bboxes.push_back(temp);
}
x_min = x_max - x_overlap;
}
y_min = y_max - y_overlap;
}
return slice_bboxes;
}
double Detector::sigmoid(double x) {
return (1 / (1 + exp(-x)));
}
const int anchorBig = IMAGE_LEN / 8; //8倍下采样
const int anchorMid = IMAGE_LEN / 16; //16倍下采样
const int anchorSml = IMAGE_LEN / 32; //32倍下采样
/*yolov5s.yaml内容
anchors:
- [10,13,16,30,33,23] # P3/8
- [30,61,62,45,59,119] # P4/16
- [116,90,156,198,373,326] # P5/32
const int aBig[6] = { 2,2,3,3,3,3 };
const int aMid[6] = { 4,5,5,6,7,9 };
const int aSml[6] = { 13,13,25,26,48,113 };
*/
const int aBig[6] = { 2,2,3,3,3,3 };
const int aMid[6] = { 4,5,8,6,7,9 };
const int aSml[6] = { 10,13,25,26,48,113 };
//获取锚框
std::vector<int> Detector::get_anchors(int net_grid)
{
std::vector<int> anchors(6);
if (net_grid == anchorBig) { anchors.insert(anchors.begin(), aBig, aBig + 6); }
else if (net_grid == anchorMid) { anchors.insert(anchors.begin(), aMid, aMid + 6); }
else if (net_grid == anchorSml) { anchors.insert(anchors.begin(), aSml, aSml + 6); }
return anchors;
}
//图像缩放与填充
cv::Mat Detector::letterBox(Mat src)
{
if (src.empty()) { std::cout << "input image invalid" << endl; return cv::Mat(); }
//以下为带边框图像生成
int in_w = src.cols;
int in_h = src.rows;
int tar_w = IMAGE_LEN;
int tar_h = IMAGE_LEN;
//哪个缩放比例小选哪个
ratio = std::min(float(tar_h) / in_h, float(tar_w) / in_w);
int inside_w = std::round(in_w * ratio);
int inside_h = std::round(in_h * ratio);
int pad_w = tar_w - inside_w;
int pad_h = tar_h - inside_h;
//内层图像resize
cv::Mat resize_img;
cv::resize(src, resize_img, cv::Size(inside_w, inside_h)); //最小的Resize
//cout << "input image_size: " << resize_img.size() << endl;
cv::cvtColor(resize_img, resize_img, cv::COLOR_BGR2RGB);
pad_w = pad_w / 2;
pad_h = pad_h / 2;
//cout << "padding: " << pad_w << " " << pad_h << endl;
//外层边框填充灰色
topPad = int(std::round(pad_h - 0.1));
btmPad = int(std::round(pad_h + 0.1));
leftPad = int(std::round(pad_w - 0.1));
rightPad = int(std::round(pad_w + 0.1));
//cout << "padding: " << topPad<<" "<<btmPad<< " " << leftPad<< " " << rightPad << endl;
cv::copyMakeBorder(resize_img, resize_img, topPad, btmPad, leftPad, rightPad, cv::BORDER_CONSTANT, cv::Scalar(114, 114, 114));
//cout << "input image_size: " << resize_img.size() << endl;
//if (resize_img.cols!= tar_w) {
//cv::resize(resize_img, resize_img, cv::Size(IMAGE_LEN, IMAGE_LEN)); //输出标准化
//}
//if (resize_img.rows!= tar_h) {
//cv::resize(resize_img, resize_img, cv::Size(IMAGE_LEN, IMAGE_LEN)); //输出标准化
//}
return resize_img;
}
//还原
//从detect得到的xywh转换回到原图xywh
cv::Rect Detector::detect2origin(const Rect &det_rect, float rate_to, int top, int left)
{
//detect坐标转换到内部纯图坐标
int inside_x = det_rect.x - left;
int inside_y = det_rect.y - top;
int ox = std::round(float(inside_x) / rate_to);
int oy = std::round(float(inside_y) / rate_to);
int ow = std::round(float(det_rect.width) / rate_to);
int oh = std::round(float(det_rect.height) / rate_to);
cv::Rect origin_rect(ox, oy, ow, oh);
return origin_rect;
}