test_yolov5.py
import torch
from only_train_once import OTO
import unittest
import os
import onnxruntime as ort
import numpy as np

OUT_DIR = './cache'


class TestYolov5(unittest.TestCase):
    def test_sanity(self, dummy_input=torch.rand(1, 3, 640, 640)):
        model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
        # All parameters in the pretrained YOLOv5 are frozen by default; make them trainable.
        for _, param in model.named_parameters():
            param.requires_grad = True
        oto = OTO(model, dummy_input)
        # Mark a conv-concat and the detection heads as unprunable.
        # The node ids may vary across torch versions.
        oto.mark_unprunable_by_node_ids(
            # ['node-229', 'node-329', 'node-443', 'node-553']
            ['node-229', 'node-581', 'node-471', 'node-359']
        )
        # The same can also be achieved via parameter names, which do not depend on the torch version.
        oto.mark_unprunable_by_param_names(
            ['model.model.model.9.cv1.conv.weight', 'model.model.model.24.m.2.weight',
             'model.model.model.24.m.1.weight', 'model.model.model.24.m.0.weight']
        )
        # Display param names and shapes in the dependency-graph visualization.
        oto.visualize(view=False, out_dir=OUT_DIR, display_params=True)
        # Compute FLOPs and parameter count for the full model.
        full_flops = oto.compute_flops(in_million=True)['total']
        full_num_params = oto.compute_num_params(in_million=True)
        # Set up OTO's HESSO optimizer (SGD variant).
        optimizer = oto.hesso(
            variant='sgd',
            lr=0.1
        )
        # Randomly zero out prunable groups to emulate a group-sparse model.
        oto.random_set_zero_groups()
        # YOLOv5 has trouble being reloaded directly as a torch checkpoint, so export to ONNX.
        oto.construct_subnet(
            out_dir=OUT_DIR,
            ckpt_format='onnx'
        )
        # Run the full (group-sparse) and compressed ONNX models on the same input
        # and check that their outputs agree.
        full_sess = ort.InferenceSession(oto.full_group_sparse_model_path)
        full_output = full_sess.run(None, {'onnx::Cast_0': dummy_input.numpy()})
        compressed_sess = ort.InferenceSession(oto.compressed_model_path)
        compressed_output = compressed_sess.run(None, {'onnx::Cast_0': dummy_input.numpy()})
        max_output_diff = np.max(np.abs(full_output[0] - compressed_output[0]))
        print("Maximum output difference : ", max_output_diff.item())
        self.assertLessEqual(max_output_diff, 1e-3)
        # Compare the on-disk sizes of the full and compressed ONNX files.
        full_model_size = os.stat(oto.full_group_sparse_model_path)
        compressed_model_size = os.stat(oto.compressed_model_path)
        print("Size of full model : ", full_model_size.st_size / (1024 ** 3), "GB")
        print("Size of compressed model : ", compressed_model_size.st_size / (1024 ** 3), "GB")
        # Compute FLOPs and parameter count for the pruned model after oto.construct_subnet().
        pruned_flops = oto.compute_flops(in_million=True)['total']
        pruned_num_params = oto.compute_num_params(in_million=True)
        print("FLOP reduction (%) : ", 1.0 - pruned_flops / full_flops)
        print("Param reduction (%) : ", 1.0 - pruned_num_params / full_num_params)