From f34194c015612b3c5096af0dbf72ff2db8fe1689 Mon Sep 17 00:00:00 2001 From: Zhiqiang Wang Date: Sat, 31 Jul 2021 01:22:16 +0800 Subject: [PATCH] Add ncnn deployment examples (#145) * Add ncnn deployment examples * fix:add space to depth function (#146) * Fixing lint * Fixing C++ return bug * Fixing lint and add more tests for space_to_depth * Fixing TypeError of torch.Size * Adding onnx export tools * Refactor YOLODeployFriendly * Move export_onnx.py to ncnn/tools * Adapt to yolov5 * Remove tools * Add yolort ncnn param examples and minor fixes * Rename to yolort-opt.param * Fixing lint Co-authored-by: xiguadong <55774832+xiguadong@users.noreply.github.com> --- deployment/ncnn/CMakeLists.txt | 25 + deployment/ncnn/README.md | 48 ++ deployment/ncnn/export_onnx.py | 62 +++ deployment/ncnn/main.cpp | 509 ++++++++++++++++++ deployment/ncnn/tools/__init__.py | 0 .../ncnn/tools/yolort_deploy_friendly.py | 72 +++ deployment/ncnn/yolort-opt.param | 178 ++++++ test/test_models_common.py | 11 + yolort/models/common.py | 9 + 9 files changed, 914 insertions(+) create mode 100644 deployment/ncnn/CMakeLists.txt create mode 100644 deployment/ncnn/README.md create mode 100644 deployment/ncnn/export_onnx.py create mode 100644 deployment/ncnn/main.cpp create mode 100644 deployment/ncnn/tools/__init__.py create mode 100644 deployment/ncnn/tools/yolort_deploy_friendly.py create mode 100644 deployment/ncnn/yolort-opt.param create mode 100644 test/test_models_common.py diff --git a/deployment/ncnn/CMakeLists.txt b/deployment/ncnn/CMakeLists.txt new file mode 100644 index 00000000..839b141b --- /dev/null +++ b/deployment/ncnn/CMakeLists.txt @@ -0,0 +1,25 @@ +cmake_minimum_required(VERSION 3.14) + +project(yolort_ncnn) + +find_package(OpenCV REQUIRED) + +# If the package has been found, several variables will +# be set, you can find the full list with descriptions +# in the OpenCVConfig.cmake file. 
+# Print some message showing some of them +message(STATUS "OpenCV library status:") +message(STATUS " config: ${OpenCV_DIR}") +message(STATUS " version: ${OpenCV_VERSION}") +message(STATUS " libraries: ${OpenCV_LIBS}") +message(STATUS " include path: ${OpenCV_INCLUDE_DIRS}") + +find_package(ncnn REQUIRED) + +FILE(GLOB YOLO_SOURCE_FILES *.cpp) + +add_executable(yolort_ncnn ${YOLO_SOURCE_FILES}) + +target_compile_features(yolort_ncnn PUBLIC cxx_range_for) + +target_link_libraries(yolort_ncnn ncnn ${OpenCV_LIBS}) diff --git a/deployment/ncnn/README.md b/deployment/ncnn/README.md new file mode 100644 index 00000000..fcca9f08 --- /dev/null +++ b/deployment/ncnn/README.md @@ -0,0 +1,48 @@ +# Ncnn Inference + +ncnn inference for `yolort`; both GPU and CPU are supported. + +## Dependencies + +- Ubuntu 18.04 +- ncnn +- OpenCV 3.4+ + +## Usage + +1. First, set up the environment variables. + + ```bash + export TORCH_PATH=$(dirname $(python -c "import torch; print(torch.__file__)")) + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$TORCH_PATH/lib/ + ``` + +1. Next, compile `ncnn` using the following commands. + + ```bash + git clone --recursive git@github.com:Tencent/ncnn.git + cd ncnn + mkdir build && cd build + cmake -DCMAKE_BUILD_TYPE=Release -DNCNN_SYSTEM_GLSLANG=ON -DNCNN_BUILD_EXAMPLES=ON .. # Set -DNCNN_VULKAN=ON if you're using VULKAN + make -j4 + make install + ``` + + Or follow the [official instructions](https://github.com/Tencent/ncnn/wiki/how-to-build) to install ncnn. + +1. Then compile the source code. + + ```bash + cd deployment/ncnn + mkdir build && cd build + cmake .. -Dncnn_DIR=<ncnn_install_dir>/lib/cmake/ncnn/ + make + ``` + +_Note: replace `<ncnn_install_dir>` with the ncnn install prefix on your machine, i.e. the directory whose `lib/cmake/ncnn/` subfolder contains ncnnConfig.cmake; if you followed the steps above, it is <./ncnn/build/install>._ + +1. Now you can run inference on your own images with ncnn. 
# Copyright (c) 2021, Zhiqiang Wang. All Rights Reserved.
"""Export the deployment-friendly yolov5s model to ONNX for conversion to ncnn."""
import argparse
import warnings

import torch

from tools.yolort_deploy_friendly import yolov5s_r40_deploy_ncnn


def get_parser():
    """Build the command-line parser for the ONNX export script."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='./yolov5s.pt',
                        help='weights path')
    parser.add_argument('--output_path', type=str, default='./yolov5s.onnx',
                        help='path of exported onnx')
    parser.add_argument('--img_size', nargs='+', type=int, default=[640, 640],
                        help='image (height, width)')
    parser.add_argument('--num_classes', type=int, default=80,
                        help='number of classes')
    parser.add_argument('--batch_size', type=int, default=1,
                        help='batch size')
    parser.add_argument('--device', default='cpu',
                        help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--half', action='store_true',
                        help='FP16 half-precision export')
    parser.add_argument('--dynamic', action='store_true',
                        help='ONNX: dynamic axes')
    parser.add_argument('--simplify', action='store_true',
                        help='ONNX: simplify model')
    parser.add_argument('--opset', type=int, default=11,
                        help='ONNX: opset version')
    return parser


def cli_main():
    """Parse the command line, echo the options and run the export."""
    parser = get_parser()
    args = parser.parse_args()
    print(args)
    export_onnx(args)


def _resolve_img_size(img_size):
    """Normalize an ``--img_size`` value to a ``(height, width)`` pair.

    A single value means a square input; ``None`` falls back to 640 x 640,
    the size that used to be hard-coded.
    """
    if img_size is None:
        return 640, 640
    sizes = [img_size] if isinstance(img_size, int) else list(img_size)
    if len(sizes) == 1:
        return sizes[0], sizes[0]
    if len(sizes) == 2:
        return sizes[0], sizes[1]
    raise ValueError(
        f"--img_size expects one or two integers (height, width), got {sizes}")


def _warn_ignored_options(args):
    """Warn about options that are accepted on the CLI but not implemented."""
    ignored = [
        flag for flag in ('half', 'dynamic', 'simplify')
        if getattr(args, flag, False)
    ]
    if getattr(args, 'device', 'cpu') != 'cpu':
        ignored.append('device')
    if ignored:
        warnings.warn(
            "The following options are currently ignored by this export "
            f"script: {', '.join('--' + name for name in ignored)}")


def export_onnx(args):
    """Export the pretrained ncnn-friendly yolov5s model to an ONNX file.

    Args:
        args: parsed options as produced by :func:`get_parser`. The fields
            read are ``num_classes``, ``batch_size``, ``img_size``,
            ``output_path`` and ``opset``.

    Raises:
        ValueError: if ``img_size`` holds more than two values.
        RuntimeError: if the model does not return exactly three head outputs.

    Note:
        ``--weights`` is not consumed here: the model is always built with
        ``pretrained=True``.
    """
    _warn_ignored_options(args)

    model = yolov5s_r40_deploy_ncnn(
        pretrained=True,
        num_classes=args.num_classes,
    )
    # Freeze BatchNorm statistics: a forward pass in training mode on the
    # random dummy input below would otherwise modify the pretrained
    # running mean/variance right before they are exported.
    model.eval()

    # getattr keeps callers working that pass a namespace without img_size.
    height, width = _resolve_img_size(getattr(args, 'img_size', None))
    img = torch.rand(args.batch_size, 3, height, width)

    with torch.no_grad():
        outputs = model(img)
    # The output names below assume exactly one tensor per detection head.
    if len(outputs) != 3:
        raise RuntimeError(
            f"Expected 3 head outputs from the model, got {len(outputs)}")

    torch.onnx.export(
        model,
        img,
        args.output_path,
        verbose=False,
        opset_version=args.opset,
        do_constant_folding=True,
        input_names=['images'],
        output_names=['h1', 'h2', 'h3'],
    )


if __name__ == "__main__":
    cli_main()
+ +#include "layer.h" +#include "net.h" + +#if defined(USE_NCNN_SIMPLEOCV) +#include "simpleocv.h" +#else +#include +#include +#include +#endif +#include +#include +#include + +class YOLOv5Focus : public ncnn::Layer +{ +public: + YOLOv5Focus() + { + one_blob_only = true; + } + + virtual int forward( + const ncnn::Mat& bottom_blob, + ncnn::Mat& top_blob, + const ncnn::Option& opt) const + { + int w = bottom_blob.w; + int h = bottom_blob.h; + int channels = bottom_blob.c; + + int outw = w / 2; + int outh = h / 2; + int outc = channels * 4; + + top_blob.create(outw, outh, outc, 4u, 1, opt.blob_allocator); + if (top_blob.empty()) + return -100; + + #pragma omp parallel for num_threads(opt.num_threads) + for (int p = 0; p < outc; p++) + { + const float* ptr = bottom_blob.channel(p % channels).row((p / channels) % 2) + ((p / channels) / 2); + float* outptr = top_blob.channel(p); + + for (int i = 0; i < outh; i++) + { + for (int j = 0; j < outw; j++) + { + *outptr = *ptr; + + outptr += 1; + ptr += 2; + } + + ptr += w; + } + } + + return 0; + } +}; + +DEFINE_LAYER_CREATOR(YOLOv5Focus) + +struct Object +{ + cv::Rect_ rect; + int label; + float prob; +}; + +static inline float intersection_area(const Object& a, const Object& b) +{ + cv::Rect_ inter = a.rect & b.rect; + return inter.area(); +} + +static void qsort_descent_inplace( + std::vector& faceobjects, + int left, + int right) +{ + int i = left; + int j = right; + float p = faceobjects[(left + right) / 2].prob; + + while (i <= j) + { + while (faceobjects[i].prob > p) + i++; + + while (faceobjects[j].prob < p) + j--; + + if (i <= j) + { + // swap + std::swap(faceobjects[i], faceobjects[j]); + + i++; + j--; + } + } + + #pragma omp parallel sections + { + #pragma omp section + { + if (left < j) qsort_descent_inplace(faceobjects, left, j); + } + #pragma omp section + { + if (i < right) qsort_descent_inplace(faceobjects, i, right); + } + } +} + +static void qsort_descent_inplace(std::vector& objects) +{ + if 
(objects.empty()) + return; + + qsort_descent_inplace(objects, 0, objects.size() - 1); +} + +static void nms_sorted_bboxes( + const std::vector& faceobjects, + std::vector& picked, + float nms_threshold) +{ + picked.clear(); + + const int n = faceobjects.size(); + + std::vector areas(n); + for (int i = 0; i < n; i++) + { + areas[i] = faceobjects[i].rect.area(); + } + + for (int i = 0; i < n; i++) + { + const Object& a = faceobjects[i]; + + int keep = 1; + for (int j = 0; j < (int)picked.size(); j++) + { + const Object& b = faceobjects[picked[j]]; + + // intersection over union + float inter_area = intersection_area(a, b); + float union_area = areas[i] + areas[picked[j]] - inter_area; + // float IoU = inter_area / union_area + if (inter_area / union_area > nms_threshold) + keep = 0; + } + + if (keep) + picked.push_back(i); + } +} + +static inline float sigmoid(float x) +{ + return static_cast(1.f / (1.f + exp(-x))); +} + +static void generate_proposals( + const ncnn::Mat& anchors, + int stride, + const ncnn::Mat& in_pad, + const ncnn::Mat& feat_blob, + float prob_threshold, + std::vector& objects) +{ + const int num_grid = feat_blob.h; + + int num_grid_x; + int num_grid_y; + if (in_pad.w > in_pad.h) + { + num_grid_x = in_pad.w / stride; + num_grid_y = num_grid / num_grid_x; + } + else + { + num_grid_y = in_pad.h / stride; + num_grid_x = num_grid / num_grid_y; + } + + const int num_class = feat_blob.w - 5; + + const int num_anchors = anchors.w / 2; + + for (int q = 0; q < num_anchors; q++) + { + const float anchor_w = anchors[q * 2]; + const float anchor_h = anchors[q * 2 + 1]; + + const ncnn::Mat feat = feat_blob.channel(q); + + for (int i = 0; i < num_grid_y; i++) + { + for (int j = 0; j < num_grid_x; j++) + { + const float* featptr = feat.row(i * num_grid_x + j); + + // find class index with max class score + int class_index = 0; + float class_score = -FLT_MAX; + for (int k = 0; k < num_class; k++) + { + float score = featptr[5 + k]; + if (score > class_score) + { 
+ class_index = k; + class_score = score; + } + } + + float box_score = featptr[4]; + + float confidence = sigmoid(box_score) * sigmoid(class_score); + + if (confidence >= prob_threshold) + { + // yolov5/models/yolo.py Detect forward + // y = x[i].sigmoid() + // y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i] # xy + // y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh + + float dx = sigmoid(featptr[0]); + float dy = sigmoid(featptr[1]); + float dw = sigmoid(featptr[2]); + float dh = sigmoid(featptr[3]); + + float pb_cx = (dx * 2.f - 0.5f + j) * stride; + float pb_cy = (dy * 2.f - 0.5f + i) * stride; + + float pb_w = pow(dw * 2.f, 2) * anchor_w; + float pb_h = pow(dh * 2.f, 2) * anchor_h; + + float x0 = pb_cx - pb_w * 0.5f; + float y0 = pb_cy - pb_h * 0.5f; + float x1 = pb_cx + pb_w * 0.5f; + float y1 = pb_cy + pb_h * 0.5f; + + Object obj; + obj.rect.x = x0; + obj.rect.y = y0; + obj.rect.width = x1 - x0; + obj.rect.height = y1 - y0; + obj.label = class_index; + obj.prob = confidence; + + objects.push_back(obj); + } + } + } + } +} + +static int detect_yolov5(const cv::Mat& bgr, std::vector& objects) +{ + ncnn::Net yolov5; + + yolov5.opt.use_vulkan_compute = true; + // yolov5.opt.use_bf16_storage = true; + + yolov5.register_custom_layer("YOLOv5Focus", YOLOv5Focus_layer_creator); + + // original pretrained model from https://github.com/ultralytics/yolov5 + // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models + yolov5.load_param("yolort-opt.param"); + yolov5.load_model("yolort-opt.bin"); + + const int target_size = 640; + const float prob_threshold = 0.25f; + const float nms_threshold = 0.45f; + + int img_w = bgr.cols; + int img_h = bgr.rows; + + // letterbox pad to multiple of 32 + int w = img_w; + int h = img_h; + float scale = 1.f; + if (w > h) + { + scale = (float)target_size / w; + w = target_size; + h = h * scale; + } + else + { + scale = (float)target_size / h; + h = target_size; + w = w * 
scale; + } + + ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, + img_w, img_h, w, h); + + // pad to target_size rectangle + // yolov5/utils/datasets.py letterbox + int wpad = (w + 31) / 32 * 32 - w; + int hpad = (h + 31) / 32 * 32 - h; + ncnn::Mat in_pad; + ncnn::copy_make_border( + in, + in_pad, + hpad / 2, + hpad - hpad / 2, + wpad / 2, + wpad - wpad / 2, + ncnn::BORDER_CONSTANT, + 114.f); + + const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f}; + in_pad.substract_mean_normalize(0, norm_vals); + + ncnn::Extractor ex = yolov5.create_extractor(); + + ex.input("images", in_pad); + + std::vector proposals; + + // anchor setting from yolov5/models/yolov5s.yaml + + // stride 8 + { + ncnn::Mat out; + ex.extract("h1", out); + + ncnn::Mat anchors(6); + anchors[0] = 10.f; + anchors[1] = 13.f; + anchors[2] = 16.f; + anchors[3] = 30.f; + anchors[4] = 33.f; + anchors[5] = 23.f; + + std::vector objects8; + generate_proposals(anchors, 8, in_pad, out, prob_threshold, objects8); + + proposals.insert(proposals.end(), objects8.begin(), objects8.end()); + } + + // stride 16 + { + ncnn::Mat out; + ex.extract("h2", out); + + ncnn::Mat anchors(6); + anchors[0] = 30.f; + anchors[1] = 61.f; + anchors[2] = 62.f; + anchors[3] = 45.f; + anchors[4] = 59.f; + anchors[5] = 119.f; + + std::vector objects16; + generate_proposals(anchors, 16, in_pad, out, prob_threshold, objects16); + + proposals.insert(proposals.end(), objects16.begin(), objects16.end()); + } + + // stride 32 + { + ncnn::Mat out; + ex.extract("h3", out); + + ncnn::Mat anchors(6); + anchors[0] = 116.f; + anchors[1] = 90.f; + anchors[2] = 156.f; + anchors[3] = 198.f; + anchors[4] = 373.f; + anchors[5] = 326.f; + + std::vector objects32; + generate_proposals(anchors, 32, in_pad, out, prob_threshold, objects32); + + proposals.insert(proposals.end(), objects32.begin(), objects32.end()); + } + + // sort all proposals by score from highest to lowest + qsort_descent_inplace(proposals); + + // 
apply nms with nms_threshold + std::vector picked; + nms_sorted_bboxes(proposals, picked, nms_threshold); + + int count = picked.size(); + + objects.resize(count); + for (int i = 0; i < count; i++) + { + objects[i] = proposals[picked[i]]; + + // adjust offset to original unpadded + float x0 = (objects[i].rect.x - (wpad / 2)) / scale; + float y0 = (objects[i].rect.y - (hpad / 2)) / scale; + float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale; + float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale; + + // clip + x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f); + y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f); + x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f); + y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f); + + objects[i].rect.x = x0; + objects[i].rect.y = y0; + objects[i].rect.width = x1 - x0; + objects[i].rect.height = y1 - y0; + } + + return 0; +} + +static void draw_objects(const cv::Mat& bgr, const std::vector& objects) +{ + static const char* class_names[] = { + "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", + "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", + "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", + "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", + "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", + "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", + "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", + "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", + "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", + "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", + "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush" + }; + + cv::Mat image = bgr.clone(); + + for (size_t i = 0; i < 
objects.size(); i++) + { + const Object& obj = objects[i]; + + fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob, + obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height); + + cv::rectangle(image, obj.rect, cv::Scalar(255, 0, 0)); + + char text[256]; + sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100); + + int baseLine = 0; + cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); + + int x = obj.rect.x; + int y = obj.rect.y - label_size.height - baseLine; + if (y < 0) + y = 0; + if (x + label_size.width > image.cols) + x = image.cols - label_size.width; + + cv::rectangle(image, cv::Rect(cv::Point(x, y), + cv::Size(label_size.width, label_size.height + baseLine)), + cv::Scalar(255, 255, 255), -1); + + cv::putText(image, text, cv::Point(x, y + label_size.height), + cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); + } +} + +int main(int argc, char** argv) +{ + if (argc != 2) + { + fprintf(stderr, "Usage: %s [imagepath]\n", argv[0]); + return -1; + } + + const char* imagepath = argv[1]; + + cv::Mat m = cv::imread(imagepath, 1); + if (m.empty()) + { + fprintf(stderr, "cv::imread %s failed\n", imagepath); + return -1; + } + + std::vector objects; + detect_yolov5(m, objects); + + draw_objects(m, objects); + + return 0; +} diff --git a/deployment/ncnn/tools/__init__.py b/deployment/ncnn/tools/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/deployment/ncnn/tools/yolort_deploy_friendly.py b/deployment/ncnn/tools/yolort_deploy_friendly.py new file mode 100644 index 00000000..0d011799 --- /dev/null +++ b/deployment/ncnn/tools/yolort_deploy_friendly.py @@ -0,0 +1,72 @@ +# Copyright (c) 2021, Zhiqiang Wang. All Rights Reserved. 
from typing import Any, List, Optional

from torch import nn, Tensor
# torch.hub is the stable home of this helper; the former
# torchvision.models.utils re-export is no longer available in
# current torchvision releases.
from torch.hub import load_state_dict_from_url

from yolort.models.yolo import YOLO, model_urls
from yolort.models.backbone_utils import darknet_pan_backbone


class YOLODeployFriendly(YOLO):
    """
    Deployment Friendly Wrapper of YOLO.

    The forward pass runs only the backbone and the detection head and
    returns the head outputs as they are.
    """
    def __init__(
        self,
        backbone: nn.Module,
        num_classes: int,
        # Anchor parameters
        anchor_grids: Optional[List[List[float]]] = None,
        anchor_generator: Optional[nn.Module] = None,
        head: Optional[nn.Module] = None,
    ):
        super().__init__(backbone, num_classes, anchor_grids=anchor_grids,
                         anchor_generator=anchor_generator, head=head)

    def forward(self, samples: Tensor):
        """
        Arguments:
            samples (Tensor): batched images, of shape [batch_size x 3 x H x W]

        Returns:
            The outputs of the detection head, without any further processing.
        """
        # get the features from the backbone
        features = self.backbone(samples)

        # compute the yolo heads outputs using the features
        head_outputs = self.head(features)
        return head_outputs


def yolov5s_r40_deploy_ncnn(
    pretrained: bool = False,
    progress: bool = True,
    num_classes: int = 80,
    **kwargs: Any,
) -> YOLODeployFriendly:
    """
    Deployment friendly Wrapper of yolov5s for ncnn.

    Args:
        pretrained (bool): If True, loads the released
            ``yolov5_darknet_pan_s_r40_coco`` checkpoint into the model
        progress (bool): If True, displays a progress bar of the download to stderr
        num_classes (int): number of classes predicted by the detection head
        **kwargs: forwarded to the ``YOLODeployFriendly`` constructor

    Returns:
        YOLODeployFriendly: the model, with its ``anchor_generator`` and
        ``post_process`` attributes removed.

    Raises:
        ValueError: if ``pretrained`` is True and no checkpoint URL is registered.
    """
    backbone_name = 'darknet_s_r4_0'
    weights_name = 'yolov5_darknet_pan_s_r40_coco'
    depth_multiple = 0.33
    width_multiple = 0.5
    version = 'r4.0'

    backbone = darknet_pan_backbone(backbone_name, depth_multiple, width_multiple, version=version)

    model = YOLODeployFriendly(backbone, num_classes, **kwargs)
    if pretrained:
        if model_urls.get(weights_name, None) is None:
            raise ValueError(f"No checkpoint is available for model {weights_name}")
        state_dict = load_state_dict_from_url(model_urls[weights_name], progress=progress)
        model.load_state_dict(state_dict)

    # The overridden forward() only uses the backbone and the head, so these
    # two members are dropped after the weights have been loaded.
    del model.anchor_generator
    del model.post_process

    return model
1 2 435 435_splitncnn_0 435_splitncnn_1 +Convolution Conv_67 1 1 435_splitncnn_1 755 0=64 1=1 5=1 6=8192 +Swish Mul_69 1 1 755 439 +Split splitncnn_3 1 2 439 439_splitncnn_0 439_splitncnn_1 +Convolution Conv_70 1 1 439_splitncnn_1 758 0=64 1=1 5=1 6=4096 +Swish Mul_72 1 1 758 443 +Convolution Conv_73 1 1 443 761 0=64 1=3 4=1 5=1 6=36864 +Swish Mul_75 1 1 761 447 +BinaryOp Add_76 2 1 439_splitncnn_0 447 448 +Split splitncnn_4 1 2 448 448_splitncnn_0 448_splitncnn_1 +Convolution Conv_77 1 1 448_splitncnn_1 764 0=64 1=1 5=1 6=4096 +Swish Mul_79 1 1 764 452 +Convolution Conv_80 1 1 452 767 0=64 1=3 4=1 5=1 6=36864 +Swish Mul_82 1 1 767 456 +BinaryOp Add_83 2 1 448_splitncnn_0 456 457 +Split splitncnn_5 1 2 457 457_splitncnn_0 457_splitncnn_1 +Convolution Conv_84 1 1 457_splitncnn_1 770 0=64 1=1 5=1 6=4096 +Swish Mul_86 1 1 770 461 +Convolution Conv_87 1 1 461 773 0=64 1=3 4=1 5=1 6=36864 +Swish Mul_89 1 1 773 465 +BinaryOp Add_90 2 1 457_splitncnn_0 465 466 +Convolution Conv_91 1 1 435_splitncnn_0 776 0=64 1=1 5=1 6=8192 +Swish Mul_93 1 1 776 470 +Concat Concat_94 2 1 466 470 471 +Convolution Conv_95 1 1 471 779 0=128 1=1 5=1 6=16384 +Swish Mul_97 1 1 779 475 +Split splitncnn_6 1 2 475 475_splitncnn_0 475_splitncnn_1 +Convolution Conv_98 1 1 475_splitncnn_1 782 0=256 1=3 3=2 4=1 5=1 6=294912 +Swish Mul_100 1 1 782 479 +Split splitncnn_7 1 2 479 479_splitncnn_0 479_splitncnn_1 +Convolution Conv_101 1 1 479_splitncnn_1 785 0=128 1=1 5=1 6=32768 +Swish Mul_103 1 1 785 483 +Split splitncnn_8 1 2 483 483_splitncnn_0 483_splitncnn_1 +Convolution Conv_104 1 1 483_splitncnn_1 788 0=128 1=1 5=1 6=16384 +Swish Mul_106 1 1 788 487 +Convolution Conv_107 1 1 487 791 0=128 1=3 4=1 5=1 6=147456 +Swish Mul_109 1 1 791 491 +BinaryOp Add_110 2 1 483_splitncnn_0 491 492 +Split splitncnn_9 1 2 492 492_splitncnn_0 492_splitncnn_1 +Convolution Conv_111 1 1 492_splitncnn_1 794 0=128 1=1 5=1 6=16384 +Swish Mul_113 1 1 794 496 +Convolution Conv_114 1 1 496 797 0=128 1=3 4=1 5=1 6=147456 +Swish 
Mul_116 1 1 797 500 +BinaryOp Add_117 2 1 492_splitncnn_0 500 501 +Split splitncnn_10 1 2 501 501_splitncnn_0 501_splitncnn_1 +Convolution Conv_118 1 1 501_splitncnn_1 800 0=128 1=1 5=1 6=16384 +Swish Mul_120 1 1 800 505 +Convolution Conv_121 1 1 505 803 0=128 1=3 4=1 5=1 6=147456 +Swish Mul_123 1 1 803 509 +BinaryOp Add_124 2 1 501_splitncnn_0 509 510 +Convolution Conv_125 1 1 479_splitncnn_0 806 0=128 1=1 5=1 6=32768 +Swish Mul_127 1 1 806 514 +Concat Concat_128 2 1 510 514 515 +Convolution Conv_129 1 1 515 809 0=256 1=1 5=1 6=65536 +Swish Mul_131 1 1 809 519 +Split splitncnn_11 1 2 519 519_splitncnn_0 519_splitncnn_1 +Convolution Conv_132 1 1 519_splitncnn_1 812 0=512 1=3 3=2 4=1 5=1 6=1179648 +Swish Mul_134 1 1 812 523 +Convolution Conv_135 1 1 523 815 0=256 1=1 5=1 6=131072 +Swish Mul_137 1 1 815 527 +Split splitncnn_12 1 4 527 527_splitncnn_0 527_splitncnn_1 527_splitncnn_2 527_splitncnn_3 +Pooling MaxPool_138 1 1 527_splitncnn_3 528 1=5 3=2 5=1 +Pooling MaxPool_139 1 1 527_splitncnn_2 529 1=9 3=4 5=1 +Pooling MaxPool_140 1 1 527_splitncnn_1 530 1=13 3=6 5=1 +Concat Concat_141 4 1 527_splitncnn_0 528 529 530 531 +Convolution Conv_142 1 1 531 818 0=512 1=1 5=1 6=524288 +Swish Mul_144 1 1 818 535 +Split splitncnn_13 1 2 535 535_splitncnn_0 535_splitncnn_1 +Convolution Conv_145 1 1 535_splitncnn_1 821 0=256 1=1 5=1 6=131072 +Swish Mul_147 1 1 821 539 +Convolution Conv_148 1 1 539 824 0=256 1=1 5=1 6=65536 +Swish Mul_150 1 1 824 543 +Convolution Conv_151 1 1 543 827 0=256 1=3 4=1 5=1 6=589824 +Swish Mul_153 1 1 827 547 +Convolution Conv_154 1 1 535_splitncnn_0 830 0=256 1=1 5=1 6=131072 +Swish Mul_156 1 1 830 551 +Concat Concat_157 2 1 547 551 552 +Convolution Conv_158 1 1 552 833 0=512 1=1 5=1 6=262144 +Swish Mul_160 1 1 833 556 +Convolution Conv_161 1 1 556 836 0=256 1=1 5=1 6=131072 +Swish Mul_163 1 1 836 560 +Split splitncnn_14 1 2 560 560_splitncnn_0 560_splitncnn_1 +Interp Resize_165 1 1 560_splitncnn_1 565 0=1 1=2.000000e+00 2=2.000000e+00 +Concat 
Concat_166 2 1 565 519_splitncnn_0 566 +Split splitncnn_15 1 2 566 566_splitncnn_0 566_splitncnn_1 +Convolution Conv_167 1 1 566_splitncnn_1 839 0=128 1=1 5=1 6=65536 +Swish Mul_169 1 1 839 570 +Convolution Conv_170 1 1 570 842 0=128 1=1 5=1 6=16384 +Swish Mul_172 1 1 842 574 +Convolution Conv_173 1 1 574 845 0=128 1=3 4=1 5=1 6=147456 +Swish Mul_175 1 1 845 578 +Convolution Conv_176 1 1 566_splitncnn_0 848 0=128 1=1 5=1 6=65536 +Swish Mul_178 1 1 848 582 +Concat Concat_179 2 1 578 582 583 +Convolution Conv_180 1 1 583 851 0=256 1=1 5=1 6=65536 +Swish Mul_182 1 1 851 587 +Convolution Conv_183 1 1 587 854 0=128 1=1 5=1 6=32768 +Swish Mul_185 1 1 854 591 +Split splitncnn_16 1 2 591 591_splitncnn_0 591_splitncnn_1 +Interp Resize_187 1 1 591_splitncnn_1 596 0=1 1=2.000000e+00 2=2.000000e+00 +Concat Concat_188 2 1 596 475_splitncnn_0 597 +Split splitncnn_17 1 2 597 597_splitncnn_0 597_splitncnn_1 +Convolution Conv_189 1 1 597_splitncnn_1 857 0=64 1=1 5=1 6=16384 +Swish Mul_191 1 1 857 601 +Convolution Conv_192 1 1 601 860 0=64 1=1 5=1 6=4096 +Swish Mul_194 1 1 860 605 +Convolution Conv_195 1 1 605 863 0=64 1=3 4=1 5=1 6=36864 +Swish Mul_197 1 1 863 609 +Convolution Conv_198 1 1 597_splitncnn_0 866 0=64 1=1 5=1 6=16384 +Swish Mul_200 1 1 866 613 +Concat Concat_201 2 1 609 613 614 +Convolution Conv_202 1 1 614 869 0=128 1=1 5=1 6=16384 +Swish Mul_204 1 1 869 618 +Split splitncnn_18 1 2 618 618_splitncnn_0 618_splitncnn_1 +Convolution Conv_205 1 1 618_splitncnn_1 872 0=128 1=3 3=2 4=1 5=1 6=147456 +Swish Mul_207 1 1 872 622 +Concat Concat_208 2 1 622 591_splitncnn_0 623 +Split splitncnn_19 1 2 623 623_splitncnn_0 623_splitncnn_1 +Convolution Conv_209 1 1 623_splitncnn_1 875 0=128 1=1 5=1 6=32768 +Swish Mul_211 1 1 875 627 +Convolution Conv_212 1 1 627 878 0=128 1=1 5=1 6=16384 +Swish Mul_214 1 1 878 631 +Convolution Conv_215 1 1 631 881 0=128 1=3 4=1 5=1 6=147456 +Swish Mul_217 1 1 881 635 +Convolution Conv_218 1 1 623_splitncnn_0 884 0=128 1=1 5=1 6=32768 +Swish Mul_220 1 
import pytest
import torch
from yolort.models.common import focus_transform, space_to_depth


@pytest.mark.parametrize('n, b, h, w', [(1, 3, 480, 640), (4, 3, 416, 320), (4, 3, 320, 416)])
def test_space_to_depth(n, b, h, w):
    """``space_to_depth`` must agree with ``focus_transform`` on random inputs."""
    images = torch.randn(n, b, h, w)
    expected = focus_transform(images)
    actual = space_to_depth(images)
    torch.testing.assert_allclose(expected, actual)
def space_to_depth(x: Tensor, block_size: int = 2) -> Tensor:
    """
    Rearrange spatial blocks of a batched image tensor into channels.

    ``x(N, C, H, W) -> y(N, C * block_size**2, H / block_size, W / block_size)``

    Output channel ``(dw * block_size + dh) * C + c`` at position ``(i, j)``
    holds ``x[:, c, block_size * i + dh, block_size * j + dw]``, where
    ``dh`` / ``dw`` are the row / column offsets inside a block.

    Args:
        x (Tensor): input of shape ``(N, C, H, W)``. ``H`` and ``W`` must be
            divisible by ``block_size``; otherwise the reshape below raises
            a ``RuntimeError``.
        block_size (int): side length of the square blocks. Default: 2

    Returns:
        Tensor: the rearranged tensor.
    """
    N, C, H, W = x.size()
    # Split each spatial axis into (block index, offset inside the block).
    x = x.reshape(N, C, H // block_size, block_size, W // block_size, block_size)
    # Move the two in-block offsets in front of the channel axis:
    # (N, dw, dh, C, H / block_size, W / block_size).
    x = x.permute(0, 5, 3, 1, 2, 4)
    y = x.reshape(N, C * block_size * block_size, H // block_size, W // block_size)
    return y