Incorrect drawing of rectangles #345

Open
Egorundel opened this issue Jan 30, 2024 · 0 comments

Egorundel commented Jan 30, 2024

Hello, can you tell me why the boxes are drawn incorrectly?

Picture of the problem: [screenshot of the incorrectly drawn boxes]

Inference code:
infer.cpp

#include <iostream>
#include <memory>
#include <cmath>
#include <stdexcept>
#include <vector>
#include <chrono>

#include <opencv2/opencv.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>

#include <cuda_runtime.h>

#include "../../csrc/engine.h"

using namespace std;
using namespace cv;

int main(int argc, char *argv[]) {
    if (argc<3 || argc>4) {
        cerr << "Usage: " << argv[0] << " engine.plan image.jpg [<OUTPUT>.png]" << endl;
        return 1;
    }

    cout << "Loading engine..." << endl;
    auto engine = std::make_unique<odtk::Engine>(argv[1]);

    cout << "Preparing data..." << endl;
    auto image = imread(argv[2], IMREAD_COLOR);
    auto inputSize = engine->getInputSize();
    cv::resize(image, image, Size(inputSize[1], inputSize[0]));
    cv::Mat pixels;
    image.convertTo(pixels, CV_32FC3, 1.0 / 255, 0);

    int channels = 3;
    vector<float> img;
    vector<float> data (channels * inputSize[0] * inputSize[1]);

    if (pixels.isContinuous())
        img.assign((float*)pixels.datastart, (float*)pixels.dataend);
    else {
        cerr << "Error reading image " << argv[2] << endl;
        return -1;
    }

    vector<float> mean {0.485, 0.456, 0.406};
    vector<float> std {0.229, 0.224, 0.225};

    for (int c = 0; c < channels; c++) {
        for (int j = 0, hw = inputSize[0] * inputSize[1]; j < hw; j++) {
            data[c * hw + j] = (img[channels * j + 2 - c] - mean[c]) / std[c];
        }
    }

    // Create device buffers
    void *data_d, *classes_d, *scores_d, *boxes_d;
    auto num_det = engine->getMaxDetections();
    cout << "Max Detections: " << num_det << endl;
    engine->getBindingDimensions();

    vector<int> out_sizes = engine->getBindingSizes();
    cudaMalloc(&data_d, 12 * channels * inputSize[0] * inputSize[1]);
    cudaMalloc(&classes_d, out_sizes[0]);
    cudaMalloc(&scores_d, out_sizes[1]);
    cudaMalloc(&boxes_d, out_sizes[2]);


    // Copy image to device
    size_t dataSize = data.size() * sizeof(float);
    cudaMemcpy(data_d, data.data(), dataSize, cudaMemcpyHostToDevice);

    // Run inference n times
    cout << "Running inference..." << endl;
    const int count = 100;
    auto start = chrono::steady_clock::now();
    vector<void *> buffers = { data_d, classes_d, scores_d, boxes_d };
    for (int i = 0; i < count; i++) {
        engine->infer(buffers, 1);
    }
    auto stop = chrono::steady_clock::now();
    auto timing = chrono::duration_cast<chrono::duration<double>>(stop - start);
    cout << "Took " << timing.count() / count << " seconds per inference." << endl;

    cudaFree(data_d);

    // Get back the bounding boxes
    unique_ptr<float[]> classes(new float[out_sizes[0]]);
    unique_ptr<float[]> scores(new float[out_sizes[1]]);
    unique_ptr<float[]> boxes(new float[out_sizes[2]]);
    cudaMemcpy(classes.get(), classes_d, out_sizes[0], cudaMemcpyDeviceToHost);
    cudaMemcpy(scores.get(), scores_d, out_sizes[1], cudaMemcpyDeviceToHost);
    cudaMemcpy(boxes.get(), boxes_d, out_sizes[2], cudaMemcpyDeviceToHost);

    cudaFree(classes_d);
    cudaFree(scores_d);
    cudaFree(boxes_d);

    for (int i = 0; i < num_det; i++) {
        // Show results over confidence threshold
        if (scores[i] >= 0.9f) {
            float x1 = boxes[i*4+0];
            float y1 = boxes[i*4+1];
            float x2 = boxes[i*4+2];
            float y2 = boxes[i*4+3];

            cout << "Found box {" << x1 << ", " << y1 << ", " << x2 << ", " << y2
                 << "} with score " << scores[i] << " and class " << round(abs(classes[i])) << endl;

            // Draw bounding box on image
            cv::rectangle(image, Point(x1, y1), Point(x2, y2), cv::Scalar(0, 255, 0));
        }
    }

    // Write image
    string out_file = argc == 4 ? string(argv[3]) : "detections.png";
    cout << "Saving result to " << out_file << endl;
    imwrite(out_file, image);

    return 0;
}
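
One thing I am not sure about: getBindingSizes() (see engine.cpp below) multiplies the binding dimensions together, so out_sizes holds element counts, while the cudaMalloc / cudaMemcpy calls above use those values directly as byte counts. If that reading is right, the device buffers and the device-to-host copies are only a quarter of the required size. A minimal sketch of the same calls with an explicit sizeof(float) factor, assuming all output bindings are float32 (illustration only, not a confirmed fix):

    // Sketch: assumes out_sizes holds per-binding element counts, as computed
    // by getBindingSizes(), and that all output bindings are float32.
    size_t classesBytes = out_sizes[0] * sizeof(float);
    size_t scoresBytes  = out_sizes[1] * sizeof(float);
    size_t boxesBytes   = out_sizes[2] * sizeof(float);

    cudaMalloc(&classes_d, classesBytes);
    cudaMalloc(&scores_d, scoresBytes);
    cudaMalloc(&boxes_d, boxesBytes);

    // ... run inference as above ...

    // Host buffers are sized in elements; the copies are sized in bytes.
    unique_ptr<float[]> classes(new float[out_sizes[0]]);
    unique_ptr<float[]> scores(new float[out_sizes[1]]);
    unique_ptr<float[]> boxes(new float[out_sizes[2]]);
    cudaMemcpy(classes.get(), classes_d, classesBytes, cudaMemcpyDeviceToHost);
    cudaMemcpy(scores.get(), scores_d, scoresBytes, cudaMemcpyDeviceToHost);
    cudaMemcpy(boxes.get(), boxes_d, boxesBytes, cudaMemcpyDeviceToHost);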

engine.cpp

#include "engine.h"

#include <iostream>
#include <fstream>

#include <NvOnnxConfig.h>
#include <NvOnnxParser.h>

#include "plugins/DecodePlugin.h"
#include "plugins/NMSPlugin.h"
#include "plugins/DecodeRotatePlugin.h"
#include "plugins/NMSRotatePlugin.h"
#include "calibrator.h"

#include <stdio.h>
#include <string>

using namespace nvinfer1;
using namespace nvonnxparser;
using namespace std;

namespace odtk {

class Logger : public ILogger {
public:
    Logger(bool verbose)
        : _verbose(verbose) {
    }

    void log(Severity severity, const char *msg) noexcept override {
        if (_verbose || ((severity != Severity::kINFO) && (severity != Severity::kVERBOSE)))
            cout << msg << endl;
    }

private:
    bool _verbose{false};
};

void Engine::_load(const string &path) {
    /// read a serialized file
    ifstream file(path, ios::in | ios::binary);
    if (!file) {
        cout << "read serialized file failed\n";
        std::exit(1);
    }

    file.seekg(0, std::ios::end);
    const int length = file.tellg();
    file.clear();
    file.seekg(0, ios::beg);
    std::shared_ptr<char> data(new char[length], std::default_delete<char[]>());
    file.read(data.get(), length);
    file.close();

    cout << "model size: " << length << endl;

    /// Initialization of the engine
    _engine = std::unique_ptr<ICudaEngine>(_runtime->deserializeCudaEngine(data.get(), length, nullptr));
}

void Engine::_prepare() {
    _context = std::unique_ptr<IExecutionContext>(_engine->createExecutionContext());
    _context->setOptimizationProfileAsync(0, _stream);
    cudaStreamCreate(&_stream);
}

Engine::Engine(const string &engine_path, bool verbose) {
    Logger logger(verbose);
    _runtime = std::unique_ptr<IRuntime>(createInferRuntime(logger));
    _load(engine_path);
    _prepare();
}

Engine::~Engine() {
    if (_stream) cudaStreamDestroy(_stream);
}

Engine::Engine(const char *onnx_model, size_t onnx_size, const vector<int>& dynamic_batch_opts,
    string precision, float score_thresh, int top_n, const vector<vector<float>>& anchors, 
    bool rotated, float nms_thresh, int detections_per_im, const vector<string>& calibration_images,
    string model_name, string calibration_table, bool verbose, size_t workspace_size) {

    Logger logger(verbose);
    _runtime = std::unique_ptr<IRuntime>(createInferRuntime(logger));

    bool fp16 = precision.compare("FP16") == 0;
    bool int8 = precision.compare("INT8") == 0;

    // Create builder
    auto builder = std::unique_ptr<IBuilder>(createInferBuilder(logger));
    auto builderConfig = std::unique_ptr<IBuilderConfig>(builder->createBuilderConfig());
    // Allow use of FP16 layers when running in INT8
    if(fp16 || int8) builderConfig->setFlag(BuilderFlag::kFP16);
    builderConfig->setMaxWorkspaceSize(workspace_size);
    
    // Parse ONNX FCN
    cout << "Building " << precision << " core model..." << endl;
    const auto flags = 1U << static_cast<int>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
    auto network = std::unique_ptr<INetworkDefinition>(builder->createNetworkV2(flags));
    auto parser = std::unique_ptr<IParser>(createParser(*network, logger));
    parser->parse(onnx_model, onnx_size);
    
    auto input = network->getInput(0);
    auto inputDims = input->getDimensions();
    auto profile = builder->createOptimizationProfile();
    auto inputName = input->getName();
    auto profileDimsmin = Dims4{dynamic_batch_opts[0], inputDims.d[1], inputDims.d[2], inputDims.d[3]};
    auto profileDimsopt = Dims4{dynamic_batch_opts[1], inputDims.d[1], inputDims.d[2], inputDims.d[3]};
    auto profileDimsmax = Dims4{dynamic_batch_opts[2], inputDims.d[1], inputDims.d[2], inputDims.d[3]};

    profile->setDimensions(inputName, nvinfer1::OptProfileSelector::kMIN, profileDimsmin);
    profile->setDimensions(inputName, nvinfer1::OptProfileSelector::kOPT, profileDimsopt);
    profile->setDimensions(inputName, nvinfer1::OptProfileSelector::kMAX, profileDimsmax);
    
    if(profile->isValid())
        builderConfig->addOptimizationProfile(profile);

    std::unique_ptr<Int8EntropyCalibrator> calib;
    if (int8) {
        builderConfig->setFlag(BuilderFlag::kINT8);
        // Calibration is performed using kOPT values of the profile.
        // Calibration batch size must match this profile.
        builderConfig->setCalibrationProfile(profile);
        ImageStream stream(dynamic_batch_opts[1], inputDims, calibration_images);
        calib = std::unique_ptr<Int8EntropyCalibrator>(new Int8EntropyCalibrator(stream, model_name, calibration_table));
        builderConfig->setInt8Calibrator(calib.get());
    }

    // Add decode plugins
    cout << "Building accelerated plugins..." << endl;
    vector<DecodePlugin> decodePlugins;
    vector<DecodeRotatePlugin> decodeRotatePlugins;
    vector<ITensor *> scores, boxes, classes;
    auto nbOutputs = network->getNbOutputs();
    
    for (int i = 0; i < nbOutputs / 2; i++) {
        auto classOutput = network->getOutput(i);
        auto boxOutput = network->getOutput(nbOutputs / 2 + i);
        auto outputDims = classOutput->getDimensions();
        int scale = inputDims.d[2] / outputDims.d[2];
        auto decodePlugin = DecodePlugin(score_thresh, top_n, anchors[i], scale);
        auto decodeRotatePlugin = DecodeRotatePlugin(score_thresh, top_n, anchors[i], scale);
        decodePlugins.push_back(decodePlugin); 
        decodeRotatePlugins.push_back(decodeRotatePlugin);
        vector<ITensor *> inputs = {classOutput, boxOutput};
        auto layer = (!rotated) ? network->addPluginV2(inputs.data(), inputs.size(), decodePlugin) \
                    : network->addPluginV2(inputs.data(), inputs.size(), decodeRotatePlugin);
        scores.push_back(layer->getOutput(0));
        boxes.push_back(layer->getOutput(1));
        classes.push_back(layer->getOutput(2));
    }

    // Cleanup outputs
    for (int i = 0; i < nbOutputs; i++) {
        auto output = network->getOutput(0);
        network->unmarkOutput(*output);
    }

    // Concat tensors from each feature map
    vector<ITensor *> concat;
    for (auto tensors : {scores, boxes, classes}) {
        auto layer = network->addConcatenation(tensors.data(), tensors.size());
        concat.push_back(layer->getOutput(0));
    }
    
    // Add NMS plugin
    auto nmsPlugin = NMSPlugin(nms_thresh, detections_per_im);
    auto nmsRotatePlugin = NMSRotatePlugin(nms_thresh, detections_per_im);
    auto layer = (!rotated) ? network->addPluginV2(concat.data(), concat.size(), nmsPlugin) \
                : network->addPluginV2(concat.data(), concat.size(), nmsRotatePlugin);
    vector<string> names = {"scores", "boxes", "classes"};
    for (int i = 0; i < layer->getNbOutputs(); i++) {
        auto output = layer->getOutput(i);
        network->markOutput(*output);
        output->setName(names[i].c_str());
    }
    
    // Build engine
    cout << "Applying optimizations and building TRT CUDA engine..." << endl;
    _plan = std::unique_ptr<IHostMemory>(builder->buildSerializedNetwork(*network, *builderConfig));
}

void Engine::save(const string &path) {
    cout << "Writing to " << path << "..." << endl;
    ofstream file(path, ios::out | ios::binary);
    file.write(reinterpret_cast<const char*>(_plan->data()), _plan->size());
}

void Engine::infer(vector<void *> &buffers, int batch){
    auto dims = _engine->getBindingDimensions(0);
    _context->setBindingDimensions(0, Dims4(batch, dims.d[1], dims.d[2], dims.d[3]));
    _context->enqueueV2(buffers.data(), _stream, nullptr);
    cudaStreamSynchronize(_stream);
}

vector<int> Engine::getInputSize() {
    auto dims = _engine->getBindingDimensions(0);
    return {dims.d[2], dims.d[3]};
}

int Engine::getMaxBatchSize() {
    return _engine->getMaxBatchSize();
}

int Engine::getMaxDetections() {
    return _engine->getBindingDimensions(1).d[1];
}

void Engine::getBindingDimensions() {
    for (int i = 0; i < _engine->getNbBindings(); ++i)
    {
        nvinfer1::Dims bindingDims = _engine->getBindingDimensions(i);
        std::cout << "\nBinding " << i << ":\n" << "    Dimensions: ";
        for (int j = 0; j < bindingDims.nbDims; ++j) {
            std::cout << bindingDims.d[j] << " ";
        }
    }
    std::cout << std::endl;
}

vector<int> Engine::getBindingSizes() {
    vector<int> vec_sizes;
    outDims0 = _engine->getBindingDimensions(1);
    for (int j = 0; j < outDims0.nbDims; j++) {
        outSize0 *= abs(outDims0.d[j]);
    }
    vec_sizes.push_back(outSize0);
    vector<nvinfer1::Dims> vec_dims;
    outDims1 = _engine->getBindingDimensions(2);
    for (int j = 0; j < outDims1.nbDims; j++) {
        outSize1 *= abs(outDims1.d[j]);
    }
    vec_sizes.push_back(outSize1);
    outDims2 = _engine->getBindingDimensions(3);
    for (int j = 0; j < outDims2.nbDims; j++) {
        outSize2 *= abs(outDims2.d[j]);
    }
    vec_sizes.push_back(outSize2);

    cout << "Размеры выходов:" << endl;
    for (int i=0; i<vec_sizes.size(); ++i) {
        cout << vec_sizes[i] << endl;
    }
    return vec_sizes;
}



int Engine::getStride() {
    return 1;
}

}
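
For reference, a more compact, equivalent sketch of getBindingSizes() as it would appear inside namespace odtk; it keeps everything in local variables (the outDims* / outSize* variables above are assumed to be members declared in engine.h and initialised to 1) and returns element counts for the three output bindings:

// Sketch: same result as getBindingSizes() above, with local variables only.
vector<int> Engine::getBindingSizes() {
    vector<int> vec_sizes;
    for (int i = 1; i <= 3; ++i) {  // output bindings: scores, boxes, classes
        nvinfer1::Dims dims = _engine->getBindingDimensions(i);
        int size = 1;
        for (int j = 0; j < dims.nbDims; ++j) {
            size *= abs(dims.d[j]);  // abs() because dynamic dimensions are reported as -1
        }
        vec_sizes.push_back(size);
    }
    cout << "Output sizes (elements):" << endl;
    for (size_t i = 0; i < vec_sizes.size(); ++i) {
        cout << vec_sizes[i] << endl;
    }
    return vec_sizes;
}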
Egorundel changed the title from "Hello, can you tell me why the Boxes display is incorrect?" to "Incorrect drawing of rectangles" on Jan 30, 2024