Incorrect drawing of rectangles #345

Open
Egorundel opened this issue Jan 30, 2024 · 0 comments

Egorundel commented Jan 30, 2024

Hello, can you tell me why the boxes are drawn incorrectly?

Picture of the problem: [screenshot of the incorrectly drawn boxes]

Inference code:
infer.cpp

#include <iostream>
#include <memory>
#include <cmath>
#include <stdexcept>
#include <vector>
#include <chrono>

#include <opencv2/opencv.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>

#include <cuda_runtime.h>

#include "../../csrc/engine.h"

using namespace std;
using namespace cv;

int main(int argc, char *argv[]) {
    if (argc<3 || argc>4) {
        cerr << "Usage: " << argv[0] << " engine.plan image.jpg [<OUTPUT>.png]" << endl;
        return 1;
    }

    cout << "Loading engine..." << endl;
    auto engine = std::make_unique<odtk::Engine>(argv[1]);

    cout << "Preparing data..." << endl;
    auto image = imread(argv[2], IMREAD_COLOR);
    auto inputSize = engine->getInputSize();
    cv::resize(image, image, Size(inputSize[1], inputSize[0]));
    cv::Mat pixels;
    image.convertTo(pixels, CV_32FC3, 1.0 / 255, 0);

    int channels = 3;
    vector<float> img;
    vector<float> data (channels * inputSize[0] * inputSize[1]);

    if (pixels.isContinuous())
        img.assign((float*)pixels.datastart, (float*)pixels.dataend);
    else {
        cerr << "Error reading image " << argv[2] << endl;
        return -1;
    }

    vector<float> mean {0.485, 0.456, 0.406};
    vector<float> std {0.229, 0.224, 0.225};

    for (int c = 0; c < channels; c++) {
        for (int j = 0, hw = inputSize[0] * inputSize[1]; j < hw; j++) {
            data[c * hw + j] = (img[channels * j + 2 - c] - mean[c]) / std[c];
        }
    }

    // Create device buffers
    void *data_d, *classes_d, *scores_d, *boxes_d;
    auto num_det = engine->getMaxDetections();
    cout << "Max Detections: " << num_det << endl;
    engine->getBindingDimensions();

    vector<int> out_sizes = engine->getBindingSizes();
    cudaMalloc(&data_d, 12 * channels * inputSize[0] * inputSize[1]);
    cudaMalloc(&classes_d, out_sizes[0]);
    cudaMalloc(&scores_d, out_sizes[1]);
    cudaMalloc(&boxes_d, out_sizes[2]);


    // Copy image to device
    size_t dataSize = data.size() * sizeof(float);
    cudaMemcpy(data_d, data.data(), dataSize, cudaMemcpyHostToDevice);

    // Run inference n times
    cout << "Running inference..." << endl;
    const int count = 100;
    auto start = chrono::steady_clock::now();
    vector<void *> buffers = { data_d, classes_d, scores_d, boxes_d };
    for (int i = 0; i < count; i++) {
        engine->infer(buffers, 1);
    }
    auto stop = chrono::steady_clock::now();
    auto timing = chrono::duration_cast<chrono::duration<double>>(stop - start);
    cout << "Took " << timing.count() / count << " seconds per inference." << endl;

    cudaFree(data_d);

    // Get back the bounding boxes
    unique_ptr<float[]> classes(new float[out_sizes[0]]);
    unique_ptr<float[]> scores(new float[out_sizes[1]]);
    unique_ptr<float[]> boxes(new float[out_sizes[2]]);
    cudaMemcpy(classes.get(), classes_d, out_sizes[0], cudaMemcpyDeviceToHost);
    cudaMemcpy(scores.get(), scores_d, out_sizes[1], cudaMemcpyDeviceToHost);
    cudaMemcpy(boxes.get(), boxes_d, out_sizes[2], cudaMemcpyDeviceToHost);

    cudaFree(classes_d);
    cudaFree(scores_d);
    cudaFree(boxes_d);

    for (int i = 0; i < num_det; i++) {
        // Show results over confidence threshold
        if (scores[i] >= 0.9f) {
            float x1 = boxes[i*4+0];
            float y1 = boxes[i*4+1];
            float x2 = boxes[i*4+2];
            float y2 = boxes[i*4+3];

            cout << "Found box {" << x1 << ", " << y1 << ", " << x2 << ", " << y2
                 << "} with score " << scores[i] << " and class " << round(abs(classes[i])) << endl;

            // Draw bounding box on image
            cv::rectangle(image, Point(x1, y1), Point(x2, y2), cv::Scalar(0, 255, 0));
        }
    }

    // Write image
    string out_file = argc == 4 ? string(argv[3]) : "detections.png";
    cout << "Saving result to " << out_file << endl;
    imwrite(out_file, image);

    return 0;
}
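
One thing I am not sure about: getBindingSizes() (see engine.cpp below) multiplies the binding dimensions together, so out_sizes holds element counts, while the cudaMalloc / cudaMemcpy calls above use those values directly as byte counts. If that reading is right, the device buffers and the device-to-host copies are only a quarter of the required size. A minimal sketch of the same calls with an explicit sizeof(float) factor, assuming all output bindings are float32 (illustration only, not a confirmed fix):

    // Sketch: assumes out_sizes holds per-binding element counts, as computed
    // by getBindingSizes(), and that all output bindings are float32.
    size_t classesBytes = out_sizes[0] * sizeof(float);
    size_t scoresBytes  = out_sizes[1] * sizeof(float);
    size_t boxesBytes   = out_sizes[2] * sizeof(float);

    cudaMalloc(&classes_d, classesBytes);
    cudaMalloc(&scores_d, scoresBytes);
    cudaMalloc(&boxes_d, boxesBytes);

    // ... run inference as above ...

    // Host buffers are sized in elements; the copies are sized in bytes.
    unique_ptr<float[]> classes(new float[out_sizes[0]]);
    unique_ptr<float[]> scores(new float[out_sizes[1]]);
    unique_ptr<float[]> boxes(new float[out_sizes[2]]);
    cudaMemcpy(classes.get(), classes_d, classesBytes, cudaMemcpyDeviceToHost);
    cudaMemcpy(scores.get(), scores_d, scoresBytes, cudaMemcpyDeviceToHost);
    cudaMemcpy(boxes.get(), boxes_d, boxesBytes, cudaMemcpyDeviceToHost);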

engine.cpp

#include "engine.h"

#include <iostream>
#include <fstream>

#include <NvOnnxConfig.h>
#include <NvOnnxParser.h>

#include "plugins/DecodePlugin.h"
#include "plugins/NMSPlugin.h"
#include "plugins/DecodeRotatePlugin.h"
#include "plugins/NMSRotatePlugin.h"
#include "calibrator.h"

#include <stdio.h>
#include <string>

using namespace nvinfer1;
using namespace nvonnxparser;
using namespace std;

namespace odtk {

class Logger : public ILogger {
public:
    Logger(bool verbose)
        : _verbose(verbose) {
    }

    void log(Severity severity, const char *msg) noexcept override {
        if (_verbose || ((severity != Severity::kINFO) && (severity != Severity::kVERBOSE)))
            cout << msg << endl;
    }

private:
    bool _verbose{false};
};

void Engine::_load(const string &path) {
    /// read a serialized file
    ifstream file(path, ios::in | ios::binary);
    if (!file) {
        cout << "read serialized file failed\n";
        std::exit(1);
    }

    file.seekg(0, std::ios::end);
    const int length = file.tellg();
    file.clear();
    file.seekg(0, ios::beg);
    std::shared_ptr<char> data(new char[length], std::default_delete<char[]>());
    file.read(data.get(), length);
    file.close();

    cout << "model size: " << length << endl;

    /// Initialization of the engine
    _engine = std::unique_ptr<ICudaEngine>(_runtime->deserializeCudaEngine(data.get(), length, nullptr));
}

void Engine::_prepare() {
    _context = std::unique_ptr<IExecutionContext>(_engine->createExecutionContext());
    _context->setOptimizationProfileAsync(0, _stream);
    cudaStreamCreate(&_stream);
}

Engine::Engine(const string &engine_path, bool verbose) {
    Logger logger(verbose);
    _runtime = std::unique_ptr<IRuntime>(createInferRuntime(logger));
    _load(engine_path);
    _prepare();
}

Engine::~Engine() {
    if (_stream) cudaStreamDestroy(_stream);
}

Engine::Engine(const char *onnx_model, size_t onnx_size, const vector<int>& dynamic_batch_opts,
    string precision, float score_thresh, int top_n, const vector<vector<float>>& anchors, 
    bool rotated, float nms_thresh, int detections_per_im, const vector<string>& calibration_images,
    string model_name, string calibration_table, bool verbose, size_t workspace_size) {

    Logger logger(verbose);
    _runtime = std::unique_ptr<IRuntime>(createInferRuntime(logger));

    bool fp16 = precision.compare("FP16") == 0;
    bool int8 = precision.compare("INT8") == 0;

    // Create builder
    auto builder = std::unique_ptr<IBuilder>(createInferBuilder(logger));
    auto builderConfig = std::unique_ptr<IBuilderConfig>(builder->createBuilderConfig());
    // Allow use of FP16 layers when running in INT8
    if(fp16 || int8) builderConfig->setFlag(BuilderFlag::kFP16);
    builderConfig->setMaxWorkspaceSize(workspace_size);
    
    // Parse ONNX FCN
    cout << "Building " << precision << " core model..." << endl;
    const auto flags = 1U << static_cast<int>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
    auto network = std::unique_ptr<INetworkDefinition>(builder->createNetworkV2(flags));
    auto parser = std::unique_ptr<IParser>(createParser(*network, logger));
    parser->parse(onnx_model, onnx_size);
    
    auto input = network->getInput(0);
    auto inputDims = input->getDimensions();
    auto profile = builder->createOptimizationProfile();
    auto inputName = input->getName();
    auto profileDimsmin = Dims4{dynamic_batch_opts[0], inputDims.d[1], inputDims.d[2], inputDims.d[3]};
    auto profileDimsopt = Dims4{dynamic_batch_opts[1], inputDims.d[1], inputDims.d[2], inputDims.d[3]};
    auto profileDimsmax = Dims4{dynamic_batch_opts[2], inputDims.d[1], inputDims.d[2], inputDims.d[3]};

    profile->setDimensions(inputName, nvinfer1::OptProfileSelector::kMIN, profileDimsmin);
    profile->setDimensions(inputName, nvinfer1::OptProfileSelector::kOPT, profileDimsopt);
    profile->setDimensions(inputName, nvinfer1::OptProfileSelector::kMAX, profileDimsmax);
    
    if(profile->isValid())
        builderConfig->addOptimizationProfile(profile);

    std::unique_ptr<Int8EntropyCalibrator> calib;
    if (int8) {
        builderConfig->setFlag(BuilderFlag::kINT8);
        // Calibration is performed using kOPT values of the profile.
        // Calibration batch size must match this profile.
        builderConfig->setCalibrationProfile(profile);
        ImageStream stream(dynamic_batch_opts[1], inputDims, calibration_images);
        calib = std::unique_ptr<Int8EntropyCalibrator>(new Int8EntropyCalibrator(stream, model_name, calibration_table));
        builderConfig->setInt8Calibrator(calib.get());
    }

    // Add decode plugins
    cout << "Building accelerated plugins..." << endl;
    vector<DecodePlugin> decodePlugins;
    vector<DecodeRotatePlugin> decodeRotatePlugins;
    vector<ITensor *> scores, boxes, classes;
    auto nbOutputs = network->getNbOutputs();
    
    for (int i = 0; i < nbOutputs / 2; i++) {
        auto classOutput = network->getOutput(i);
        auto boxOutput = network->getOutput(nbOutputs / 2 + i);
        auto outputDims = classOutput->getDimensions();
        int scale = inputDims.d[2] / outputDims.d[2];
        auto decodePlugin = DecodePlugin(score_thresh, top_n, anchors[i], scale);
        auto decodeRotatePlugin = DecodeRotatePlugin(score_thresh, top_n, anchors[i], scale);
        decodePlugins.push_back(decodePlugin); 
        decodeRotatePlugins.push_back(decodeRotatePlugin);
        vector<ITensor *> inputs = {classOutput, boxOutput};
        auto layer = (!rotated) ? network->addPluginV2(inputs.data(), inputs.size(), decodePlugin) \
                    : network->addPluginV2(inputs.data(), inputs.size(), decodeRotatePlugin);
        scores.push_back(layer->getOutput(0));
        boxes.push_back(layer->getOutput(1));
        classes.push_back(layer->getOutput(2));
    }

    // Cleanup outputs
    for (int i = 0; i < nbOutputs; i++) {
        auto output = network->getOutput(0);
        network->unmarkOutput(*output);
    }

    // Concat tensors from each feature map
    vector<ITensor *> concat;
    for (auto tensors : {scores, boxes, classes}) {
        auto layer = network->addConcatenation(tensors.data(), tensors.size());
        concat.push_back(layer->getOutput(0));
    }
    
    // Add NMS plugin
    auto nmsPlugin = NMSPlugin(nms_thresh, detections_per_im);
    auto nmsRotatePlugin = NMSRotatePlugin(nms_thresh, detections_per_im);
    auto layer = (!rotated) ? network->addPluginV2(concat.data(), concat.size(), nmsPlugin) \
                : network->addPluginV2(concat.data(), concat.size(), nmsRotatePlugin);
    vector<string> names = {"scores", "boxes", "classes"};
    for (int i = 0; i < layer->getNbOutputs(); i++) {
        auto output = layer->getOutput(i);
        network->markOutput(*output);
        output->setName(names[i].c_str());
    }
    
    // Build engine
    cout << "Applying optimizations and building TRT CUDA engine..." << endl;
    _plan = std::unique_ptr<IHostMemory>(builder->buildSerializedNetwork(*network, *builderConfig));
}

void Engine::save(const string &path) {
    cout << "Writing to " << path << "..." << endl;
    ofstream file(path, ios::out | ios::binary);
    file.write(reinterpret_cast<const char*>(_plan->data()), _plan->size());
}

void Engine::infer(vector<void *> &buffers, int batch){
    auto dims = _engine->getBindingDimensions(0);
    _context->setBindingDimensions(0, Dims4(batch, dims.d[1], dims.d[2], dims.d[3]));
    _context->enqueueV2(buffers.data(), _stream, nullptr);
    cudaStreamSynchronize(_stream);
}

vector<int> Engine::getInputSize() {
    auto dims = _engine->getBindingDimensions(0);
    return {dims.d[2], dims.d[3]};
}

int Engine::getMaxBatchSize() {
    return _engine->getMaxBatchSize();
}

int Engine::getMaxDetections() {
    return _engine->getBindingDimensions(1).d[1];
}

void Engine::getBindingDimensions() {
    for (int i = 0; i < _engine->getNbBindings(); ++i)
    {
        nvinfer1::Dims bindingDims = _engine->getBindingDimensions(i);
        std::cout << "\nBinding " << i << ":\n" << "    Dimensions: ";
        for (int j = 0; j < bindingDims.nbDims; ++j) {
            std::cout << bindingDims.d[j] << " ";
        }
    }
    std::cout << std::endl;
}

vector<int> Engine::getBindingSizes() {
    vector<int> vec_sizes;
    outDims0 = _engine->getBindingDimensions(1);
    for (int j = 0; j < outDims0.nbDims; j++) {
        outSize0 *= abs(outDims0.d[j]);
    }
    vec_sizes.push_back(outSize0);
    vector<nvinfer1::Dims> vec_dims;
    outDims1 = _engine->getBindingDimensions(2);
    for (int j = 0; j < outDims1.nbDims; j++) {
        outSize1 *= abs(outDims1.d[j]);
    }
    vec_sizes.push_back(outSize1);
    outDims2 = _engine->getBindingDimensions(3);
    for (int j = 0; j < outDims2.nbDims; j++) {
        outSize2 *= abs(outDims2.d[j]);
    }
    vec_sizes.push_back(outSize2);

    cout << "Размеры выходов:" << endl;
    for (int i=0; i<vec_sizes.size(); ++i) {
        cout << vec_sizes[i] << endl;
    }
    return vec_sizes;
}



int Engine::getStride() {
    return 1;
}

}
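
For reference, a more compact, equivalent sketch of getBindingSizes() as it would appear inside namespace odtk; it keeps everything in local variables (the outDims* / outSize* variables above are assumed to be members declared in engine.h and initialised to 1) and returns element counts for the three output bindings:

// Sketch: same result as getBindingSizes() above, with local variables only.
vector<int> Engine::getBindingSizes() {
    vector<int> vec_sizes;
    for (int i = 1; i <= 3; ++i) {  // output bindings: scores, boxes, classes
        nvinfer1::Dims dims = _engine->getBindingDimensions(i);
        int size = 1;
        for (int j = 0; j < dims.nbDims; ++j) {
            size *= abs(dims.d[j]);  // abs() because dynamic dimensions are reported as -1
        }
        vec_sizes.push_back(size);
    }
    cout << "Output sizes (elements):" << endl;
    for (size_t i = 0; i < vec_sizes.size(); ++i) {
        cout << vec_sizes[i] << endl;
    }
    return vec_sizes;
}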
Egorundel changed the title from "Hello, can you tell me why the Boxes display is incorrect?" to "Incorrect drawing of rectangles" on Jan 30, 2024