finish efficientnet keras2onnx code

lcd1314 · Nov 17, 2020 · b9c00a6 · b9c00a6
1 parent 7cb6ab8
commit b9c00a6
Show file tree

Hide file tree

Showing 15 changed files with 1,332 additions and 0 deletions.
diff --git a/README.md b/README.md
@@ -11,6 +11,7 @@ models|framework|instruction
 [alexnet](alexnet)|MXNet Gluon|MXNet Gluon example
 [arcface](arcface)|MXNet Symbol|MXNet Symbol and face recognition example
 [CenterFace](CenterFace)|ONNX|rewrite ONNX model and face detection example
+[efficientnet](efficientnet)|Keras|Keras to ONNX example
 [face_alignment](face_alignment)|MXNet Symbol|MXNet Symbol and face key points  detection example
 [FCN](FCN)|GluonCV|MXNet GluonCV semantic segmentation example
 [gender-age](gender-age)|MXNet Symbol|MXNet Symbol and face gender and age recognize example

diff --git a/efficientnet/CMakeLists.txt b/efficientnet/CMakeLists.txt
@@ -0,0 +1,39 @@
+# Build script for the efficientnet_trt demo executable (CUDA + TensorRT + OpenCV + yaml-cpp).
+cmake_minimum_required(VERSION 3.5)
+
+project(efficientnet_trt)
+
+set(CMAKE_CXX_STANDARD 14)
+# Fail at configure time instead of silently falling back to an older standard.
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# CUDA
+find_package(CUDA REQUIRED)
+message(STATUS "Find CUDA include at ${CUDA_INCLUDE_DIRS}")
+message(STATUS "Find CUDA libraries: ${CUDA_LIBRARIES}")
+
+# TensorRT
+# Cache variable so the location can be overridden with -DTENSORRT_ROOT=<dir>;
+# the default is unchanged.
+set(TENSORRT_ROOT /usr/src/tensorrt/ CACHE PATH "TensorRT installation root")
+find_path(TENSORRT_INCLUDE_DIR NvInfer.h
+        HINTS ${TENSORRT_ROOT} PATH_SUFFIXES include/)
+message(STATUS "Found TensorRT headers at ${TENSORRT_INCLUDE_DIR}")
+find_library(TENSORRT_LIBRARY_INFER nvinfer
+        HINTS ${TENSORRT_ROOT} ${TENSORRT_BUILD} ${CUDA_TOOLKIT_ROOT_DIR}
+        PATH_SUFFIXES lib lib64 lib/x64)
+find_library(TENSORRT_LIBRARY_ONNXPARSER nvonnxparser
+        HINTS  ${TENSORRT_ROOT} ${TENSORRT_BUILD} ${CUDA_TOOLKIT_ROOT_DIR}
+        PATH_SUFFIXES lib lib64 lib/x64)
+# Report a missing TensorRT explicitly rather than through a generic NOTFOUND error.
+if(NOT TENSORRT_INCLUDE_DIR OR NOT TENSORRT_LIBRARY_INFER OR NOT TENSORRT_LIBRARY_ONNXPARSER)
+    message(FATAL_ERROR "TensorRT not found; pass -DTENSORRT_ROOT=<TensorRT installation directory>")
+endif()
+set(TENSORRT_LIBRARY ${TENSORRT_LIBRARY_INFER} ${TENSORRT_LIBRARY_ONNXPARSER})
+message(STATUS "Find TensorRT libs: ${TENSORRT_LIBRARY}")
+
+# OpenCV
+find_package(OpenCV REQUIRED)
+message(STATUS "Find OpenCV include at ${OpenCV_INCLUDE_DIRS}")
+message(STATUS "Find OpenCV libraries: ${OpenCV_LIBRARIES}")
+
+# Shared helpers and the bundled yaml-cpp live one directory above this project.
+set(COMMON_INCLUDE ../includes/common)
+set(YAML_INCLUDE ../includes/yaml-cpp/include)
+set(YAML_LIB_DIR ../includes/yaml-cpp/libs)
+
+include_directories(${CUDA_INCLUDE_DIRS} ${TENSORRT_INCLUDE_DIR} ${OpenCV_INCLUDE_DIRS} ${COMMON_INCLUDE} ${YAML_INCLUDE})
+link_directories(${YAML_LIB_DIR})
+
+add_executable(efficientnet_trt main.cpp efficientnet.cpp)
+target_link_libraries(efficientnet_trt ${OpenCV_LIBRARIES} ${CUDA_LIBRARIES} ${TENSORRT_LIBRARY} yaml-cpp)
diff --git a/efficientnet/README.md b/efficientnet/README.md
@@ -0,0 +1,29 @@
+# EfficientNet Keras=>ONNX=>TensorRT
+
+## 1.Reference
+- **efficientnet arxiv:** [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946)
+- **efficientnet github:** [https://github.com/qubvel/efficientnet](https://github.com/qubvel/efficientnet)
+- **keras2onnx:** [https://github.com/onnx/keras-onnx](https://github.com/onnx/keras-onnx/blob/master/tutorial/TensorFlow_Keras_EfficientNet.ipynb)
+- **pypi:** [https://pypi.org/project/efficientnet](https://pypi.org/project/efficientnet)
+
+Run the following command to install the `efficientnet` Python package:
+```
+pip install efficientnet
+```
+
+## 2.Export ONNX Model
+```
+python3 export_onnx.py
+```
+
+## 3.Build efficientnet_trt Project
+```
+mkdir build && cd build
+cmake ..
+make -j
+```
+
+## 4.Run efficientnet_trt
+```
+./efficientnet_trt ../config.yaml ../samples
+```
diff --git a/efficientnet/config.yaml b/efficientnet/config.yaml
@@ -0,0 +1,10 @@
+efficientnet:
+    onnx_file:     "../efficientnet-b0.onnx"  # ONNX model converted to an engine when engine_file cannot be opened
+    engine_file:   "../efficientnet-b0.trt"  # serialized TensorRT engine loaded by LoadEngine() when it can be opened
+    labels_file:   "../label.txt"  # label table read by readImageNetLabel() in the constructor
+    BATCH_SIZE:    1  # number of images handed to one execute() call
+    INPUT_CHANNEL: 3  # channels per input image
+    IMAGE_WIDTH:   224  # width of the centre crop fed to the network
+    IMAGE_HEIGHT:  224  # height of the centre crop fed to the network
+    img_mean:      [ 0.485, 0.456, 0.406 ]  # per-channel mean subtracted after pixels are scaled by 1/255
+    img_std:       [ 0.229, 0.224, 0.225 ]  # per-channel divisor applied after the mean subtraction
diff --git a/efficientnet/efficientnet.cpp b/efficientnet/efficientnet.cpp
@@ -0,0 +1,165 @@
+#include "efficientnet.h"
+#include "yaml-cpp/yaml.h"
+#include "common.hpp"
+
+// Reads every setting from the `efficientnet` section of the YAML file at
+// `config_file`, then loads the label table named by `labels_file`.
+// A missing or mistyped key surfaces as whatever yaml-cpp throws from as<>().
+EfficientNet::EfficientNet(const std::string &config_file) {
+    YAML::Node document = YAML::LoadFile(config_file);
+    YAML::Node section = document["efficientnet"];
+
+    // File locations.
+    onnx_file = section["onnx_file"].as<std::string>();
+    engine_file = section["engine_file"].as<std::string>();
+    labels_file = section["labels_file"].as<std::string>();
+
+    // Input tensor geometry.
+    BATCH_SIZE = section["BATCH_SIZE"].as<int>();
+    INPUT_CHANNEL = section["INPUT_CHANNEL"].as<int>();
+    IMAGE_WIDTH = section["IMAGE_WIDTH"].as<int>();
+    IMAGE_HEIGHT = section["IMAGE_HEIGHT"].as<int>();
+
+    // Per-channel normalisation constants used by prepareImage().
+    img_mean = section["img_mean"].as<std::vector<float>>();
+    img_std = section["img_std"].as<std::vector<float>>();
+
+    imagenet_labels = readImageNetLabel(labels_file);
+}
+
+EfficientNet::~EfficientNet() = default;  // frees nothing itself: context and engine are destroyed inside InferenceFolder()
+
+// Fills `engine`. When `engine_file` can be opened for reading it is handed to
+// readTrtFile(); otherwise onnxToTRTModel() is called with `onnx_file`,
+// `engine_file` and BATCH_SIZE (presumably it also writes the engine file --
+// TODO confirm in common.hpp). Either way the engine must be non-null afterwards.
+void EfficientNet::LoadEngine() {
+    std::fstream cached_engine(engine_file, std::ios::in);
+    const bool cache_missing = !cached_engine;
+    if (cache_missing) {
+        onnxToTRTModel(onnx_file, engine_file, engine, BATCH_SIZE);
+    } else {
+        readTrtFile(engine_file, engine);
+    }
+    assert(engine != nullptr);
+}
+
+// Classifies every image listed by readFolder(folder_name) with the loaded engine.
+//
+// Allocates one device buffer per engine binding (exactly two are supported:
+// binding 0 is used as the input, binding 1 as the output), creates a CUDA
+// stream, delegates the per-batch work to EngineInference(), and finally
+// releases the buffers, the stream, the execution context and the engine.
+//
+// Returns true when the run completed, false when a precondition or a CUDA
+// allocation failed. Once an execution context has been created the engine is
+// destroyed on every exit path and `engine`/`context` are reset to nullptr, so
+// LoadEngine() must be called again before another run.
+bool EfficientNet::InferenceFolder(const std::string &folder_name) {
+    std::vector<std::string> sample_images = readFolder(folder_name);
+
+    // Explicit checks instead of assert(): they must also hold in NDEBUG builds.
+    if (engine == nullptr) {
+        std::cerr << "InferenceFolder: no engine loaded, call LoadEngine() first" << std::endl;
+        return false;
+    }
+    if (BATCH_SIZE <= 0) {
+        std::cerr << "InferenceFolder: BATCH_SIZE must be positive, got " << BATCH_SIZE << std::endl;
+        return false;
+    }
+    // `buffers` below has room for exactly one input and one output binding.
+    const int nbBindings = engine->getNbBindings();
+    if (nbBindings != 2) {
+        std::cerr << "InferenceFolder: expected 2 bindings, engine has " << nbBindings << std::endl;
+        return false;
+    }
+
+    //get context
+    context = engine->createExecutionContext();
+    if (context == nullptr) {
+        std::cerr << "InferenceFolder: failed to create execution context" << std::endl;
+        return false;
+    }
+
+    //get buffers
+    void *buffers[2] = {nullptr, nullptr};
+    std::vector<int64_t> bufferSize(nbBindings);
+    cudaStream_t stream = nullptr;
+
+    // Releases everything owned by this call. Buffers that were never allocated
+    // are still nullptr, which cudaFree accepts. The members are nulled so a
+    // later call cannot touch destroyed objects.
+    auto release = [&]() {
+        if (stream != nullptr)
+            cudaStreamDestroy(stream);
+        cudaFree(buffers[0]);
+        cudaFree(buffers[1]);
+        context->destroy();
+        engine->destroy();
+        context = nullptr;
+        engine = nullptr;
+    };
+
+    for (int i = 0; i < nbBindings; ++i) {
+        nvinfer1::Dims dims = engine->getBindingDimensions(i);
+        nvinfer1::DataType dtype = engine->getBindingDataType(i);
+        int64_t totalSize = volume(dims) * getElementSize(dtype);
+        bufferSize[i] = totalSize;
+        std::cout << "binding" << i << ": " << totalSize << std::endl;
+        const cudaError_t status = cudaMalloc(&buffers[i], totalSize);
+        if (status != cudaSuccess) {
+            std::cerr << "InferenceFolder: cudaMalloc failed for binding " << i << ": "
+                      << cudaGetErrorString(status) << std::endl;
+            release();
+            return false;
+        }
+    }
+
+    //get stream
+    const cudaError_t stream_status = cudaStreamCreate(&stream);
+    if (stream_status != cudaSuccess) {
+        std::cerr << "InferenceFolder: cudaStreamCreate failed: "
+                  << cudaGetErrorString(stream_status) << std::endl;
+        stream = nullptr;
+        release();
+        return false;
+    }
+
+    // Number of floats per image in the output binding (output is read as float).
+    int outSize = bufferSize[1] / sizeof(float) / BATCH_SIZE;
+
+    EngineInference(sample_images, outSize, buffers, bufferSize, stream);
+
+    // release the stream, the buffers, the context and the engine
+    release();
+    return true;
+}
+
+// Runs the engine over `image_list` in batches of up to BATCH_SIZE images and
+// prints the top-1 label of every image that could be read.
+//
+//   image_list  paths of the images to classify; unreadable files are skipped
+//   outSize     number of floats the output binding holds per image
+//   buffers     device buffers: [0] receives the input, [1] holds the output
+//   bufferSize  byte sizes of the two device buffers
+//   stream      CUDA stream used for the host<->device copies
+//
+// Timing of preprocessing, inference and post-processing is printed per batch,
+// followed by the average over all listed images.
+void EfficientNet::EngineInference(const std::vector<std::string> &image_list, const int &outSize, void **buffers,
+                              const std::vector<int64_t> &bufferSize, cudaStream_t stream) {
+    std::size_t index = 0;
+    int batch_id = 0;
+    std::vector<cv::Mat> vec_Mat(BATCH_SIZE);
+    // Host copy of the output binding; a std::vector replaces the former
+    // variable-length array, which is not standard C++. Reused for every batch.
+    std::vector<float> out(static_cast<std::size_t>(outSize) * BATCH_SIZE);
+    float total_time = 0;
+    for (const std::string &image_name : image_list)
+    {
+        index++;
+        std::cout << "Processing: " << image_name << std::endl;
+        cv::Mat src_img = cv::imread(image_name);
+        if (src_img.data)
+        {
+            cv::cvtColor(src_img, src_img, cv::COLOR_BGR2RGB);
+            // cv::Mat is reference counted and src_img is local to this
+            // iteration, so sharing the buffer is safe and avoids a deep copy.
+            vec_Mat[batch_id] = src_img;
+            batch_id++;
+        }
+        // Keep collecting until the batch is full or the list is exhausted.
+        if (batch_id != BATCH_SIZE && index != image_list.size())
+            continue;
+
+        // Only the first batch_count slots hold an image; the rest is padding.
+        const int batch_count = batch_id;
+        batch_id = 0;
+        if (batch_count == 0)
+            continue;  // the trailing files could not be read: nothing to run
+
+        auto t_start_pre = std::chrono::high_resolution_clock::now();
+        std::cout << "prepareImage" << std::endl;
+        std::vector<float> curInput = prepareImage(vec_Mat);
+        auto t_end_pre = std::chrono::high_resolution_clock::now();
+        float total_pre = std::chrono::duration<float, std::milli>(t_end_pre - t_start_pre).count();
+        std::cout << "prepare image take: " << total_pre << " ms." << std::endl;
+        total_time += total_pre;
+        if (curInput.empty()) {
+            std::cout << "prepare images ERROR!" << std::endl;
+            vec_Mat = std::vector<cv::Mat>(BATCH_SIZE);
+            continue;
+        }
+        // DMA the input to the GPU, execute the batch, and DMA the result back.
+        std::cout << "host2device" << std::endl;
+        // Never read past the host buffer even if the binding is larger.
+        const std::size_t inputBytes = std::min(static_cast<std::size_t>(bufferSize[0]),
+                                                curInput.size() * sizeof(float));
+        cudaMemcpyAsync(buffers[0], curInput.data(), inputBytes, cudaMemcpyHostToDevice, stream);
+        // execute() takes no stream argument, so make sure the upload issued on
+        // `stream` has finished before the engine reads the input buffer.
+        cudaStreamSynchronize(stream);
+
+        // do inference
+        std::cout << "execute" << std::endl;
+        auto t_start = std::chrono::high_resolution_clock::now();
+        context->execute(BATCH_SIZE, buffers);
+        auto t_end = std::chrono::high_resolution_clock::now();
+        float total_inf = std::chrono::duration<float, std::milli>(t_end - t_start).count();
+        std::cout << "Inference take: " << total_inf << " ms." << std::endl;
+        total_time += total_inf;
+        std::cout << "execute success" << std::endl;
+        std::cout << "device2host" << std::endl;
+        std::cout << "post process" << std::endl;
+        auto r_start = std::chrono::high_resolution_clock::now();
+        // Copy exactly what `out` can hold; this never exceeds bufferSize[1].
+        cudaMemcpyAsync(out.data(), buffers[1], out.size() * sizeof(float), cudaMemcpyDeviceToHost, stream);
+        cudaStreamSynchronize(stream);
+
+        // Report only the slots that carry a real image.
+        for (int i = 0; i < batch_count; i++)
+        {
+            const float *scores = out.data() + static_cast<std::size_t>(i) * outSize;
+            const float *best = std::max_element(scores, scores + outSize);
+            // find() rather than operator[] so an unknown class index does not
+            // insert an empty entry into the label table.
+            const auto label = imagenet_labels.find(static_cast<int>(best - scores));
+            const std::string result_name = (label != imagenet_labels.end()) ? label->second : std::string();
+            std::cout << "result: " << result_name << std::endl;
+        }
+
+        auto r_end = std::chrono::high_resolution_clock::now();
+        float total_res = std::chrono::duration<float, std::milli>(r_end - r_start).count();
+        std::cout << "Post process take: " << total_res << " ms." << std::endl;
+        total_time += total_res;
+        vec_Mat = std::vector<cv::Mat>(BATCH_SIZE);
+    }
+    // Avoid printing a 0/0 average when the folder was empty.
+    if (!image_list.empty())
+        std::cout << "Average processing time is " << total_time / image_list.size() << "ms" << std::endl;
+}
+
+// Converts a batch of RGB images into the float buffer uploaded to the engine.
+//
+// Each non-empty image is scaled so both sides are at least 32 pixels larger
+// than the network input, centre-cropped to IMAGE_WIDTH x IMAGE_HEIGHT, scaled
+// by 1/255 and normalised per channel with img_mean / img_std. The interleaved
+// result is written to the slot matching the image's position in `vec_img`;
+// empty images leave their slot zero-filled.
+//
+// Returns BATCH_SIZE * IMAGE_WIDTH * IMAGE_HEIGHT * INPUT_CHANNEL floats, or an
+// empty vector when img_mean / img_std provide fewer values than INPUT_CHANNEL.
+std::vector<float> EfficientNet::prepareImage(std::vector<cv::Mat> &vec_img) {
+    if (INPUT_CHANNEL < 0 ||
+        img_mean.size() < static_cast<std::size_t>(INPUT_CHANNEL) ||
+        img_std.size() < static_cast<std::size_t>(INPUT_CHANNEL)) {
+        std::cout << "img_mean/img_std must provide one value per input channel" << std::endl;
+        return {};
+    }
+
+    const std::size_t imageLength = static_cast<std::size_t>(IMAGE_WIDTH) * IMAGE_HEIGHT * INPUT_CHANNEL;
+    std::vector<float> result(static_cast<std::size_t>(BATCH_SIZE) * imageLength);
+    // Never write past the buffer, even if more images than BATCH_SIZE are passed.
+    const std::size_t slots = std::min(vec_img.size(), static_cast<std::size_t>(BATCH_SIZE));
+    for (std::size_t slot = 0; slot < slots; ++slot)
+    {
+        const cv::Mat &src_img = vec_img[slot];
+        if (!src_img.data)
+            continue;
+        cv::Mat rsz_img, flt_img;
+        float ratio = std::max(float(IMAGE_WIDTH + 32) / float(src_img.cols), float(IMAGE_HEIGHT + 32) / float(src_img.rows));
+        cv::resize(src_img, rsz_img, cv::Size(), ratio, ratio);
+        flt_img = rsz_img(cv::Rect((rsz_img.cols - IMAGE_WIDTH) / 2, (rsz_img.rows - IMAGE_HEIGHT) / 2, IMAGE_WIDTH, IMAGE_HEIGHT));
+        flt_img.convertTo(flt_img, CV_32FC3, 1.0 / 255);
+        std::vector<cv::Mat> split_img(INPUT_CHANNEL);
+        cv::split(flt_img, split_img);
+        for (int i = 0; i < INPUT_CHANNEL; ++i)
+            split_img[i] = (split_img[i] - img_mean[i]) / img_std[i];
+        cv::merge(split_img, flt_img);
+        // Each image gets its own slot; writing every image to the start of the
+        // buffer would make all but the last one in a batch disappear.
+        memcpy(result.data() + slot * imageLength, flt_img.data, imageLength * sizeof(float));
+    }
+    return result;
+}
diff --git a/efficientnet/efficientnet.h b/efficientnet/efficientnet.h
@@ -0,0 +1,33 @@
+#ifndef EFFICIENTNET_TRT_EFFICIENTNET_H
+#define EFFICIENTNET_TRT_EFFICIENTNET_H
+
+#include <opencv2/opencv.hpp>
+#include "NvInfer.h"
+
+/// Image classifier that runs an EfficientNet model through TensorRT.
+///
+/// Typical use: construct from a YAML config, call LoadEngine(), then
+/// InferenceFolder() on a directory of images.
+class EfficientNet
+{
+public:
+    /// Reads the `efficientnet` section of the YAML file at `config_file` and
+    /// loads the label table it names. `explicit` so that a path string is
+    /// never converted to an EfficientNet implicitly.
+    explicit EfficientNet(const std::string &config_file);
+    ~EfficientNet();
+    /// Loads the engine from `engine_file` when that file can be opened,
+    /// otherwise builds it from `onnx_file`.
+    void LoadEngine();
+    /// Runs inference on the images found in `folder_name` and prints the
+    /// predicted label for each. The execution context and the engine are
+    /// destroyed before this call returns.
+    bool InferenceFolder(const std::string &folder_name);
+
+private:
+    /// Batches `image_list`, runs the engine and prints results and timings.
+    /// `buffers`/`bufferSize` describe the two device bindings; `outSize` is the
+    /// number of output floats per image.
+    void EngineInference(const std::vector<std::string> &image_list, const int &outSize,void **buffers,
+                         const std::vector<int64_t> &bufferSize, cudaStream_t stream);
+    /// Resizes, centre-crops and normalises a batch into one float buffer.
+    std::vector<float> prepareImage(std::vector<cv::Mat> & vec_img);
+    std::string onnx_file;      ///< ONNX model used when no engine file can be opened
+    std::string engine_file;    ///< serialized TensorRT engine
+    std::string labels_file;    ///< file the label table is read from
+    std::map<int, std::string> imagenet_labels;  ///< class index -> label name
+    int BATCH_SIZE;             ///< images per execute() call
+    int INPUT_CHANNEL;          ///< channels per input image
+    int IMAGE_WIDTH;            ///< network input width in pixels
+    int IMAGE_HEIGHT;           ///< network input height in pixels
+    std::vector<float> img_mean;  ///< per-channel mean subtracted from pixels scaled by 1/255
+    std::vector<float> img_std;   ///< per-channel divisor applied after the mean subtraction
+    nvinfer1::ICudaEngine *engine = nullptr;          ///< set by LoadEngine(), destroyed by InferenceFolder()
+    nvinfer1::IExecutionContext *context = nullptr;   ///< created and destroyed inside InferenceFolder()
+};
+
+#endif //EFFICIENTNET_TRT_EFFICIENTNET_H
diff --git a/efficientnet/export_onnx.py b/efficientnet/export_onnx.py
@@ -0,0 +1,15 @@
+import onnx
+import keras2onnx
+import efficientnet.tfkeras as efn
+import argparse
+
+parser = argparse.ArgumentParser(description='Export efficientnet ONNX')
+parser.add_argument('--batch_size', default=1, type=int, help='batch size.')
+args = parser.parse_args()
+
+model = efn.EfficientNetB0(weights='imagenet')
+
+onnx_model = keras2onnx.convert_keras(model, model.name)
+onnx_model.graph.input[0].type.tensor_type.shape.dim[0].dim_value = args.batch_size
+onnx_model.graph.output[0].type.tensor_type.shape.dim[0].dim_value = args.batch_size
+onnx.save_model(onnx_model, model.name + '.onnx')
diff --git a/efficientnet/inference.py b/efficientnet/inference.py
@@ -0,0 +1,24 @@
+import os
+import numpy as np
+import efficientnet.tfkeras as efn
+from tensorflow.keras.applications.imagenet_utils import decode_predictions, preprocess_input
+from efficientnet.preprocessing import center_crop_and_resize
+from skimage.io import imread
+
+model = efn.EfficientNetB0(weights='imagenet')
+with open('./label.txt', 'r') as f:
+    text_labels = [''.join(l.split("'")[1]) for l in f]
+
+image_list = os.listdir('./samples')
+for image_name in image_list:
+    image_path = os.path.join('./samples', image_name)
+    print(image_path)
+    image = imread(image_path)
+    image_size = model.input_shape[1]
+    x = center_crop_and_resize(image, image_size=image_size)
+    x = preprocess_input(x, mode='torch')
+    inputs = np.expand_dims(x, 0)
+    expected = model.predict(inputs)
+    result = decode_predictions(expected, top=1)
+    print('With prob = %.2f, it contains %s' % (
+        result[0][0][2] * 100, result[0][0][1]))