forked from linghu8812/tensorrt_inference
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
7cb6ab8
commit b9c00a6
Showing
15 changed files
with
1,332 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
cmake_minimum_required(VERSION 3.5)

project(efficientnet_trt)

# The sources need C++14; make it a hard requirement so CMake does not
# silently fall back to an older standard on compilers that lack it.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# CUDA
find_package(CUDA REQUIRED)
message(STATUS "Find CUDA include at ${CUDA_INCLUDE_DIRS}")
message(STATUS "Find CUDA libraries: ${CUDA_LIBRARIES}")

# TensorRT
# Default location of the TensorRT package; override on the command line with
# -DTENSORRT_ROOT=<path> when TensorRT is installed elsewhere.
set(TENSORRT_ROOT /usr/src/tensorrt/ CACHE PATH "TensorRT installation root")
find_path(TENSORRT_INCLUDE_DIR NvInfer.h
        HINTS ${TENSORRT_ROOT} PATH_SUFFIXES include/)
find_library(TENSORRT_LIBRARY_INFER nvinfer
        HINTS ${TENSORRT_ROOT} ${TENSORRT_BUILD} ${CUDA_TOOLKIT_ROOT_DIR}
        PATH_SUFFIXES lib lib64 lib/x64)
find_library(TENSORRT_LIBRARY_ONNXPARSER nvonnxparser
        HINTS ${TENSORRT_ROOT} ${TENSORRT_BUILD} ${CUDA_TOOLKIT_ROOT_DIR}
        PATH_SUFFIXES lib lib64 lib/x64)
# find_path()/find_library() do not fail on their own: stop at configure time
# instead of handing "<VAR>-NOTFOUND" to the compiler and linker.
if(NOT TENSORRT_INCLUDE_DIR)
    message(FATAL_ERROR "TensorRT header NvInfer.h not found; set -DTENSORRT_ROOT=<path>")
endif()
if(NOT TENSORRT_LIBRARY_INFER OR NOT TENSORRT_LIBRARY_ONNXPARSER)
    message(FATAL_ERROR "TensorRT libraries nvinfer/nvonnxparser not found; set -DTENSORRT_ROOT=<path>")
endif()
message(STATUS "Found TensorRT headers at ${TENSORRT_INCLUDE_DIR}")
set(TENSORRT_LIBRARY ${TENSORRT_LIBRARY_INFER} ${TENSORRT_LIBRARY_ONNXPARSER})
message(STATUS "Find TensorRT libs: ${TENSORRT_LIBRARY}")

# OpenCV
find_package(OpenCV REQUIRED)
message(STATUS "Find OpenCV include at ${OpenCV_INCLUDE_DIRS}")
message(STATUS "Find OpenCV libraries: ${OpenCV_LIBRARIES}")

# Shared helper headers and the bundled yaml-cpp live next to this project.
# Anchor them to the source directory: with the policies selected by
# cmake_minimum_required(3.5) a relative link_directories() path is passed to
# the linker unchanged and would be resolved against the build directory.
set(COMMON_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../includes/common)
set(YAML_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/../includes/yaml-cpp/include)
set(YAML_LIB_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../includes/yaml-cpp/libs)

# target_link_directories() needs CMake 3.13, so the directory-scoped command
# is kept; it must appear before the target it applies to.
link_directories(${YAML_LIB_DIR})

add_executable(efficientnet_trt main.cpp efficientnet.cpp)
target_include_directories(efficientnet_trt PRIVATE
        ${CUDA_INCLUDE_DIRS}
        ${TENSORRT_INCLUDE_DIR}
        ${OpenCV_INCLUDE_DIRS}
        ${COMMON_INCLUDE}
        ${YAML_INCLUDE})
target_link_libraries(efficientnet_trt PRIVATE
        ${OpenCV_LIBRARIES}
        ${CUDA_LIBRARIES}
        ${TENSORRT_LIBRARY}
        yaml-cpp)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# EfficientNet Keras=>ONNX=>TensorRT | ||
|
||
## 1.Reference | ||
- **efficientnet arxiv:** [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946) | ||
- **efficientnet github:** [https://github.com/qubvel/efficientnet](https://github.com/qubvel/efficientnet) | ||
- **keras2onnx:** [https://github.com/onnx/keras-onnx](https://github.com/onnx/keras-onnx/blob/master/tutorial/TensorFlow_Keras_EfficientNet.ipynb) | ||
- **pypi:** [https://pypi.org/project/efficientnet](https://pypi.org/project/efficientnet) | ||
|
||
Run this command to install efficientnet: | ||
``` | ||
pip install efficientnet | ||
``` | ||
|
||
## 2.Export ONNX Model | ||
``` | ||
python3 export_onnx.py | ||
``` | ||
|
||
## 3.Build efficientnet_trt Project | ||
``` | ||
mkdir build && cd build | ||
cmake .. | ||
make -j | ||
``` | ||
|
||
## 4.Run efficientnet_trt | ||
``` | ||
./efficientnet_trt ../config.yaml ../samples | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Settings read by the EfficientNet constructor from the "efficientnet" node.
efficientnet: | ||
# ONNX model that is converted when the engine file cannot be opened.
onnx_file: "../efficientnet-b0.onnx" | ||
# Serialized TensorRT engine; loaded directly when it can be opened.
engine_file: "../efficientnet-b0.trt" | ||
# Label file handed to readImageNetLabel().
labels_file: "../label.txt" | ||
# Number of images submitted per inference call.
BATCH_SIZE: 1 | ||
# Channel count and spatial size of the cropped network input.
INPUT_CHANNEL: 3 | ||
IMAGE_WIDTH: 224 | ||
IMAGE_HEIGHT: 224 | ||
# Per-channel mean and std applied after pixels are scaled by 1/255;
# one entry is read per input channel.
img_mean: [ 0.485, 0.456, 0.406 ] | ||
img_std: [ 0.229, 0.224, 0.225 ] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,165 @@ | ||
#include "efficientnet.h" | ||
#include "yaml-cpp/yaml.h" | ||
#include "common.hpp" | ||
|
||
// Reads every setting from the "efficientnet" node of the YAML file at
// `config_file` and loads the label table named by `labels_file`.
// A missing or mistyped key surfaces as the exception thrown by yaml-cpp.
EfficientNet::EfficientNet(const std::string &config_file) {
    YAML::Node settings = YAML::LoadFile(config_file)["efficientnet"];

    // File locations.
    onnx_file = settings["onnx_file"].as<std::string>();
    engine_file = settings["engine_file"].as<std::string>();
    labels_file = settings["labels_file"].as<std::string>();

    // Input tensor geometry.
    BATCH_SIZE = settings["BATCH_SIZE"].as<int>();
    INPUT_CHANNEL = settings["INPUT_CHANNEL"].as<int>();
    IMAGE_WIDTH = settings["IMAGE_WIDTH"].as<int>();
    IMAGE_HEIGHT = settings["IMAGE_HEIGHT"].as<int>();

    // Per-channel normalisation constants.
    img_mean = settings["img_mean"].as<std::vector<float>>();
    img_std = settings["img_std"].as<std::vector<float>>();

    imagenet_labels = readImageNetLabel(labels_file);
}
|
||
// Defaulted destructor: it does not release `engine` or `context`.
// Both are destroyed at the end of InferenceFolder() instead.
EfficientNet::~EfficientNet() = default; | ||
|
||
void EfficientNet::LoadEngine() { | ||
// create and load engine | ||
std::fstream existEngine; | ||
existEngine.open(engine_file, std::ios::in); | ||
if (existEngine) { | ||
readTrtFile(engine_file, engine); | ||
assert(engine != nullptr); | ||
} else { | ||
onnxToTRTModel(onnx_file, engine_file, engine, BATCH_SIZE); | ||
assert(engine != nullptr); | ||
} | ||
} | ||
|
||
bool EfficientNet::InferenceFolder(const std::string &folder_name) { | ||
std::vector<std::string> sample_images = readFolder(folder_name); | ||
//get context | ||
assert(engine != nullptr); | ||
context = engine->createExecutionContext(); | ||
assert(context != nullptr); | ||
|
||
//get buffers | ||
assert(engine->getNbBindings() == 2); | ||
void *buffers[2]; | ||
std::vector<int64_t> bufferSize; | ||
int nbBindings = engine->getNbBindings(); | ||
bufferSize.resize(nbBindings); | ||
|
||
for (int i = 0; i < nbBindings; ++i) { | ||
nvinfer1::Dims dims = engine->getBindingDimensions(i); | ||
nvinfer1::DataType dtype = engine->getBindingDataType(i); | ||
int64_t totalSize = volume(dims) * 1 * getElementSize(dtype); | ||
bufferSize[i] = totalSize; | ||
std::cout << "binding" << i << ": " << totalSize << std::endl; | ||
cudaMalloc(&buffers[i], totalSize); | ||
} | ||
|
||
//get stream | ||
cudaStream_t stream; | ||
cudaStreamCreate(&stream); | ||
|
||
int outSize = bufferSize[1] / sizeof(float) / BATCH_SIZE; | ||
|
||
EngineInference(sample_images, outSize, buffers, bufferSize, stream); | ||
|
||
// release the stream and the buffers | ||
cudaStreamDestroy(stream); | ||
cudaFree(buffers[0]); | ||
cudaFree(buffers[1]); | ||
|
||
// destroy the engine | ||
context->destroy(); | ||
engine->destroy(); | ||
} | ||
|
||
void EfficientNet::EngineInference(const std::vector<std::string> &image_list, const int &outSize, void **buffers, | ||
const std::vector<int64_t> &bufferSize, cudaStream_t stream) { | ||
int index = 0; | ||
int batch_id = 0; | ||
std::vector<cv::Mat> vec_Mat(BATCH_SIZE); | ||
float total_time = 0; | ||
for (const std::string &image_name : image_list) | ||
{ | ||
index++; | ||
std::cout << "Processing: " << image_name << std::endl; | ||
cv::Mat src_img = cv::imread(image_name); | ||
if (src_img.data) | ||
{ | ||
cv::cvtColor(src_img, src_img, cv::COLOR_BGR2RGB); | ||
vec_Mat[batch_id] = src_img.clone(); | ||
batch_id++; | ||
} | ||
if (batch_id == BATCH_SIZE or index == image_list.size()) | ||
{ | ||
auto t_start_pre = std::chrono::high_resolution_clock::now(); | ||
std::cout << "prepareImage" << std::endl; | ||
std::vector<float>curInput = prepareImage(vec_Mat); | ||
auto t_end_pre = std::chrono::high_resolution_clock::now(); | ||
float total_pre = std::chrono::duration<float, std::milli>(t_end_pre - t_start_pre).count(); | ||
std::cout << "prepare image take: " << total_pre << " ms." << std::endl; | ||
total_time += total_pre; | ||
batch_id = 0; | ||
if (!curInput.data()) { | ||
std::cout << "prepare images ERROR!" << std::endl; | ||
continue; | ||
} | ||
// DMA the input to the GPU, execute the batch asynchronously, and DMA it back: | ||
std::cout << "host2device" << std::endl; | ||
cudaMemcpyAsync(buffers[0], curInput.data(), bufferSize[0], cudaMemcpyHostToDevice, stream); | ||
|
||
// do inference | ||
std::cout << "execute" << std::endl; | ||
auto t_start = std::chrono::high_resolution_clock::now(); | ||
context->execute(BATCH_SIZE, buffers); | ||
auto t_end = std::chrono::high_resolution_clock::now(); | ||
float total_inf = std::chrono::duration<float, std::milli>(t_end - t_start).count(); | ||
std::cout << "Inference take: " << total_inf << " ms." << std::endl; | ||
total_time += total_inf; | ||
std::cout << "execute success" << std::endl; | ||
std::cout << "device2host" << std::endl; | ||
std::cout << "post process" << std::endl; | ||
auto r_start = std::chrono::high_resolution_clock::now(); | ||
float out[outSize * BATCH_SIZE]; | ||
cudaMemcpyAsync(out, buffers[1], bufferSize[1], cudaMemcpyDeviceToHost, stream); | ||
cudaStreamSynchronize(stream); | ||
|
||
for (int i = 0; i < BATCH_SIZE; i++) | ||
{ | ||
auto result = std::max_element(out + i * outSize, out + (i + 1) * outSize); | ||
std::string result_name = imagenet_labels[result - (out + i * outSize)]; | ||
std::cout << "result: " << result_name << std::endl; | ||
} | ||
|
||
auto r_end = std::chrono::high_resolution_clock::now(); | ||
float total_res = std::chrono::duration<float, std::milli>(r_end - r_start).count(); | ||
std::cout << "Post process take: " << total_res << " ms." << std::endl; | ||
total_time += total_res; | ||
vec_Mat = std::vector<cv::Mat>(BATCH_SIZE); | ||
} | ||
} | ||
std::cout << "Average processing time is " << total_time / image_list.size() << "ms" << std::endl; | ||
} | ||
|
||
// Converts a batch of images into one contiguous float tensor.
//
// vec_img : images of the batch; entries without pixel data are skipped.
// returns : BATCH_SIZE * IMAGE_WIDTH * IMAGE_HEIGHT * INPUT_CHANNEL floats.
//           Non-empty images are packed back to back from the start of the
//           buffer; slots that receive no image stay zero.
//
// Each image is resized so that both sides exceed the target size by at least
// 32 pixels (aspect ratio preserved), centre-cropped to
// IMAGE_WIDTH x IMAGE_HEIGHT, scaled by 1/255 and normalised per channel with
// img_mean / img_std.
// NOTE(review): img_mean and img_std are indexed up to INPUT_CHANNEL without a
// size check -- confirm the configuration always supplies enough entries.
std::vector<float> EfficientNet::prepareImage(std::vector<cv::Mat> &vec_img) {
    std::vector<float> result(BATCH_SIZE * IMAGE_WIDTH * IMAGE_HEIGHT * INPUT_CHANNEL);
    // Number of floats occupied by one preprocessed image.
    const int imageLength = IMAGE_WIDTH * IMAGE_HEIGHT * INPUT_CHANNEL;
    float *data = result.data();
    int written = 0;
    for (const cv::Mat &src_img : vec_img)
    {
        if (!src_img.data)
            continue;
        // Never write past the end of the batch buffer.
        if (written == BATCH_SIZE)
            break;
        cv::Mat rsz_img, flt_img;
        float ratio = std::max(float(IMAGE_WIDTH + 32) / float(src_img.cols), float(IMAGE_HEIGHT + 32) / float(src_img.rows));
        cv::resize(src_img, rsz_img, cv::Size(), ratio, ratio);
        // Centre crop to the network input size.
        flt_img = rsz_img(cv::Rect((rsz_img.cols - IMAGE_WIDTH) / 2, (rsz_img.rows - IMAGE_HEIGHT) / 2, IMAGE_WIDTH, IMAGE_HEIGHT));
        flt_img.convertTo(flt_img, CV_32FC3, 1.0 / 255);
        std::vector<cv::Mat> split_img(INPUT_CHANNEL);
        cv::split(flt_img, split_img);
        for (int i = 0; i < INPUT_CHANNEL; ++i)
            split_img[i] = (split_img[i] - img_mean[i]) / img_std[i];
        cv::merge(split_img, flt_img);
        memcpy(data, flt_img.data, imageLength * sizeof(float));
        // Advance to the next slot; previously every image overwrote slot 0,
        // which broke any BATCH_SIZE greater than 1.
        data += imageLength;
        ++written;
    }
    return result;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
#ifndef EFFICIENTNET_TRT_EFFICIENTNET_H | ||
#define EFFICIENTNET_TRT_EFFICIENTNET_H | ||
|
||
#include <opencv2/opencv.hpp> | ||
#include "NvInfer.h" | ||
|
||
class EfficientNet | ||
{ | ||
public: | ||
EfficientNet(const std::string &config_file); | ||
~EfficientNet(); | ||
void LoadEngine(); | ||
bool InferenceFolder(const std::string &folder_name); | ||
|
||
private: | ||
void EngineInference(const std::vector<std::string> &image_list, const int &outSize,void **buffers, | ||
const std::vector<int64_t> &bufferSize, cudaStream_t stream); | ||
std::vector<float> prepareImage(std::vector<cv::Mat> & vec_img); | ||
std::string onnx_file; | ||
std::string engine_file; | ||
std::string labels_file; | ||
std::map<int, std::string> imagenet_labels; | ||
int BATCH_SIZE; | ||
int INPUT_CHANNEL; | ||
int IMAGE_WIDTH; | ||
int IMAGE_HEIGHT; | ||
std::vector<float> img_mean; | ||
std::vector<float> img_std; | ||
nvinfer1::ICudaEngine *engine = nullptr; | ||
nvinfer1::IExecutionContext *context = nullptr; | ||
}; | ||
|
||
#endif //EFFICIENTNET_TRT_EFFICIENTNET_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
import onnx | ||
import keras2onnx | ||
import efficientnet.tfkeras as efn | ||
import argparse | ||
|
||
# Command line: only the batch size baked into the exported graph is configurable.
parser = argparse.ArgumentParser(description='Export efficientnet ONNX')
parser.add_argument('--batch_size', default=1, type=int, help='batch size.')
args = parser.parse_args()

# EfficientNet-B0 with ImageNet weights, converted to an ONNX graph.
model = efn.EfficientNetB0(weights='imagenet')
onnx_model = keras2onnx.convert_keras(model, model.name)

# Pin the leading (batch) dimension of the first input and the first output.
for tensor in (onnx_model.graph.input[0], onnx_model.graph.output[0]):
    tensor.type.tensor_type.shape.dim[0].dim_value = args.batch_size

# The output file is named after the Keras model.
output_path = model.name + '.onnx'
onnx.save_model(onnx_model, output_path)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import os | ||
import numpy as np | ||
import efficientnet.tfkeras as efn | ||
from tensorflow.keras.applications.imagenet_utils import decode_predictions, preprocess_input | ||
from efficientnet.preprocessing import center_crop_and_resize | ||
from skimage.io import imread | ||
|
||
# Reference run of the Keras model over ./samples, printing the top-1 class.
model = efn.EfficientNetB0(weights='imagenet')

# Each line of label.txt contributes the text after its first single quote
# (up to the next one, if any).
text_labels = []
with open('./label.txt', 'r') as label_file:
    for line in label_file:
        text_labels.append(line.split("'")[1])

samples_dir = './samples'
for image_name in os.listdir(samples_dir):
    image_path = os.path.join(samples_dir, image_name)
    print(image_path)
    pixels = imread(image_path)
    # Side length of the model input, taken from the model itself.
    side = model.input_shape[1]
    cropped = center_crop_and_resize(pixels, image_size=side)
    normalized = preprocess_input(cropped, mode='torch')
    batch = np.expand_dims(normalized, 0)
    scores = model.predict(batch)
    # Best prediction for the single image in the batch; fields 1 and 2 are
    # printed as the class name and the probability.
    top1 = decode_predictions(scores, top=1)[0][0]
    print('With prob = %.2f, it contains %s' % (top1[2] * 100, top1[1]))
Oops, something went wrong.