Merge pull request meituan#779 from meituan/reformat_parameter_of_input_size

Reformat parameters of input size
ZHEQIUSHUI · Apr 14, 2023 · 37bb5c2 · 37bb5c2
2 parents d15a2a2 + 17dff7f
commit 37bb5c2
Show file tree

Hide file tree

Showing 13 changed files with 136 additions and 195 deletions.
diff --git a/configs/experiment/eval_640_repro.py b/configs/experiment/eval_640_repro.py
@@ -3,90 +3,57 @@
 eval_params = dict(
     default = dict(
         img_size=640,
-        test_load_size=638,
-        letterbox_return_int=True,
-        scale_exact=True,
-        force_no_pad=True,
-        not_infer_on_rect=True,
+        shrink_size=2,
+        infer_on_rect=False,
     ),
     yolov6n = dict(
         img_size=640,
-        test_load_size=636,
-        letterbox_return_int=True,
-        scale_exact=True,
-        force_no_pad=True,
-        not_infer_on_rect=True,
+        shrink_size=4,
+        infer_on_rect=False,
     ),
     yolov6t = dict(
         img_size=640,
-        test_load_size=634,
-        letterbox_return_int=True,
-        scale_exact=True,
-        force_no_pad=True,
-        not_infer_on_rect=True,
+        shrink_size=6,
+        infer_on_rect=False,
     ),
     yolov6s = dict(
         img_size=640,
-        test_load_size=634,
-        letterbox_return_int=True,
-        scale_exact=True,
-        force_no_pad=True,
-        not_infer_on_rect=True,
+        shrink_size=6,
+        infer_on_rect=False,
     ),
     yolov6m = dict(
         img_size=640,
-        test_load_size=636,
-        letterbox_return_int=True,
-        scale_exact=True,
-        force_no_pad=True,
-        not_infer_on_rect=True,
+        shrink_size=4,
+        infer_on_rect=False,
     ),
     yolov6l = dict(
         img_size=640,
-        test_load_size=636,
-        letterbox_return_int=True,
-        scale_exact=True,
-        force_no_pad=True,
-        not_infer_on_rect=True,
+        shrink_size=4,
+        infer_on_rect=False,
     ),
     yolov6l_relu = dict(
         img_size=640,
-        test_load_size=638,
-        letterbox_return_int=True,
-        scale_exact=True,
-        force_no_pad=True,
-        not_infer_on_rect=True,
+        shrink_size=2,
+        infer_on_rect=False,
     ),
     yolov6n6 = dict(
         img_size=1280,
-        test_load_size=1263,
-        letterbox_return_int=True,
-        scale_exact=True,
-        force_no_pad=True,
-        not_infer_on_rect=True,
+        shrink_size=17,
+        infer_on_rect=False,
     ),
     yolov6s6 = dict(
         img_size=1280,
-        test_load_size=1272,
-        letterbox_return_int=True,
-        scale_exact=True,
-        force_no_pad=True,
-        not_infer_on_rect=True,
+        shrink_size=8,
+        infer_on_rect=False,
     ),
     yolov6m6 = dict(
         img_size=1280,
-        test_load_size=1216,
-        letterbox_return_int=True,
-        scale_exact=True,
-        force_no_pad=True,
-        not_infer_on_rect=True,
+        shrink_size=64,
+        infer_on_rect=False,
     ),
     yolov6l6 = dict(
         img_size=1280,
-        test_load_size=1239,
-        letterbox_return_int=True,
-        scale_exact=True,
-        force_no_pad=True,
-        not_infer_on_rect=True,
+        shrink_size=41,
+        infer_on_rect=False,
     )
 )
diff --git a/configs/experiment/yolov6n_with_eval_params.py b/configs/experiment/yolov6n_with_eval_params.py
@@ -64,11 +64,8 @@
     iou_thres=0.65,
 
     #pading and scale coord
-    test_load_size=None, #None mean will be the same as test image size
-    letterbox_return_int=False,
-    force_no_pad=False,
-    not_infer_on_rect=False,
-    scale_exact=False,
+    shrink_size=None, # None mean will not shrink the image.
+    infer_on_rect=True,
 
     #metric
     verbose=False,

diff --git a/deploy/ONNX/export_onnx.py b/deploy/ONNX/export_onnx.py
@@ -34,8 +34,8 @@
     parser.add_argument('--ort', action='store_true', help='export onnx for onnxruntime')
     parser.add_argument('--with-preprocess', action='store_true', help='export bgr2rgb and normalize')
     parser.add_argument('--topk-all', type=int, default=100, help='topk objects for every images')
-    parser.add_argument('--iou-thres', type=float, default=0.45, help='iou threshold for NMS')
-    parser.add_argument('--conf-thres', type=float, default=0.4, help='conf threshold for NMS')
+    parser.add_argument('--iou-thres', type=float, default=0.65, help='iou threshold for NMS')
+    parser.add_argument('--conf-thres', type=float, default=0.5, help='conf threshold for NMS')
     parser.add_argument('--device', default='0', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
     args = parser.parse_args()
     args.img_size *= 2 if len(args.img_size) == 1 else 1  # expand

diff --git a/deploy/TensorRT/README.md b/deploy/TensorRT/README.md
@@ -22,13 +22,15 @@ python ./deploy/ONNX/export_onnx.py \
 Follow the file [post training README](../../tools/quantization/tensorrt/post_training/README.md) to convert and save the serialized engine file `yolov6.engine`.
 
 ```shell
-python3 onnx_to_tensorrt.py --fp16 --int8 -v \
+python3 onnx_to_tensorrt.py --model ${ONNX_MODEL} \
+        --dtype int8  \
         --max_calibration_size=${MAX_CALIBRATION_SIZE} \
         --calibration-data=${CALIBRATION_DATA} \
         --calibration-cache=${CACHE_FILENAME} \
         --preprocess_func=${PREPROCESS_FUNC} \
         --explicit-batch \
-        --onnx ${ONNX_MODEL} -o ${OUTPUT}
+        --verbose
+
 ```
 
 ## Step 3: build the demo

diff --git a/deploy/TensorRT/calibrator.py b/deploy/TensorRT/calibrator.py
@@ -5,7 +5,7 @@
 import numpy as np
 import cv2
 import glob
-from Processor import letterbox
+from tensorrt_processor import letterbox
 
 import ctypes
 import logging

diff --git a/deploy/TensorRT/eval_yolo_trt.py b/deploy/TensorRT/eval_yolo_trt.py
@@ -13,7 +13,7 @@
 from pycocotools.coco import COCO
 from pycocotools.cocoeval import COCOeval
 
-from Processor import Processor
+from tensorrt_processor import Processor
 
 ROOT = os.getcwd()
 if str(ROOT) not in sys.path:
@@ -45,10 +45,7 @@ def parse_args():
         help='IOU threshold for NMS')
     parser.add_argument('--class_num', type=int, default=3, help='class list for general datasets that must be specified')
     parser.add_argument('--is_coco', action='store_true', help='whether the validation dataset is coco, default is False.')
-    parser.add_argument('--test_load_size', type=int, default=634, help='load img resize when test')
-    parser.add_argument('--letterbox_return_int', type=bool, default=True, help='return int offset for letterbox')
-    parser.add_argument('--scale_exact', type=bool, default=True, help='use exact scale size to scale coords')
-    parser.add_argument('--force_no_pad', type=bool, default=True, help='for no extra pad in letterbox')
+    parser.add_argument('--shrink_size', type=int, default=4, help='load img with size (img_size - shrink_size), for better performace.')
     parser.add_argument('--visualize', '-v', action="store_true", default=False, help='visualize demo')
     parser.add_argument('--num_imgs_to_visualize', type=int, default=10, help='number of images to visualize')
     parser.add_argument('--do_pr_metric', action='store_true', help='use pr_metric to evaluate models')
@@ -62,34 +59,26 @@ def parse_args():
     return args
 
 
-def scale_coords(scale_exact, img1_shape, coords, img0_shape, ratio_pad=None):
-        '''Rescale coords (xyxy) from img1_shape to img0_shape.'''
-        if ratio_pad is None:  # calculate from img0_shape
-            gain = [min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])]  # gain  = old / new
-            if scale_exact:
-                gain = [img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]]
-            pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
-        else:
-            gain = ratio_pad[0]
-            pad = ratio_pad[1]
+def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
+    '''Rescale coords (xyxy) from img1_shape to img0_shape.'''
 
-        coords[:, [0, 2]] -= pad[0]  # x padding
-        if scale_exact:
-            coords[:, [0, 2]] /= gain[1]  # x gain
-        else:
-            coords[:, [0, 2]] /= gain[0]  # raw x gain
-        coords[:, [1, 3]] -= pad[1]  # y padding
-        coords[:, [1, 3]] /= gain[0]  # y gain
+    gain = ratio_pad[0]
+    pad = ratio_pad[1]
+
+    coords[:, [0, 2]] -= pad[0]  # x padding
+    coords[:, [0, 2]] /= gain[0]  # raw x gain
+    coords[:, [1, 3]] -= pad[1]  # y padding
+    coords[:, [1, 3]] /= gain[0]  # y gain
 
-        if isinstance(coords, torch.Tensor):  # faster individually
-            coords[:, 0].clamp_(0, img0_shape[1])  # x1
-            coords[:, 1].clamp_(0, img0_shape[0])  # y1
-            coords[:, 2].clamp_(0, img0_shape[1])  # x2
-            coords[:, 3].clamp_(0, img0_shape[0])  # y2
-        else:  # np.array (faster grouped)
-            coords[:, [0, 2]] = coords[:, [0, 2]].clip(0, img0_shape[1])  # x1, x2
-            coords[:, [1, 3]] = coords[:, [1, 3]].clip(0, img0_shape[0])  # y1, y2
-        return coords
+    if isinstance(coords, torch.Tensor):  # faster individually
+        coords[:, 0].clamp_(0, img0_shape[1])  # x1
+        coords[:, 1].clamp_(0, img0_shape[0])  # y1
+        coords[:, 2].clamp_(0, img0_shape[1])  # x2
+        coords[:, 3].clamp_(0, img0_shape[0])  # y2
+    else:  # np.array (faster grouped)
+        coords[:, [0, 2]] = coords[:, [0, 2]].clip(0, img0_shape[1])  # x1, x2
+        coords[:, [1, 3]] = coords[:, [1, 3]].clip(0, img0_shape[0])  # y1, y2
+    return coords
 
 
 def check_args(args):
@@ -100,7 +89,24 @@ def check_args(args):
         sys.exit('%s is not a valid file' % args.annotations)
 
 
-def generate_results(data_class, model_names, do_pr_metric, plot_confusion_matrix, processor, imgs_dir, labels_dir, valid_images, results_file, conf_thres, iou_thres, is_coco, batch_size=1, test_load_size=640, visualize=False, num_imgs_to_visualize=0, imgname2id={}):
+def generate_results(data_class,
+                      model_names, 
+                      do_pr_metric, 
+                      plot_confusion_matrix, 
+                      processor, 
+                      imgs_dir, 
+                      labels_dir, 
+                      valid_images, 
+                      results_file, 
+                      conf_thres, 
+                      iou_thres, 
+                      is_coco, 
+                      batch_size=1,
+                      img_size=[640, 640], 
+                      shrink_size=0, 
+                      visualize=False, 
+                      num_imgs_to_visualize=0, 
+                      imgname2id={}):
     """Run detection on each jpg and write results to file."""
     results = []
     pbar = tqdm(range(math.ceil(len(valid_images)/batch_size)), desc="TRT-Model test in val datasets.")
@@ -135,7 +141,7 @@ def generate_results(data_class, model_names, do_pr_metric, plot_confusion_matri
 
             img_src = img.copy()
             h0, w0 = img.shape[:2]
-            r = test_load_size / max(h0, w0)
+            r = (max(img_size) - shrink_size) / max(h0, w0)
             if r != 1:
                 img = cv2.resize(
                     img,
@@ -257,7 +263,7 @@ def main():
         model_names = list(range(0, args.class_num))
 
     # setup processor
-    processor = Processor(model=args.model, scale_exact=args.scale_exact, return_int=args.letterbox_return_int, force_no_pad=args.force_no_pad, is_end2end=args.is_end2end)
+    processor = Processor(model=args.model, is_end2end=args.is_end2end)
     image_names = [p for p in os.listdir(args.imgs_dir) if p.split(".")[-1].lower() in IMG_FORMATS]
     # Eliminate data with missing labels.
     with open(args.annotations) as f:
@@ -278,8 +284,24 @@ def main():
             continue
     assert len(valid_images) > 0, 'No valid images are found. Please check you image format or whether annotation file is match.'
     #targets=[j for j in os.listdir(args.labels_dir) if j.endswith('.txt')]
-    stats, seen = generate_results(data_class, model_names, args.do_pr_metric, args.plot_confusion_matrix, processor, args.imgs_dir, args.labels_dir, valid_images, results_file,  args.conf_thres, args.iou_thres, args.is_coco, batch_size=args.batch_size, test_load_size=args.test_load_size,
-                     visualize=args.visualize, num_imgs_to_visualize=args.num_imgs_to_visualize, imgname2id=imgname2id)
+    stats, seen = generate_results(data_class, 
+                                    model_names,
+                                    args.do_pr_metric,
+                                    args.plot_confusion_matrix,
+                                    processor, 
+                                    args.imgs_dir, 
+                                    args.labels_dir, 
+                                    valid_images, 
+                                    results_file,  
+                                    args.conf_thres, 
+                                    args.iou_thres, 
+                                    args.is_coco, 
+                                    batch_size=args.batch_size,
+                                    img_size = args.img_size, 
+                                    shrink_size=args.shrink_size,
+                                    visualize=args.visualize,
+                                    num_imgs_to_visualize=args.num_imgs_to_visualize, 
+                                    imgname2id=imgname2id)
 
     # Run COCO mAP evaluation
     # Reference: https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb

diff --git a/deploy/TensorRT/Processor.py → deploy/TensorRT/tensorrt_processor.py b/deploy/TensorRT/Processor.py → deploy/TensorRT/tensorrt_processor.py
@@ -44,7 +44,7 @@ def get_input_shape(engine):
     else:
         raise ValueError('bad dims of binding %s: %s' % (binding, str(binding_dims)))
 
-def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=False, stride=32, return_int=False):
+def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=False, stride=32):
     # Resize and pad image while meeting stride-multiple constraints
     shape = im.shape[:2]  # current shape [height, width]
     if isinstance(new_shape, int):
@@ -70,18 +70,13 @@ def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleu
     top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
     left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
     im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
-    if not return_int:
-        return im, r, (dw, dh)
-    else:
-        return im, r, (left, top)
+
+    return im, r, (left, top)
 
 
 class Processor():
-    def __init__(self, model, num_classes=80, num_layers=3, anchors=1, device=torch.device('cuda:0'), return_int=False, scale_exact=False, force_no_pad=False, is_end2end=False):
+    def __init__(self, model, num_classes=80, num_layers=3, anchors=1, device=torch.device('cuda:0'), is_end2end=False):
         # load tensorrt engine)
-        self.return_int = return_int
-        self.scale_exact = scale_exact
-        self.force_no_pad = force_no_pad
         self.is_end2end = is_end2end
         Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
         self.logger = trt.Logger(trt.Logger.INFO)
@@ -135,7 +130,7 @@ def pre_process(self, img_src, input_shape=None,):
         """Preprocess an image before TRT YOLO inferencing.
         """
         input_shape = input_shape if input_shape is not None else self.input_shape
-        image, ratio, pad = letterbox(img_src, input_shape, auto=False, return_int=self.return_int, scaleup=True)
+        image, ratio, pad = letterbox(img_src, input_shape, auto=False, scaleup=False)
         # Convert
         image = image.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
         image = torch.from_numpy(np.ascontiguousarray(image)).to(self.device).float()
@@ -278,20 +273,12 @@ def non_max_suppression(self, prediction, conf_thres=0.25, iou_thres=0.45, class
 
     def scale_coords(self, img1_shape, coords, img0_shape, ratio_pad=None):
         # Rescale coords (xyxy) from img1_shape to img0_shape
-        if ratio_pad is None:  # calculate from img0_shape
-            gain = [min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])]  # gain  = old / new
-            if self.scale_exact:
-                gain = [img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]]
-            pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
-        else:
-            gain = ratio_pad[0]
-            pad = ratio_pad[1]
+
+        gain = ratio_pad[0]
+        pad = ratio_pad[1]
 
         coords[:, [0, 2]] -= pad[0]  # x padding
-        if self.scale_exact:
-            coords[:, [0, 2]] /= gain[1]  # x gain
-        else:
-            coords[:, [0, 2]] /= gain[0]  # raw x gain
+        coords[:, [0, 2]] /= gain[0]  # raw x gain
         coords[:, [1, 3]] -= pad[1]  # y padding
         coords[:, [1, 3]] /= gain[0]  # y gain