From 99ff02949b0d54450a79e3ce8c19ce3aeafead1a Mon Sep 17 00:00:00 2001 From: tripleMu Date: Wed, 29 Jun 2022 23:35:36 +0800 Subject: [PATCH 01/14] =?UTF-8?q?=E9=87=8D=E6=96=B0fork=E5=A2=9E=E5=8A=A0e?= =?UTF-8?q?nd2end=20ONNX=20=E5=AF=BC=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- deploy/ONNX/export_onnx.py | 25 +++++- yolov6/models/end2end.py | 151 +++++++++++++++++++++++++++++++++++++ 2 files changed, 174 insertions(+), 2 deletions(-) create mode 100644 yolov6/models/end2end.py diff --git a/deploy/ONNX/export_onnx.py b/deploy/ONNX/export_onnx.py index 963487c5..5dfaf9e6 100644 --- a/deploy/ONNX/export_onnx.py +++ b/deploy/ONNX/export_onnx.py @@ -28,6 +28,11 @@ parser.add_argument('--half', action='store_true', help='FP16 half-precision export') parser.add_argument('--inplace', action='store_true', help='set Detect() inplace=True') parser.add_argument('--simplify', action='store_true', help='simplify onnx model') + parser.add_argument('--end2end', action='store_true', help='export end2end onnx') + parser.add_argument('--max-wh', type=int, default=None, help='None for trt int for ort') + parser.add_argument('--topk-all', type=int, default=100, help='topk objects for every images') + parser.add_argument('--iou-thres', type=float, default=0.45, help='iou threshold for NMS') + parser.add_argument('--conf-thres', type=float, default=0.25, help='conf threshold for NMS') parser.add_argument('--device', default='0', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') args = parser.parse_args() args.img_size *= 2 if len(args.img_size) == 1 else 1 # expand @@ -57,6 +62,10 @@ m.act = SiLU() elif isinstance(m, Detect): m.inplace = args.inplace + if args.end2end: + from yolov6.models.end2end import End2End + model = End2End(model, max_obj=args.topk_all, iou_thres=args.iou_thres, + score_thres=args.conf_thres, max_wh=args.max_wh, device=device) y = model(img) # dry run @@ -69,12 +78,19 @@ training=torch.onnx.TrainingMode.EVAL, do_constant_folding=True, input_names=['image_arrays'], - output_names=['outputs'], - ) + output_names=['num_dets', 'det_boxes', 'det_scores', 'det_classes'] + if args.end2end and args.max_wh is None else ['outputs'],) f.seek(0) # Checks onnx_model = onnx.load(f) # load onnx model onnx.checker.check_model(onnx_model) # check onnx model + # Fix output shape + if args.end2end and args.max_wh is None: + shapes = [args.batch_size, 1, args.batch_size, args.topk_all, 4, + args.batch_size, args.topk_all, args.batch_size, args.topk_all] + for i in onnx_model.graph.output: + for j in i.type.tensor_type.shape.dim: + j.dim_param = str(shapes.pop(0)) if args.simplify: try: import onnxsim @@ -90,3 +106,8 @@ # Finish LOGGER.info('\nExport complete (%.2fs)' % (time.time() - t)) + if args.end2end: + if args.max_wh is None: + LOGGER.info('\nYou can export tensorrt engine use trtexec tools.\nCommand is:') + LOGGER.info(f'trtexec --onnx={export_file} --saveEngine={export_file.replace(".onnx",".engine")}') + diff --git a/yolov6/models/end2end.py b/yolov6/models/end2end.py new file mode 100644 index 00000000..c6e99326 --- /dev/null +++ b/yolov6/models/end2end.py @@ -0,0 +1,151 @@ +import torch +import torch.nn as nn +import random + +class ORT_NMS(torch.autograd.Function): + + @staticmethod + def forward(ctx, + boxes, + scores, + max_output_boxes_per_class=torch.tensor([100]), + iou_threshold=torch.tensor([0.45]), + score_threshold=torch.tensor([0.25])): + device = boxes.device + batch = scores.shape[0] + num_det = random.randint(0, 100) + batches 
= torch.randint(0, batch, (num_det,)).sort()[0].to(device) + idxs = torch.arange(100, 100 + num_det).to(device) + zeros = torch.zeros((num_det,), dtype=torch.int64).to(device) + selected_indices = torch.cat([batches[None], zeros[None], idxs[None]], 0).T.contiguous() + selected_indices = selected_indices.to(torch.int64) + return selected_indices + + @staticmethod + def symbolic(g, boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold): + return g.op("NonMaxSuppression", boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold) + +class TRT_NMS(torch.autograd.Function): + @staticmethod + def forward( + ctx, + boxes, + scores, + background_class=-1, + box_coding=0, + iou_threshold=0.45, + max_output_boxes=100, + plugin_version="1", + score_activation=0, + score_threshold=0.25, + ): + batch_size, num_boxes, num_classes = scores.shape + num_det = torch.randint(0, max_output_boxes, (batch_size, 1), dtype=torch.int32) + det_boxes = torch.randn(batch_size, max_output_boxes, 4) + det_scores = torch.randn(batch_size, max_output_boxes) + det_classes = torch.randint(0, num_classes, (batch_size, max_output_boxes), dtype=torch.int32) + + return num_det, det_boxes, det_scores, det_classes + + @staticmethod + def symbolic(g, + boxes, + scores, + background_class=-1, + box_coding=0, + iou_threshold=0.45, + max_output_boxes=100, + plugin_version="1", + score_activation=0, + score_threshold=0.25): + out = g.op("TRT::EfficientNMS_TRT", + boxes, + scores, + background_class_i=background_class, + box_coding_i=box_coding, + iou_threshold_f=iou_threshold, + max_output_boxes_i=max_output_boxes, + plugin_version_s=plugin_version, + score_activation_i=score_activation, + score_threshold_f=score_threshold, + outputs=4) + nums, boxes, scores, classes = out + return nums,boxes,scores,classes + + + +class ONNX_ORT(nn.Module): + + def __init__(self, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=640, device=None): + super().__init__() + self.device = device if device else torch.device("cpu") + self.max_obj = torch.tensor([max_obj]).to(device) + self.iou_threshold = torch.tensor([iou_thres]).to(device) + self.score_threshold = torch.tensor([score_thres]).to(device) + self.max_wh = max_wh + self.convert_matrix = torch.tensor([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], + dtype=torch.float32, + device=self.device) + + def forward(self, x): + box = x[:, :, :4] + conf = x[:, :, 4:5] + score = x[:, :, 5:] + score *= conf + box @= self.convert_matrix + objScore, objCls = score.max(2, keepdim=True) + dis = objCls.float() * self.max_wh + nmsbox = box + dis + objScore1 = objScore.transpose(1, 2).contiguous() + selected_indices = ORT_NMS.apply(nmsbox, objScore1, self.max_obj, self.iou_threshold, self.score_threshold) + X, Y = selected_indices[:, 0], selected_indices[:, 2] + resBoxes = box[X, Y, :] + resClasses = objCls[X, Y, :].float() + resScores = objScore[X, Y, :] + X = X.unsqueeze(1).float() + return torch.concat([X, resBoxes, resClasses, resScores], 1) + +class ONNX_TRT(nn.Module): + + def __init__(self, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=None ,device=None): + super().__init__() + assert max_wh is None + self.device = device if device else torch.device('cpu') + self.background_class = -1, + self.box_coding = 0, + self.iou_threshold = iou_thres + self.max_obj = max_obj + self.plugin_version = '1' + self.score_activation = 0 + self.score_threshold = score_thres + self.convert_matrix = torch.tensor([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], 
[0, -0.5, 0, 0.5]], + dtype=torch.float32, + device=self.device) + + def forward(self, x): + box = x[:, :, :4] + conf = x[:, :, 4:5] + score = x[:, :, 5:] + score *= conf + box @= self.convert_matrix + num_det, det_boxes, det_scores, det_classes = TRT_NMS.apply(box, score, self.background_class, self.box_coding, + self.iou_threshold, self.max_obj, + self.plugin_version, self.score_activation, + self.score_threshold) + return num_det, det_boxes, det_scores, det_classes + + +class End2End(nn.Module): + + def __init__(self, model, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=None, device=None): + super().__init__() + device = device if device else torch.device('cpu') + self.model = model.to(device) + self.patch_model = ONNX_TRT if max_wh is None else ONNX_ORT + self.end2end = self.patch_model(max_obj, iou_thres, score_thres, max_wh, device) + self.end2end.eval() + + def forward(self, x): + x = self.model(x) + x = self.end2end(x) + return x From b5d4d663add34e2f198ddb94a021e990bfd69e05 Mon Sep 17 00:00:00 2001 From: tripleMu <865626@163.com> Date: Thu, 30 Jun 2022 17:50:12 +0800 Subject: [PATCH 02/14] Fix export file bugs --- deploy/ONNX/export_onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/ONNX/export_onnx.py b/deploy/ONNX/export_onnx.py index 47deffd1..5a532c9f 100644 --- a/deploy/ONNX/export_onnx.py +++ b/deploy/ONNX/export_onnx.py @@ -74,7 +74,7 @@ LOGGER.info('\nStarting to export ONNX...') export_file = args.weights.replace('.pt', '.onnx') # filename with BytesIO() as f: - torch.onnx.export(model, img, export_file, verbose=False, opset_version=12, + torch.onnx.export(model, img, f, verbose=False, opset_version=12, training=torch.onnx.TrainingMode.EVAL, do_constant_folding=True, input_names=['image_arrays'], From 5ccad8333c456bd4bcaae9168efabaa3a0c031be Mon Sep 17 00:00:00 2001 From: tripleMu <865626@163.com> Date: Thu, 30 Jun 2022 17:52:00 +0800 Subject: [PATCH 03/14] Fix export_file --- deploy/ONNX/export_onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/ONNX/export_onnx.py b/deploy/ONNX/export_onnx.py index 5a532c9f..5dfaf9e6 100644 --- a/deploy/ONNX/export_onnx.py +++ b/deploy/ONNX/export_onnx.py @@ -99,7 +99,7 @@ assert check, 'assert check failed' except Exception as e: LOGGER.info(f'Simplifier failure: {e}') - onnx.save(onnx_model, f) + onnx.save(onnx_model, export_file) LOGGER.info(f'ONNX export success, saved as {export_file}') except Exception as e: LOGGER.info(f'ONNX export failure: {e}') From 154cd924c74a440e4ba8fb34fe90fc78da79120f Mon Sep 17 00:00:00 2001 From: tripleMu <865626@163.com> Date: Thu, 30 Jun 2022 18:21:20 +0800 Subject: [PATCH 04/14] Fix code commit and add README for export --- README.md | 8 +-- deploy/ONNX/README.md | 94 ++++++++++++++++++++++++++++++--- deploy/ONNX/export_onnx.py | 1 - yolov6/core/engine.py | 2 +- yolov6/core/evaler.py | 2 +- yolov6/layers/dbb_transforms.py | 2 +- 6 files changed, 95 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index ab64e70d..a0384b94 100644 --- a/README.md +++ b/README.md @@ -96,10 +96,10 @@ python tools/eval.py --data data/coco.yaml --batch 32 --weights yolov6s.pt --tas - Comparisons of the mAP and speed of different object detectors are tested on [COCO val2017](https://cocodataset.org/#download) dataset. - Refer to [Test speed](./docs/Test_speed.md) tutorial to reproduce the speed results of YOLOv6. - Params and Flops of YOLOv6 are estimated on deployed model. 
-- Speed results of other methods are tested in our environment using official codebase and model if not found from the corresponding official release. - - ## Third-party resources +- Speed results of other methods are tested in our environment using official codebase and model if not found from the corresponding official release. + + ## Third-party resources * YOLOv6 NCNN Android app demo: [ncnn-android-yolov6](https://github.com/FeiGeChuanShu/ncnn-android-yolov6) from [FeiGeChuanShu](https://github.com/FeiGeChuanShu) - * YOLOv6 ONNXRuntime/MNN/TNN C++: [YOLOv6-ORT](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/ort/cv/yolov6.cpp), [YOLOv6-MNN](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/mnn/cv/mnn_yolov6.cpp) and [YOLOv6-TNN](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/tnn/cv/tnn_yolov6.cpp) from [DefTruth](https://github.com/DefTruth) + * YOLOv6 ONNXRuntime/MNN/TNN C++: [YOLOv6-ORT](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/ort/cv/yolov6.cpp), [YOLOv6-MNN](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/mnn/cv/mnn_yolov6.cpp) and [YOLOv6-TNN](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/tnn/cv/tnn_yolov6.cpp) from [DefTruth](https://github.com/DefTruth) * YOLOv6 TensorRT Python: [yolov6-tensorrt-python](https://github.com/Linaom1214/tensorrt-python/blob/main/yolov6/trt.py) from [Linaom1214](https://github.com/Linaom1214) * YOLOv6 TensorRT Windows C++: [yolort](https://github.com/zhiqwang/yolov5-rt-stack/tree/main/deployment/tensorrt-yolov6) from [Wei Zeng](https://github.com/Wulingtian) diff --git a/deploy/ONNX/README.md b/deploy/ONNX/README.md index 5509bd0b..e5f0eae5 100644 --- a/deploy/ONNX/README.md +++ b/deploy/ONNX/README.md @@ -1,17 +1,99 @@ -## Export ONNX Model +# Export ONNX Model -### Check requirements +## Check requirements ```shell pip install onnx>=1.10.0 ``` -### Export script +## Export script ```shell -python deploy/ONNX/export_onnx.py --weights yolov6s.pt --img 640 --batch 1 - +python ./deploy/ONNX/export_onnx.py \ + --weights yolov6s.pt \ + --img 640 \ + --batch 1 ``` -### Download + + +#### Description of all arguments + +- `--weights` : The path of yolov6 model weights. +- `--img` : Image size of model inputs. +- `--batch` : Batch size of model inputs. +- `--half` : Whether to export half-precision model. +- `--inplace` : Whether to set Detect() inplace. +- `--simplify` : Whether to simplify onnx. Not support in end to end export. +- `--end2end` : Whether to export end to end onnx model. Only support onnxruntime and TensorRT >= 8.0.0 . +- `--max-wh` : Default is None for TensorRT backend. Set int for onnxruntime backend. +- `--topk-all` : Topk objects for every image. +- `--iou-thres` : IoU threshold for NMS algorithm. +- `--conf-thres` : Confidence threshold for NMS algorithm. +- `--device` : Export device. Cuda device : 0 or 0,1,2,3 ... , CPU : cpu . + +## Download + * [YOLOv6-nano](https://github.com/meituan/YOLOv6/releases/download/0.1.0/yolov6n.onnx) * [YOLOv6-tiny](https://github.com/meituan/YOLOv6/releases/download/0.1.0/yolov6t.onnx) * [YOLOv6-s](https://github.com/meituan/YOLOv6/releases/download/0.1.0/yolov6s.onnx) + +## End2End export + +Now YOLOv6 supports end to end detect for onnxruntime and TensorRT ! + +If you want to deploy in TensorRT, make sure you have installed TensorRT >= 8.0.0 ! 
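Under the hood, `--end2end` wraps the detection model with the `End2End` module from `yolov6/models/end2end.py` before tracing, so the exported graph already contains the NMS step. The sketch below only illustrates that wrapping: `DummyDetector` is a hypothetical stand-in (not part of YOLOv6) that mimics the `(batch, num_boxes, 5 + num_classes)` prediction layout the wrapper expects, and in eager PyTorch the NMS ops inside the wrapper return placeholder results; the real NMS only happens once the graph is exported and run by onnxruntime or TensorRT.

```python
import torch
import torch.nn as nn
from yolov6.models.end2end import End2End

# Hypothetical stand-in for a real YOLOv6 model: it only reproduces the
# (batch, num_boxes, 5 + num_classes) output layout that End2End expects.
class DummyDetector(nn.Module):
    def forward(self, x):
        return torch.rand(x.shape[0], 8400, 85)  # 4 box coords + 1 objectness + 80 class scores

device = torch.device('cpu')
# max_wh=None selects the TensorRT branch (EfficientNMS_TRT plugin);
# an integer max_wh (e.g. 7680) selects the onnxruntime branch (NonMaxSuppression op).
model = End2End(DummyDetector(), max_obj=100, iou_thres=0.45,
                score_thres=0.25, max_wh=7680, device=device)
model.eval()

img = torch.zeros(1, 3, 640, 640, device=device)
out = model(img)  # dry run; outside of ONNX export the NMS result is a random placeholder
print(out.shape)  # onnxruntime branch: (num_det, 7)
```

The export script applies the same wrapping to the loaded checkpoint before calling `torch.onnx.export`, so the commands below differ from the plain export only by the extra flags.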
+ +### onnxruntime backend +#### Usage + +```bash +python ./deploy/ONNX/export_onnx.py \ + --weights yolov6s.pt \ + --img 640 \ + --batch 1 \ + --end2end \ + --max-wh 7680 +``` + +You will get an onnx with **NonMaxSuppression** operater . + +The onnx outputs shape is ```nums x 7```. + +```nums``` means the number of all objects which were detected. + +```7``` means [`batch_index`,`x0`,`y0`,`x1`,` y1`,`classid`,`score`] + +### TensorRT backend (TensorRT version>= 8.0.0) + +#### Usage + +```bash +python ./deploy/ONNX/export_onnx.py \ + --weights yolov6s.pt \ + --img 640 \ + --batch 1 \ + --end2end +``` + +You will get an onnx with **[EfficientNMS_TRT](https://github.com/NVIDIA/TensorRT/tree/main/plugin/efficientNMSPlugin)** plugin . +The onnx outputs are as shown : + + + +```num_dets``` means the number of object in every image in its batch . + +```det_boxes``` means topk(100) object's location about [`x0`,`y0`,`x1`,` y1`] . + +```det_scores``` means the confidence score of every topk(100) objects . + +```det_classes``` means the category of every topk(100) objects . + +```nums``` means the category of every topk(100) objects. + +You can export TensorRT engine use [trtexec](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#trtexec-ovr) tools. +#### Usage +``` shell +/path/to/trtexec \ + --onnx=yolov6s.onnx \ + --saveEngine=yolov6s.engine \ + --fp16 # if export TensorRT fp16 model +``` diff --git a/deploy/ONNX/export_onnx.py b/deploy/ONNX/export_onnx.py index 5dfaf9e6..8efcf30a 100644 --- a/deploy/ONNX/export_onnx.py +++ b/deploy/ONNX/export_onnx.py @@ -110,4 +110,3 @@ if args.max_wh is None: LOGGER.info('\nYou can export tensorrt engine use trtexec tools.\nCommand is:') LOGGER.info(f'trtexec --onnx={export_file} --saveEngine={export_file.replace(".onnx",".engine")}') - diff --git a/yolov6/core/engine.py b/yolov6/core/engine.py index 9b974386..273b6f7b 100644 --- a/yolov6/core/engine.py +++ b/yolov6/core/engine.py @@ -55,7 +55,7 @@ def __init__(self, args, cfg, device): assert os.path.isfile(args.resume), 'ERROR: --resume checkpoint does not exists' self.ckpt = torch.load(args.resume, map_location='cpu') self.start_epoch = self.ckpt['epoch'] + 1 - + self.max_epoch = args.epochs self.max_stepnum = len(self.train_loader) self.batch_size = args.batch_size diff --git a/yolov6/core/evaler.py b/yolov6/core/evaler.py index d19ca640..569e4e3b 100644 --- a/yolov6/core/evaler.py +++ b/yolov6/core/evaler.py @@ -103,7 +103,7 @@ def predict_model(self, model, dataloader, task): def eval_model(self, pred_results, model, dataloader, task): '''Evaluate models For task speed, this function only evaluates the speed of model and outputs inference time. - For task val, this function evaluates the speed and mAP by pycocotools, and returns + For task val, this function evaluates the speed and mAP by pycocotools, and returns inference time and mAP value. 
''' LOGGER.info(f'\nEvaluating speed.') diff --git a/yolov6/layers/dbb_transforms.py b/yolov6/layers/dbb_transforms.py index c79587c3..e60cbd4d 100644 --- a/yolov6/layers/dbb_transforms.py +++ b/yolov6/layers/dbb_transforms.py @@ -47,4 +47,4 @@ def transV_avg(channels, kernel_size, groups): def transVI_multiscale(kernel, target_kernel_size): H_pixels_to_pad = (target_kernel_size - kernel.size(2)) // 2 W_pixels_to_pad = (target_kernel_size - kernel.size(3)) // 2 - return F.pad(kernel, [H_pixels_to_pad, H_pixels_to_pad, W_pixels_to_pad, W_pixels_to_pad]) \ No newline at end of file + return F.pad(kernel, [H_pixels_to_pad, H_pixels_to_pad, W_pixels_to_pad, W_pixels_to_pad]) From a2596062f43d8a72572b193a8a6ee059b1183e98 Mon Sep 17 00:00:00 2001 From: tripleMu <865626@163.com> Date: Thu, 30 Jun 2022 18:24:58 +0800 Subject: [PATCH 05/14] Remove nums --- deploy/ONNX/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/deploy/ONNX/README.md b/deploy/ONNX/README.md index e5f0eae5..fcbb0035 100644 --- a/deploy/ONNX/README.md +++ b/deploy/ONNX/README.md @@ -87,7 +87,6 @@ The onnx outputs are as shown : ```det_classes``` means the category of every topk(100) objects . -```nums``` means the category of every topk(100) objects. You can export TensorRT engine use [trtexec](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#trtexec-ovr) tools. #### Usage From ac6a5e3d95ac9d3af47ef1498601ba322a030800 Mon Sep 17 00:00:00 2001 From: Chilicyy Date: Fri, 1 Jul 2022 16:57:07 +0800 Subject: [PATCH 06/14] fix increment_path bug --- tools/train.py | 3 ++- yolov6/utils/general.py | 8 +++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/train.py b/tools/train.py index b2bb5022..3cec2011 100644 --- a/tools/train.py +++ b/tools/train.py @@ -47,7 +47,8 @@ def check_and_init(args): '''check config files and device, and initialize ''' # check files - args.save_dir = str(increment_name(osp.join(args.output_dir, args.name))) + master_process = args.rank == 0 if args.world_size > 1 else args.rank == -1 + args.save_dir = str(increment_name(osp.join(args.output_dir, args.name), master_process)) os.makedirs(args.save_dir, exist_ok=True) cfg = Config.fromfile(args.conf_file) diff --git a/yolov6/utils/general.py b/yolov6/utils/general.py index f81b1f3a..61f298eb 100644 --- a/yolov6/utils/general.py +++ b/yolov6/utils/general.py @@ -1,15 +1,17 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- import os from pathlib import Path -def increment_name(path): +def increment_name(path, master_process): "increase save directory's id" path = Path(path) sep = '' - if path.exists(): + if path.exists() and master_process: path, suffix = (path.with_suffix(''), path.suffix) if path.is_file() else (path, '') for n in range(1, 9999): p = f'{path}{sep}{n}{suffix}' if not os.path.exists(p): break path = Path(p) - return path + return path \ No newline at end of file From aa12f1017abaf6ae27d2bcb86a2f2806ee4d3439 Mon Sep 17 00:00:00 2001 From: Chilicyy Date: Fri, 1 Jul 2022 17:59:17 +0800 Subject: [PATCH 07/14] fix increment_path bug --- tools/train.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/train.py b/tools/train.py index 3cec2011..ee2504f7 100644 --- a/tools/train.py +++ b/tools/train.py @@ -49,7 +49,6 @@ def check_and_init(args): # check files master_process = args.rank == 0 if args.world_size > 1 else args.rank == -1 args.save_dir = str(increment_name(osp.join(args.output_dir, args.name), master_process)) - os.makedirs(args.save_dir, exist_ok=True) cfg = 
Config.fromfile(args.conf_file) # check device @@ -59,7 +58,9 @@ def check_and_init(args): set_random_seed(1+args.rank, deterministic=(args.rank == -1)) # save args - save_yaml(vars(args), osp.join(args.save_dir, 'args.yaml')) + if master_process: + os.makedirs(args.save_dir) + save_yaml(vars(args), osp.join(args.save_dir, 'args.yaml')) return cfg, device From de5c0e02fa793be570f5df6ba8fda5b7ba939034 Mon Sep 17 00:00:00 2001 From: MTChengMeng Date: Fri, 1 Jul 2022 19:53:17 +0800 Subject: [PATCH 08/14] fix a bug in checking process --- yolov6/data/datasets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yolov6/data/datasets.py b/yolov6/data/datasets.py index 38b062c2..1a6fe4db 100644 --- a/yolov6/data/datasets.py +++ b/yolov6/data/datasets.py @@ -284,7 +284,7 @@ def get_imgs_labels(self, img_dir): ne_per_file, msg, ) in pbar: - if img_path: + if nc_per_file == 0: img_info[img_path]["labels"] = labels_per_file else: img_info.pop(img_path) @@ -484,7 +484,7 @@ def check_label_files(args): except Exception as e: nc = 1 msg = f"WARNING: {lb_path}: ignoring invalid labels: {e}" - return None, None, nc, nm, nf, ne, msg + return img_path, None, nc, nm, nf, ne, msg @staticmethod def generate_coco_format_labels(img_info, class_names, save_path): From 78eceeed67239c41814f8ff2af774ee3614e7bc4 Mon Sep 17 00:00:00 2001 From: liangliang Date: Fri, 1 Jul 2022 20:48:27 +0800 Subject: [PATCH 09/14] fix default dist_url value bug. --- tools/train.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/train.py b/tools/train.py index b2bb5022..fdb426fc 100644 --- a/tools/train.py +++ b/tools/train.py @@ -22,20 +22,20 @@ def get_args_parser(add_help=True): parser = argparse.ArgumentParser(description='YOLOv6 PyTorch Training', add_help=add_help) parser.add_argument('--data-path', default='./data/coco.yaml', type=str, help='path of dataset') parser.add_argument('--conf-file', default='./configs/yolov6s.py', type=str, help='experiments description file') - parser.add_argument('--img-size', type=int, default=640, help='train, val image size (pixels)') + parser.add_argument('--img-size', default=640, type=int, help='train, val image size (pixels)') parser.add_argument('--batch-size', default=32, type=int, help='total batch size for all GPUs') parser.add_argument('--epochs', default=400, type=int, help='number of total epochs to run') parser.add_argument('--workers', default=8, type=int, help='number of data loading workers (default: 8)') parser.add_argument('--device', default='0', type=str, help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') - parser.add_argument('--eval-interval', type=int, default=20, help='evaluate at every interval epochs') + parser.add_argument('--eval-interval', default=20, type=int, help='evaluate at every interval epochs') parser.add_argument('--eval-final-only', action='store_true', help='only evaluate at the final epoch') - parser.add_argument('--heavy-eval-range', default=50, + parser.add_argument('--heavy-eval-range', default=50, type=int, help='evaluating every epoch for last such epochs (can be jointly used with --eval-interval)') parser.add_argument('--check-images', action='store_true', help='check images when initializing datasets') parser.add_argument('--check-labels', action='store_true', help='check label files when initializing datasets') parser.add_argument('--output-dir', default='./runs/train', type=str, help='path to save outputs') parser.add_argument('--name', default='exp', type=str, help='experiment name, saved to output_dir/name') - parser.add_argument('--dist_url', type=str, default="default url: tcp://127.0.0.1:8888") + parser.add_argument('--dist_url', default='env://', type=str, help='url used to set up distributed training') parser.add_argument('--gpu_count', type=int, default=0) parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter') parser.add_argument('--resume', type=str, default=None, help='resume the corresponding ckpt') From 2037644eaa369d7ca7ef3d0e38d9d8ecb75b517a Mon Sep 17 00:00:00 2001 From: LemonWang0110 Date: Sat, 2 Jul 2022 09:20:57 +0800 Subject: [PATCH 10/14] modify torch.concat() to torch.cat() --- yolov6/models/end2end.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yolov6/models/end2end.py b/yolov6/models/end2end.py index c6e99326..5715f579 100644 --- a/yolov6/models/end2end.py +++ b/yolov6/models/end2end.py @@ -103,7 +103,7 @@ def forward(self, x): resClasses = objCls[X, Y, :].float() resScores = objScore[X, Y, :] X = X.unsqueeze(1).float() - return torch.concat([X, resBoxes, resClasses, resScores], 1) + return torch.cat([X, resBoxes, resClasses, resScores], 1) class ONNX_TRT(nn.Module): From bc71e9aa0705776e6ec72e350635d898e52e7bb9 Mon Sep 17 00:00:00 2001 From: Chilicyy Date: Sun, 3 Jul 2022 12:34:28 +0800 Subject: [PATCH 11/14] fix increment_name bug when evaluation --- tools/train.py | 2 +- yolov6/utils/general.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/train.py b/tools/train.py index ee2504f7..97497acf 100644 --- a/tools/train.py +++ b/tools/train.py @@ -48,7 +48,7 @@ def check_and_init(args): # check files master_process = args.rank == 0 if args.world_size > 1 else args.rank == -1 - args.save_dir = str(increment_name(osp.join(args.output_dir, args.name), master_process)) + args.save_dir = str(increment_name(osp.join(args.output_dir, args.name))) cfg = Config.fromfile(args.conf_file) # check device diff --git a/yolov6/utils/general.py b/yolov6/utils/general.py index 61f298eb..bb5ab76c 100644 --- a/yolov6/utils/general.py +++ b/yolov6/utils/general.py @@ -3,11 +3,11 @@ import os from pathlib import Path -def increment_name(path, master_process): +def increment_name(path): "increase save directory's id" path = Path(path) sep = '' - if path.exists() and master_process: + if path.exists(): path, suffix = (path.with_suffix(''), path.suffix) if path.is_file() else (path, '') for n in range(1, 9999): p = f'{path}{sep}{n}{suffix}' From 2e1c276f931e651a10920f2dbed10f4db0f515a1 Mon Sep 17 00:00:00 2001 From: wengkaiheng Date: Sun, 3 Jul 2022 19:57:47 +0800 
Subject: [PATCH 12/14] feat: update environments for speed test --- docs/Test_speed.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Test_speed.md b/docs/Test_speed.md index ac8b24bb..71b6fc0e 100644 --- a/docs/Test_speed.md +++ b/docs/Test_speed.md @@ -10,7 +10,7 @@ Download the models you want to test from the latest release. Refer to README, install packages corresponding to CUDA, CUDNN and TensorRT version. -Here, we use Torch1.8.0 inference on V100 and TensorRT 7.2 on T4. +Here, we use Torch1.8.0 inference on V100 and TensorRT 7.2 Cuda 10.2 Cudnn 8.0.2 on T4. ## 2. Reproduce speed From c2417516e57d13b7283c4f469412a9ce6d53c760 Mon Sep 17 00:00:00 2001 From: jianghongliang02 Date: Sun, 3 Jul 2022 21:57:13 +0800 Subject: [PATCH 13/14] fix spell mistake of LOGGER.warning --- yolov6/core/inferer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yolov6/core/inferer.py b/yolov6/core/inferer.py index 33a028c1..dcaa5a2a 100644 --- a/yolov6/core/inferer.py +++ b/yolov6/core/inferer.py @@ -102,7 +102,7 @@ def precess_image(path, img_size, stride, half): img_src = cv2.imread(path) assert img_src is not None, f'Invalid image: {path}' except Exception as e: - LOGGER.Warning(e) + LOGGER.warning(e) image = letterbox(img_src, img_size, stride=stride)[0] # Convert From da1d4581c808b88912bf7e22f8a1602a0e9285e9 Mon Sep 17 00:00:00 2001 From: wengkaiheng Date: Mon, 4 Jul 2022 00:06:07 +0800 Subject: [PATCH 14/14] fix: remove redundant blanks --- docs/Test_speed.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/Test_speed.md b/docs/Test_speed.md index 71b6fc0e..871b33b9 100644 --- a/docs/Test_speed.md +++ b/docs/Test_speed.md @@ -16,25 +16,25 @@ Here, we use Torch1.8.0 inference on V100 and TensorRT 7.2 Cuda 10.2 Cudnn 8.0.2 #### 2.1 Torch Inference on V100 -To get inference speed without TensorRT on V100, you can run the following command: +To get inference speed without TensorRT on V100, you can run the following command: ```shell -python tools/eval.py --data data/coco.yaml --batch 32 --weights yolov6n.pt --task speed [--half] +python tools/eval.py --data data/coco.yaml --batch 32 --weights yolov6n.pt --task speed [--half] ``` - Speed results with batchsize = 1 are unstable in multiple runs, thus we do not provide the bs1 speed results. #### 2.2 TensorRT Inference on T4 -To get inference speed with TensorRT in FP16 mode on T4, you can follow the steps below: +To get inference speed with TensorRT in FP16 mode on T4, you can follow the steps below: -First, export pytorch model as onnx format using the following command: +First, export pytorch model as onnx format using the following command: ```shell python deploy/ONNX/export_onnx.py --weights yolov6n.pt --device 0 --batch [1 or 32] ``` -Second, generate an inference trt engine and test speed using `trtexec`: +Second, generate an inference trt engine and test speed using `trtexec`: ``` trtexec --onnx=yolov6n.onnx --workspace=1024 --avgRuns=1000 --inputIOFormats=fp16:chw --outputIOFormats=fp16:chw