resolve

Rosal-1998 · Jul 10, 2022 · 3d402c6 · 3d402c6
2 parents c582f35 + f8f9627
commit 3d402c6
Show file tree

Hide file tree

Showing 17 changed files with 533 additions and 75 deletions.
diff --git a/.gitignore b/.gitignore
@@ -110,3 +110,8 @@ venv.bak/
 
 #user scripts
 *.sh
+
+# model files
+*.onnx
+*.pt
+*.engine
diff --git a/README.md b/README.md
@@ -116,3 +116,4 @@ Your can also specify a checkpoint path to `--resume` parameter by
  * YOLOv6 ONNXRuntime/MNN/TNN C++: [YOLOv6-ORT](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/ort/cv/yolov6.cpp), [YOLOv6-MNN](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/mnn/cv/mnn_yolov6.cpp) and [YOLOv6-TNN](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/tnn/cv/tnn_yolov6.cpp) from [DefTruth](https://github.com/DefTruth)
  * YOLOv6 TensorRT Python: [yolov6-tensorrt-python](https://github.com/Linaom1214/tensorrt-python/blob/main/yolov6/trt.py) from [Linaom1214](https://github.com/Linaom1214)
  * YOLOv6 TensorRT Windows C++: [yolort](https://github.com/zhiqwang/yolov5-rt-stack/tree/main/deployment/tensorrt-yolov6) from [Wei Zeng](https://github.com/Wulingtian)
+ * YOLOv6 Quantization and Auto Compression Example: [YOLOv6-ACT](https://github.com/PaddlePaddle/PaddleSlim/tree/develop/example/auto_compression/pytorch_yolov6) from [PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim)
diff --git a/assets/image3.jpg b/assets/image3.jpg
diff --git a/assets/voc_loss_curve.jpg b/assets/voc_loss_curve.jpg
diff --git a/data/voc.yaml b/data/voc.yaml
@@ -0,0 +1,11 @@
+# Please ensure that your custom_dataset is placed in the same parent dir as YOLOv6_DIR
+train: VOCdevkit/voc_07_12/images/train # train images
+val: VOCdevkit/voc_07_12/images/val # val images
+test: VOCdevkit/voc_07_12/images/val # test images (optional)
+
+# whether it is coco dataset, only coco dataset should be set to True.
+is_coco: False
+# Classes
+nc: 20  # number of classes
+names: ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
+        'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']  # class names
diff --git a/deploy/ONNX/README.md b/deploy/ONNX/README.md
@@ -24,6 +24,7 @@ python ./deploy/ONNX/export_onnx.py \
 - `--inplace` : Whether to set Detect() inplace.
 - `--simplify` : Whether to simplify onnx. Not support in end to end export.
 - `--end2end` : Whether to export end to end onnx model. Only support onnxruntime and TensorRT >= 8.0.0 .
+- `--with-preprocess` : Whether to export preprocess with bgr2rgb and normalize (divide by 255)
 - `--max-wh` : Default is None for TensorRT backend. Set int for onnxruntime backend.
 - `--topk-all` : Topk objects for every image.
 - `--iou-thres` : IoU threshold for NMS algorithm.
@@ -60,7 +61,7 @@ The onnx outputs shape is ```nums x 7```.
 
 ```nums``` means the number of all objects which were detected.
 
-```7```  means [`batch_index`,`x0`,`y0`,`x1`,` y1`,`classid`,`score`]
+```7```  means [`batch_index`,`x0`,`y0`,`x1`,`y1`,`classid`,`score`]
 
 ### TensorRT backend (TensorRT version>= 8.0.0)
 
@@ -81,7 +82,7 @@ The onnx outputs are as shown :
 
 ```num_dets``` means the number of object in every image in its batch .
 
-```det_boxes``` means topk(100) object's location about [`x0`,`y0`,`x1`,` y1`] .
+```det_boxes``` means topk(100) object's location about [`x0`,`y0`,`x1`,`y1`] .
 
 ```det_scores``` means the confidence score of every topk(100) objects .
 

diff --git a/deploy/ONNX/export_onnx.py b/deploy/ONNX/export_onnx.py
@@ -29,7 +29,8 @@
     parser.add_argument('--inplace', action='store_true', help='set Detect() inplace=True')
     parser.add_argument('--simplify', action='store_true', help='simplify onnx model')
     parser.add_argument('--end2end', action='store_true', help='export end2end onnx')
-    parser.add_argument('--max-wh', type=int, default=None, help='None for trt int for ort')
+    parser.add_argument('--with-preprocess', action='store_true', help='export bgr2rgb and normalize')
+    parser.add_argument('--max-wh', type=int, default=None, help='None for tensorrt nms, int value for onnx-runtime nms')
     parser.add_argument('--topk-all', type=int, default=100, help='topk objects for every images')
     parser.add_argument('--iou-thres', type=float, default=0.45, help='iou threshold for NMS')
     parser.add_argument('--conf-thres', type=float, default=0.25, help='conf threshold for NMS')
@@ -64,8 +65,8 @@
             m.inplace = args.inplace
     if args.end2end:
         from yolov6.models.end2end import End2End
-        model = End2End(model, max_obj=args.topk_all, iou_thres=args.iou_thres,
-                        score_thres=args.conf_thres, max_wh=args.max_wh, device=device)
+        model = End2End(model, max_obj=args.topk_all, iou_thres=args.iou_thres,score_thres=args.conf_thres,
+                        max_wh=args.max_wh, device=device, with_preprocess=args.with_preprocess)
 
     y = model(img)  # dry run
 

diff --git a/docs/tutorial_voc.ipynb b/docs/tutorial_voc.ipynb
@@ -0,0 +1,277 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Training YOLOv6 on VOC dataset"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 1: Prepare VOC dataset"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "|  dataset | url | size  | images  |\n",
+    "|  :----:  |  :----:  |:----:  | :----:  |\n",
+    "| VOC2007 trainval  | [download zip](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar) | 446MB | 5012  \n",
+    "| VOC2007 test  | [download zip](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar) | 438MB | 4953\n",
+    "| VOC2012 trainval  | [download zip](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar) | 1.95GB | 17126"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Download the VOC datasets and extract them. The resulting directory looks like:\n",
+    "```\n",
+    "VOCdevkit\n",
+    "├── VOC2007\n",
+    "│   ├── Annotations\n",
+    "│   ├── ImageSets\n",
+    "│   ├── JPEGImages\n",
+    "│   ├── SegmentationClass\n",
+    "│   └── SegmentationObject\n",
+    "└── VOC2012\n",
+    "    ├── Annotations\n",
+    "    ├── ImageSets\n",
+    "    ├── JPEGImages\n",
+    "    ├── SegmentationClass\n",
+    "    └── SegmentationObject\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 2: Convert VOC dataset to YOLO-format."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The VOC dataset uses XML-format annotations, as shown below (refer to the [VOC2007 guidelines](http://host.robots.ox.ac.uk/pascal/VOC/voc2007/guidelines.html)).\n",
+    "```\n",
+    "<annotation>\n",
+    "\t<folder>VOC2007</folder>\n",
+    "\t<filename>000007.jpg</filename>\n",
+    "\t<source>\n",
+    "\t\t<database>The VOC2007 Database</database>\n",
+    "\t\t<annotation>PASCAL VOC2007</annotation>\n",
+    "\t\t<image>flickr</image>\n",
+    "\t\t<flickrid>194179466</flickrid>\n",
+    "\t</source>\n",
+    "\t<owner>\n",
+    "\t\t<flickrid>monsieurrompu</flickrid>\n",
+    "\t\t<name>Thom Zemanek</name>\n",
+    "\t</owner>\n",
+    "\t<size>\n",
+    "\t\t<width>500</width>\n",
+    "\t\t<height>333</height>\n",
+    "\t\t<depth>3</depth>\n",
+    "\t</size>\n",
+    "\t<segmented>0</segmented>\n",
+    "\t<object>\n",
+    "\t\t<name>car</name>\n",
+    "\t\t<pose>Unspecified</pose>\n",
+    "\t\t<truncated>1</truncated>\n",
+    "\t\t<difficult>0</difficult>\n",
+    "\t\t<bndbox>\n",
+    "\t\t\t<xmin>141</xmin>\n",
+    "\t\t\t<ymin>50</ymin>\n",
+    "\t\t\t<xmax>500</xmax>\n",
+    "\t\t\t<ymax>330</ymax>\n",
+    "\t\t</bndbox>\n",
+    "\t</object>\n",
+    "</annotation>\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Run the following command to convert voc dataset to yolo format:\n",
+    "\n",
+    "&ensp;&ensp;`python yolov6/data/voc2yolo.py --voc_path your_path/to/VOCdevkit`"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We follow the `07+12` training setting, which uses the train+val splits of VOC2007 and VOC2012 (16551 images) as the training set, and the VOC2007 test split (4952 images) as both the validation set and the test set.\n",
+    "\n",
+    "Finally, the directory looks like:\n",
+    "```\n",
+    "VOCdevkit\n",
+    "├── images\n",
+    "├── labels\n",
+    "├── voc_07_12\n",
+    "│   ├── images\n",
+    "│   │   ├── train\n",
+    "│   │   └── val\n",
+    "│   └── labels\n",
+    "│       ├── train\n",
+    "│       └── val\n",
+    "├── VOC2007\n",
+    "└── VOC2012\n",
+    "```\n",
+    "Where `voc_07_12` is the converted yolo-format dataset."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Visualize yolo format dataset (Optional)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To check if your dataset is correct, run the following command:\n",
+    "\n",
+    "&ensp;&ensp;`python yolov6/data/vis_dataset.py --img_dir your_path/to/VOCdevkit/images/train --label_dir your_path/to/VOCdevkit/labels/train`"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 3: Create dataset config file."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Create `data/voc.yaml` like:\n",
+    "\n",
+    "```\n",
+    "# Please ensure that your custom_dataset is placed in the same parent dir as YOLOv6_DIR\n",
+    "train: your_path/to/VOCdevkit/voc_07_12/images/train # train images\n",
+    "val: your_path/to/VOCdevkit/voc_07_12/images/val # val images\n",
+    "test: your_path/to/VOCdevkit/voc_07_12/images/val # test images (optional)\n",
+    "\n",
+    "# whether it is coco dataset, only coco dataset should be set to True.\n",
+    "is_coco: False\n",
+    "# Classes\n",
+    "nc: 20  # number of classes\n",
+    "names: ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',\n",
+    "        'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']  # class names\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 4: Training.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Use the following command to start training:\n",
+    "- Multi GPUs (DDP mode recommended)\n",
+    "\n",
+    "&ensp;&ensp;`python -m torch.distributed.launch --nproc_per_node 4 --master_port=23456 tools/train.py --batch 256 --conf configs/yolov6n_finetune.py --data data/voc.yaml --device 0,1,2,3`\n",
+    "\n",
+    "- Single GPU\n",
+    "\n",
+    "&ensp;&ensp;`python tools/train.py --batch 256 --conf configs/yolov6n_finetune.py --data data/voc.yaml --device 0`"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Tensorboard\n",
+    "We can use tensorboard to visualize the loss/mAP curve, run:\n",
+    "\n",
+    "&ensp;&ensp;`tensorboard --logdir=your_path/to/log`\n",
+    "\n",
+    "![Training loss/mAP curve](../assets/voc_loss_curve.jpg 'Training loss/mAP curve')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Evaluation\n",
+    "When training finishes, evaluation is run automatically on the test set. The output metrics are:\n",
+    "```\n",
+    "DONE (t=4.21s).\n",
+    " Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.632\n",
+    " Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.854\n",
+    " Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.702\n",
+    " Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.272\n",
+    " Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.473\n",
+    " Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.689\n",
+    " Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.518\n",
+    " Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.737\n",
+    " Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.751\n",
+    " Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.554\n",
+    " Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.656\n",
+    " Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.791\n",
+    "Epoch: 399 | mAP@0.5: 0.8542516455615079 | mAP@0.50:0.95: 0.6315693468708705\n",
+    "\n",
+    "Training completed in 9.206 hours.\n",
+    "```\n",
+    "Or you can manually evaluate the model on your dataset by running:\n",
+    "\n",
+    "&ensp;&ensp;`python tools/eval.py --data data/voc.yaml  --weights your_path/to/weights/best_ckpt.pt --device 0`"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 5: Inference\n",
+    "\n",
+    "&ensp;&ensp;`python tools/infer.py --weights your_path/to/weights/best_ckpt.pt --yaml data/voc.yaml --source data/images/image3.jpg --device 0`\n",
+    "\n",
+    "![image3.jpg](../assets/image3.jpg)\n",
+    "\n",
+    "### Step 6: Deployment\n",
+    "\n",
+    "&ensp;&ensp;`python deploy/ONNX/export_onnx.py --weights your_path/to/weights/best_ckpt.pt --device 0`"
+   ]
+  }
+ ],
+ "metadata": {
+  "interpreter": {
+   "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
+  },
+  "kernelspec": {
+   "display_name": "Python 3.8.2 64-bit",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.0"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/tools/infer.py b/tools/infer.py
@@ -60,10 +60,7 @@ def run(weights=osp.join(ROOT, 'yolov6s.pt'),
         hide_conf=False,
         half=False,
         ):
-    """ Inference process
-
-    This function is the main process of inference, supporting image files or dirs containing images.
-
+    """ Inference process, supporting inference on a single image file or a directory containing images.
     Args:
         weights: The path of model.pt, e.g. yolov6s.pt
         source: Source path, supporting image files or dirs containing images.
@@ -91,7 +88,9 @@ def run(weights=osp.join(ROOT, 'yolov6s.pt'),
     else:
         LOGGER.warning('Save directory already existed')
     if save_txt:
-        os.mkdir(osp.join(save_dir, 'labels'))
+        save_txt_path = osp.join(save_dir, 'labels')
+        if not osp.exists(save_txt_path):
+            os.makedirs(save_txt_path)
 
     # Inference
     inferer = Inferer(source, weights, device, yaml, img_size, half)

diff --git a/yolov6/core/engine.py b/yolov6/core/engine.py
@@ -22,6 +22,7 @@
 from yolov6.utils.checkpoint import load_state_dict, save_checkpoint, strip_optimizer
 from yolov6.solver.build import build_optimizer, build_lr_scheduler
 
+
 class Trainer:
     def __init__(self, args, cfg, device):
         self.args = args
@@ -65,7 +66,6 @@ def __init__(self, args, cfg, device):
         self.batch_size = args.batch_size
         self.img_size = args.img_size
 
-
     # Training Process
 
     def train(self):

diff --git a/yolov6/core/evaler.py b/yolov6/core/evaler.py
@@ -207,7 +207,7 @@ def convert_to_coco_format(self, outputs, imgs, paths, shapes, ids):
 
     @staticmethod
     def check_task(task):
-        if task not in ['train','val','speed']:
+        if task not in ['train', 'val', 'speed']:
             raise Exception("task argument error: only support 'train' / 'val' / 'speed' task.")
 
     @staticmethod