Support rectangle train (meituan#818)

* add rectangle train. * support multi directory image reader.
ZHEQIUSHUI · May 8, 2023 · 494f58f · 494f58f
1 parent e7dd9b5
commit 494f58f
Show file tree

Hide file tree

Showing 19 changed files with 283 additions and 138 deletions.
diff --git a/README.md b/README.md
@@ -165,6 +165,8 @@ python -m torch.distributed.launch --nproc_per_node 8 tools/train.py --batch 128
 │   ├── README.txt
 ```
 
+YOLOv6 supports different input resolution modes. For details, see [How to Set the Input Size](./docs/About_training_size.md).
+
 </details>
 
 <details>

diff --git a/README_cn.md b/README_cn.md
@@ -156,6 +156,8 @@ python -m torch.distributed.launch --nproc_per_node 8 tools/train.py --batch 128
 │   │   ├── val2017
 ```
 
+YOLOv6 支持不同的输入分辨率模式，详情请参见 [如何设置输入大小](./docs/About_training_size_cn.md).
+
 </details>
 
 <details>

diff --git a/deploy/TensorRT/Processor.py b/deploy/TensorRT/Processor.py
@@ -170,7 +170,7 @@ def output_reformate(self, outputs):
 
             if self.grid[i].shape[2:4] != y.shape[2:4]:
                 d = self.stride.device
-                yv, xv = torch.meshgrid([torch.arange(ny).to(d), torch.arange(nx).to(d)])
+                yv, xv = torch.meshgrid([torch.arange(ny).to(d), torch.arange(nx).to(d)], indexing='ij')
                 self.grid[i] = torch.stack((xv, yv), 2).view(1, self.na, ny, nx, 2).float()
             if self.inplace:
                 y[..., 0:2] = (y[..., 0:2] + self.grid[i]) * self.stride[i]  # xy

diff --git a/deploy/TensorRT/tensorrt_processor.py b/deploy/TensorRT/tensorrt_processor.py
@@ -165,7 +165,7 @@ def output_reformate(self, outputs):
 
             if self.grid[i].shape[2:4] != y.shape[2:4]:
                 d = self.stride.device
-                yv, xv = torch.meshgrid([torch.arange(ny).to(d), torch.arange(nx).to(d)])
+                yv, xv = torch.meshgrid([torch.arange(ny).to(d), torch.arange(nx).to(d)], indexing='ij')
                 self.grid[i] = torch.stack((xv, yv), 2).view(1, self.na, ny, nx, 2).float()
             if self.inplace:
                 y[..., 0:2] = (y[..., 0:2] + self.grid[i]) * self.stride[i]  # xy

diff --git a/docs/About_training_size.md b/docs/About_training_size.md
@@ -0,0 +1,18 @@
+# Training size explanation 
+
+YOLOv6 supports three training size modes.
+
+## 1. Square shape training
+If you only pass one number to `--img-size`, such as `--img-size 640`, the longer side of the image will be resized to 640 while keeping the aspect ratio; the shorter side will be scaled by the same ratio and then padded to 640. The image sent to the model will have a resolution of (640, 640, 3).
+
+## 2. Rectangle shape training
+If you pass `--img-size 640` and `--rect`, the longer side of the image will be resized to 640 while keeping the aspect ratio; the shorter side will be scaled by the same ratio and then padded to a multiple of 32 (if needed). 
+For example, if an image's shape is (720, 1280, 3), after the aspect-ratio-preserving resize its shape becomes (360, 640, 3). However, 360 is not a multiple of 32, so it will be padded to (384, 640, 3).
+
+## 3. Specific shape
+
+In the rectangle shape mode, the training process may see different training sizes, such as (1080, 1920, 3) and (1200, 1600, 3). If you want to specify a single shape, you can use the `--specific-shape` option and specify your training shape with `--height` and `--width`, for example:
+```
+python tools/train.py --data data/dataset.yaml --conf configs/yolov6n.py --specific-shape --width 1920 --height 1080
+``` 
+Then, the resolution of the training data will be (1080, 1920, 3) regardless of the shapes of the images in the dataset.
diff --git a/docs/About_training_size_cn.md b/docs/About_training_size_cn.md
@@ -0,0 +1,16 @@
+# 训练尺寸说明
+YOLOv6支持三种训练尺寸模式。
+
+## 1. 正方形尺寸训练
+如果只给 `--img-size` 指定一个数字，例如 `--img-size 640`，则图像的长边将被缩放到 640（保持长宽比），短边等比例缩放后，将被填充到 640。送入模型的图像的分辨率将变为（640, 640, 3）。
+
+## 2. 矩形尺寸训练
+如果传递了 `--img-size 640` 和 `--rect`，则图像的长边将被缩放到 640（保持长宽比），短边将被等比例缩放，然后填充到 32 的倍数（如果需要）。
+例如，如果一张图像的形状为（720, 1280, 3），在等比例缩放后，它的形状将变为（360, 640, 3），但是 360 不是 32 的倍数，因此它将被填充为（384, 640, 3）。
+
+## 3. 特定尺寸
+在矩形尺寸训练模式下，训练过程可能有不同的训练尺寸，例如（1080, 1920, 3）和（1200, 1600, 3）。如果您想指定一个尺寸，可以使用 `--specific-shape` 命令，并使用 `--height` 和 `--width` 指定您的训练尺寸，例如：
+```
+python tools/train.py --data data/dataset.yaml --conf configs/yolov6n.py --specific-shape --width 1920 --height 1080
+``` 
+那么，无论数据集中图片的形状是什么，训练数据的分辨率都将是 (1080, 1920, 3)。
diff --git a/tools/eval.py b/tools/eval.py
@@ -12,7 +12,7 @@
 
 from yolov6.core.evaler import Evaler
 from yolov6.utils.events import LOGGER
-from yolov6.utils.general import increment_name
+from yolov6.utils.general import increment_name, check_img_size
 from yolov6.utils.config import Config
 
 def boolean_string(s):
@@ -43,6 +43,9 @@ def get_args_parser(add_help=True):
     parser.add_argument('--plot_confusion_matrix', default=False, action='store_true', help='whether to save confusion matrix plots when do pr metric, might cause no harm warning print')
     parser.add_argument('--verbose', default=False, action='store_true', help='whether to print metric on each class')
     parser.add_argument('--config-file', default='', type=str, help='experiments description file, lower priority than reproduce_640_eval')
+    parser.add_argument('--specific-shape', action='store_true', help='rectangular training')
+    parser.add_argument('--height', type=int, default=None, help='image height of model input')
+    parser.add_argument('--width', type=int, default=None, help='image width of model input')
     args = parser.parse_args()
 
     if args.config_file:
@@ -108,6 +111,9 @@ def run(data,
         plot_curve=False,
         plot_confusion_matrix=False,
         config_file=None,
+        specific_shape=False,
+        height=640,
+        width=640
         ):
     """ Run the evaluation process
 
@@ -132,11 +138,18 @@ def run(data,
     half = device.type != 'cpu' and half
     data = Evaler.reload_dataset(data, task) if isinstance(data, str) else data
 
-    # init
+    # # verify imgsz is gs-multiple
+    if specific_shape:
+        height = check_img_size(height, 32, floor=256)
+        width = check_img_size(width, 32, floor=256)
+    else:
+        img_size = check_img_size(img_size, 32, floor=256)
     val = Evaler(data, batch_size, img_size, conf_thres, \
                 iou_thres, device, half, save_dir, \
                 shrink_size, infer_on_rect,
-                verbose, do_coco_metric, do_pr_metric, plot_curve, plot_confusion_matrix)
+                verbose, do_coco_metric, do_pr_metric,
+                plot_curve, plot_confusion_matrix,
+                specific_shape=specific_shape,height=height, width=width)
     model = val.init_model(model, weights, task)
     dataloader = val.init_data(dataloader, task)
 

diff --git a/tools/train.py b/tools/train.py
@@ -9,6 +9,7 @@
 import torch
 import torch.distributed as dist
 import sys
+import datetime
 
 ROOT = os.getcwd()
 if str(ROOT) not in sys.path:
@@ -18,14 +19,15 @@
 from yolov6.utils.config import Config
 from yolov6.utils.events import LOGGER, save_yaml
 from yolov6.utils.envs import get_envs, select_device, set_random_seed
-from yolov6.utils.general import increment_name, find_latest_checkpoint
+from yolov6.utils.general import increment_name, find_latest_checkpoint, check_img_size
 
 
 def get_args_parser(add_help=True):
     parser = argparse.ArgumentParser(description='YOLOv6 PyTorch Training', add_help=add_help)
     parser.add_argument('--data-path', default='./data/coco.yaml', type=str, help='path of dataset')
     parser.add_argument('--conf-file', default='./configs/yolov6n.py', type=str, help='experiments description file')
     parser.add_argument('--img-size', default=640, type=int, help='train, val image size (pixels)')
+    parser.add_argument('--rect', action='store_true', help='whether to use rectangular training, default is False')
     parser.add_argument('--batch-size', default=32, type=int, help='total batch size for all GPUs')
     parser.add_argument('--epochs', default=400, type=int, help='number of total epochs to run')
     parser.add_argument('--workers', default=8, type=int, help='number of data loading workers (default: 8)')
@@ -53,6 +55,9 @@ def get_args_parser(add_help=True):
     parser.add_argument('--temperature', type=int, default=20, help='distill temperature')
     parser.add_argument('--fuse_ab', action='store_true', help='fuse ab branch in training process or not')
     parser.add_argument('--bs_per_gpu', default=32, type=int, help='batch size per GPU for auto-rescale learning rate, set to 16 for P6 models')
+    parser.add_argument('--specific-shape', action='store_true', help='rectangular training')
+    parser.add_argument('--height', type=int, default=None, help='image height of model input')
+    parser.add_argument('--width', type=int, default=None, help='image width of model input')
     return parser
 
 
@@ -80,6 +85,15 @@ def check_and_init(args):
         if master_process:
             os.makedirs(args.save_dir)
 
+    # check specific shape 
+    if args.specific_shape:
+        if args.rect:
+            LOGGER.warning('You set specific shape, and rect to True is needless. YOLOv6 will use the specific shape to train.')
+        args.height = check_img_size(args.height, 32, floor=256)  # verify imgsz is gs-multiple
+        args.width = check_img_size(args.width, 32, floor=256)
+    else:
+        args.img_size = check_img_size(args.img_size, 32, floor=256)
+
     cfg = Config.fromfile(args.conf_file)
     if not hasattr(cfg, 'training_mode'):
         setattr(cfg, 'training_mode', 'repvgg')
@@ -107,7 +121,7 @@ def main(args):
         device = torch.device('cuda', args.local_rank)
         LOGGER.info('Initializing process group... ')
         dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo", \
-                init_method=args.dist_url, rank=args.local_rank, world_size=args.world_size)
+                init_method=args.dist_url, rank=args.local_rank, world_size=args.world_size,timeout=datetime.timedelta(seconds=7200))
 
     # Start
     trainer = Trainer(args, cfg, device)

diff --git a/yolov6/assigners/anchor_generator.py b/yolov6/assigners/anchor_generator.py
@@ -13,7 +13,7 @@ def generate_anchors(feats, fpn_strides, grid_cell_size=5.0, grid_cell_offset=0.
             _, _, h, w = feats[i].shape
             shift_x = torch.arange(end=w, device=device) + grid_cell_offset
             shift_y = torch.arange(end=h, device=device) + grid_cell_offset
-            shift_y, shift_x = torch.meshgrid(shift_y, shift_x)
+            shift_y, shift_x = torch.meshgrid(shift_y, shift_x, indexing='ij')
             anchor_point = torch.stack(
                     [shift_x, shift_y], axis=-1).to(torch.float)
             if mode == 'af': # anchor-free
@@ -35,7 +35,7 @@ def generate_anchors(feats, fpn_strides, grid_cell_size=5.0, grid_cell_offset=0.
             cell_half_size = grid_cell_size * stride * 0.5
             shift_x = (torch.arange(end=w, device=device) + grid_cell_offset) * stride
             shift_y = (torch.arange(end=h, device=device) + grid_cell_offset) * stride
-            shift_y, shift_x = torch.meshgrid(shift_y, shift_x)
+            shift_y, shift_x = torch.meshgrid(shift_y, shift_x, indexing='ij')
             anchor = torch.stack(
                 [
                     shift_x - cell_half_size, shift_y - cell_half_size,