diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..4794cc2b --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,7 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.3.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace diff --git a/README.md b/README.md index f533316d..8c9cb383 100644 --- a/README.md +++ b/README.md @@ -28,10 +28,10 @@ YOLOv6 is composed of the following methods: ```shell git clone https://github.com/meituan/YOLOv6 cd YOLOv6 -pip install -r requirements.txt +pip install -r requirements.txt ``` -### Inference +### Inference First, download a pretrained model from the YOLOv6 release @@ -39,12 +39,12 @@ Second, run inference with `tools/infer.py` ```shell python tools/infer.py --weights yolov6s.pt --source [img.jpg / imgdir] - yolov6n.pt + yolov6n.pt ``` ### Training -Single GPU +Single GPU ```shell python tools/train.py --batch 256 --conf configs/yolov6s.py --data data/coco.yaml --device 0 diff --git a/configs/yolov6_tiny.py b/configs/yolov6_tiny.py index 6e6651b9..fea2595a 100644 --- a/configs/yolov6_tiny.py +++ b/configs/yolov6_tiny.py @@ -2,7 +2,7 @@ model = dict( type='YOLOv6t', pretrained=None, - depth_multiple=0.25, + depth_multiple=0.25, width_multiple=0.50, backbone=dict( type='EfficientRep', @@ -35,12 +35,12 @@ weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, - warmup_bias_lr=0.1 + warmup_bias_lr=0.1 ) - + data_aug = dict( - hsv_h=0.015, - hsv_s=0.7, + hsv_h=0.015, + hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, diff --git a/configs/yolov6_tiny_finetune.py b/configs/yolov6_tiny_finetune.py index de354a8e..d751eff0 100644 --- a/configs/yolov6_tiny_finetune.py +++ b/configs/yolov6_tiny_finetune.py @@ -2,7 +2,7 @@ model = dict( type='YOLOv6t', pretrained='./weights/yolov6t.pt', - depth_multiple=0.25, + depth_multiple=0.25, width_multiple=0.50, backbone=dict( type='EfficientRep', @@ -24,7 +24,7 @@ strides=[8, 16, 32], iou_type='ciou' ) -) +) solver = dict( optim='SGD', @@ -35,9 +35,9 @@ weight_decay=0.00036, warmup_epochs=2.0, warmup_momentum=0.5, - warmup_bias_lr=0.05 + warmup_bias_lr=0.05 ) - + data_aug = dict( hsv_h=0.0138, hsv_s=0.664, diff --git a/configs/yolov6n.py b/configs/yolov6n.py index 93d07c64..40b6e0c4 100644 --- a/configs/yolov6n.py +++ b/configs/yolov6n.py @@ -2,7 +2,7 @@ model = dict( type='YOLOv6n', pretrained=None, - depth_multiple=0.33, + depth_multiple=0.33, width_multiple=0.25, backbone=dict( type='EfficientRep', @@ -35,12 +35,12 @@ weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, - warmup_bias_lr=0.1 + warmup_bias_lr=0.1 ) data_aug = dict( - hsv_h=0.015, - hsv_s=0.7, + hsv_h=0.015, + hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, @@ -50,4 +50,4 @@ fliplr=0.5, mosaic=1.0, mixup=0.0, -) \ No newline at end of file +) diff --git a/configs/yolov6n_finetune.py b/configs/yolov6n_finetune.py index 1c2944fa..7d1fab5a 100644 --- a/configs/yolov6n_finetune.py +++ b/configs/yolov6n_finetune.py @@ -24,7 +24,7 @@ strides=[8, 16, 32], iou_type='ciou' ) -) +) solver = dict( optim='SGD', @@ -35,9 +35,9 @@ weight_decay=0.00036, warmup_epochs=2.0, warmup_momentum=0.5, - warmup_bias_lr=0.05 + warmup_bias_lr=0.05 ) - + data_aug = dict( hsv_h=0.0138, hsv_s=0.664, diff --git a/configs/yolov6s.py b/configs/yolov6s.py index 10e86703..8b281bf6 100644 --- a/configs/yolov6s.py +++ b/configs/yolov6s.py @@ -35,12 +35,12 @@ weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, - warmup_bias_lr=0.1 + warmup_bias_lr=0.1 ) data_aug = 
dict( - hsv_h=0.015, - hsv_s=0.7, + hsv_h=0.015, + hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, diff --git a/configs/yolov6s_finetune.py b/configs/yolov6s_finetune.py index 2c7fa49b..50d1d5a6 100644 --- a/configs/yolov6s_finetune.py +++ b/configs/yolov6s_finetune.py @@ -2,7 +2,7 @@ model = dict( type='YOLOv6s', pretrained='./weights/yolov6s.pt', - depth_multiple=0.33, + depth_multiple=0.33, width_multiple=0.50, backbone=dict( type='EfficientRep', @@ -24,7 +24,7 @@ strides=[8, 16, 32], iou_type='siou' ) -) +) solver = dict( optim='SGD', @@ -35,9 +35,9 @@ weight_decay=0.00036, warmup_epochs=2.0, warmup_momentum=0.5, - warmup_bias_lr=0.05 + warmup_bias_lr=0.05 ) - + data_aug = dict( hsv_h=0.0138, hsv_s=0.664, diff --git a/data/coco.yaml b/data/coco.yaml index 3058c3f3..699551b9 100644 --- a/data/coco.yaml +++ b/data/coco.yaml @@ -16,4 +16,3 @@ names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', ' 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush' ] - diff --git a/deploy/ONNX/README.md b/deploy/ONNX/README.md index 57ba3a1f..72fd70b0 100644 --- a/deploy/ONNX/README.md +++ b/deploy/ONNX/README.md @@ -2,13 +2,13 @@ ### Check requirements ```shell -pip install onnx>=1.10.0 +pip install onnx>=1.10.0 ``` ### Export script ```shell python deploy/ONNX/export_onnx.py --weights yolov6s.pt --img 640 --batch 1 - + ``` ### Download diff --git a/deploy/ONNX/export_onnx.py b/deploy/ONNX/export_onnx.py index 363a17e2..ef29802a 100644 --- a/deploy/ONNX/export_onnx.py +++ b/deploy/ONNX/export_onnx.py @@ -3,12 +3,12 @@ import argparse import time import sys -import os +import os import torch import torch.nn as nn import onnx -ROOT = os.getcwd() +ROOT = os.getcwd() if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) @@ -21,9 +21,9 @@ if __name__ == '__main__': parser = argparse.ArgumentParser() - parser.add_argument('--weights', type=str, default='./yolov6s.pt', help='weights path') + parser.add_argument('--weights', type=str, default='./yolov6s.pt', help='weights path') parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') # height, width - parser.add_argument('--batch-size', type=int, default=1, help='batch size') + parser.add_argument('--batch-size', type=int, default=1, help='batch size') parser.add_argument('--half', action='store_true', help='FP16 half-precision export') parser.add_argument('--inplace', action='store_true', help='set Detect() inplace=True') parser.add_argument('--device', default='0', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') @@ -34,9 +34,9 @@ # Check device cuda = args.device != 'cpu' and torch.cuda.is_available() - device = torch.device('cuda:0' if cuda else 'cpu') + device = torch.device('cuda:0' if cuda else 'cpu') assert not (device.type == 'cpu' and args.half), '--half only compatible with GPU export, i.e. 
use --device 0' - # Load PyTorch model + # Load PyTorch model model = load_checkpoint(args.weights, map_location=device, inplace=True, fuse=True) # load FP32 model for layer in model.modules(): if isinstance(layer, RepVGGBlock): @@ -55,7 +55,7 @@ m.act = SiLU() elif isinstance(m, EffiDeHead): m.inplace = args.inplace - + y = model(img) # dry run # ONNX export diff --git a/deploy/OpenVINO/README.md b/deploy/OpenVINO/README.md index 8f48e5eb..cd800e32 100644 --- a/deploy/OpenVINO/README.md +++ b/deploy/OpenVINO/README.md @@ -3,13 +3,13 @@ ### Check requirements ```shell pip install --upgrade pip -pip install openvino-dev +pip install openvino-dev ``` ### Export script ```shell python deploy/OpenVINO/export_openvino.py --weights yolov6s.pt --img 640 --batch 1 - + ``` ### Download diff --git a/deploy/OpenVINO/export_openvino.py b/deploy/OpenVINO/export_openvino.py index 4d209003..c5d0a5b4 100644 --- a/deploy/OpenVINO/export_openvino.py +++ b/deploy/OpenVINO/export_openvino.py @@ -3,13 +3,13 @@ import argparse import time import sys -import os +import os import torch import torch.nn as nn import onnx import subprocess -ROOT = os.getcwd() +ROOT = os.getcwd() if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) @@ -22,9 +22,9 @@ if __name__ == '__main__': parser = argparse.ArgumentParser() - parser.add_argument('--weights', type=str, default='./yolov6s.pt', help='weights path') + parser.add_argument('--weights', type=str, default='./yolov6s.pt', help='weights path') parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') # height, width - parser.add_argument('--batch-size', type=int, default=1, help='batch size') + parser.add_argument('--batch-size', type=int, default=1, help='batch size') parser.add_argument('--half', action='store_true', help='FP16 half-precision export') parser.add_argument('--inplace', action='store_true', help='set Detect() inplace=True') parser.add_argument('--device', default='0', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') @@ -35,9 +35,9 @@ # Check device cuda = args.device != 'cpu' and torch.cuda.is_available() - device = torch.device('cuda:0' if cuda else 'cpu') + device = torch.device('cuda:0' if cuda else 'cpu') assert not (device.type == 'cpu' and args.half), '--half only compatible with GPU export, i.e. use --device 0' - # Load PyTorch model + # Load PyTorch model model = load_checkpoint(args.weights, map_location=device, inplace=True, fuse=True) # load FP32 model for layer in model.modules(): if isinstance(layer, RepVGGBlock): @@ -56,7 +56,7 @@ m.act = SiLU() elif isinstance(m, EffiDeHead): m.inplace = args.inplace - + y = model(img) # dry run # ONNX export diff --git a/docs/Test_speed.md b/docs/Test_speed.md index ee9ab64c..526e0474 100644 --- a/docs/Test_speed.md +++ b/docs/Test_speed.md @@ -8,7 +8,7 @@ Download the models you want to test from the latest release. ## 1. Prepare testing environment -Refer to README, install packages corresponding to CUDA, CUDNN and TensorRT version. +Refer to README, install packages corresponding to CUDA, CUDNN and TensorRT version. Here, we use Torch1.8.0 inference on V100 and TensorRT 7.2 on T4. 
@@ -31,7 +31,7 @@ To get inference speed with TensorRT in FP16 mode on T4, you can follow the ste First, export pytorch model as onnx format using the following command: ```shell -python deploy/ONNX/export_onnx.py --weights yolov6n.pt --device 0 --batch [1 or 32] +python deploy/ONNX/export_onnx.py --weights yolov6n.pt --device 0 --batch [1 or 32] ``` Second, generate an inference trt engine and test speed using `trtexec`: @@ -39,4 +39,3 @@ Second, generate an inference trt engine and test speed using `trtexec`: ``` trtexec --onnx=yolov6n.onnx --workspace=1024 --avgRuns=1000 --inputIOFormats=fp16:chw --outputIOFormats=fp16:chw ``` - diff --git a/docs/Train_custom_data.md b/docs/Train_custom_data.md index 60deaaa6..de67a37f 100644 --- a/docs/Train_custom_data.md +++ b/docs/Train_custom_data.md @@ -27,7 +27,7 @@ One image corresponds to one label file, and the label format example is present **Step 3** Organize directories. -Organize your train and val images and label files according to the example below. +Organize your train and val images and label files according to the example below. ```shell # image directory @@ -68,19 +68,19 @@ Or just use the provided config file in `$YOLOV6_HOME/configs/*_finetune.py`. model = dict( type='YOLOv6s', pretrained='./weights/yolov6s.pt', # download pretrain model from YOLOv6 github if use pretrained model - depth_multiple = 0.33, + depth_multiple = 0.33, width_multiple = 0.50, ... ) solver=dict( optim='SGD', lr_scheduler='Cosine', - ... + ... ) data_aug = dict( - hsv_h=0.015, - hsv_s=0.7, + hsv_h=0.015, + hsv_s=0.7, hsv_v=0.4, ... ) @@ -90,7 +90,7 @@ data_aug = dict( ## 3. Train -Single GPU +Single GPU ```shell python tools/train.py --batch 256 --conf configs/yolov6s_finetune.py --data data/data.yaml --device 0 @@ -115,7 +115,7 @@ python tools/eval.py --data data/data.yaml --weights output_dir/name/weights/be ## 5. Inference ```shell -python tools/infer.py --weights output_dir/name/weights/best_ckpt.pt --source img.jpg --device 0 +python tools/infer.py --weights output_dir/name/weights/best_ckpt.pt --source img.jpg --device 0 ``` @@ -127,4 +127,3 @@ Export as ONNX Format ```shell python deploy/ONNX/export_onnx.py --weights output_dir/name/weights/best_ckpt.pt --device 0 ``` - diff --git a/requirements.txt b/requirements.txt index 33305090..304d48e9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,4 +12,4 @@ addict>=2.4.0 tensorboard>=2.7.0 pycocotools>=2.0 onnx>=1.10.0 # ONNX export -thop # FLOPs computation \ No newline at end of file +thop # FLOPs computation diff --git a/tools/eval.py b/tools/eval.py index 88e813b0..79861ac2 100644 --- a/tools/eval.py +++ b/tools/eval.py @@ -5,7 +5,7 @@ import sys import torch -ROOT = os.getcwd() +ROOT = os.getcwd() if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) @@ -52,24 +52,24 @@ def run(data, evaluation of inference speed of model.pt. 
""" - + # task Evaler.check_task(task) if not os.path.exists(save_dir): - os.makedirs(save_dir) + os.makedirs(save_dir) # reload thres/device/half/data according task conf_thres, iou_thres = Evaler.reload_thres(conf_thres, iou_thres, task) device = Evaler.reload_device(device, model, task) half = device.type != 'cpu' and half data = Evaler.reload_dataset(data) if isinstance(data, str) else data - + # init val = Evaler(data, batch_size, img_size, conf_thres, \ iou_thres, device, half, save_dir) model = val.init_model(model, weights, task) dataloader = val.init_data(dataloader, task) - + # eval model.eval() pred_result = val.predict_model(model, dataloader, task) @@ -83,4 +83,4 @@ def main(args): if __name__ == "__main__": args = get_args_parser() - main(args) + main(args) diff --git a/tools/infer.py b/tools/infer.py index 48be13b0..89841b0a 100644 --- a/tools/infer.py +++ b/tools/infer.py @@ -9,7 +9,7 @@ ROOT = os.getcwd() if str(ROOT) not in sys.path: - sys.path.append(str(ROOT)) + sys.path.append(str(ROOT)) from yolov6.utils.events import LOGGER from yolov6.core.inferer import Inferer @@ -34,7 +34,7 @@ def get_args_parser(add_help=True): parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels.') parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences.') parser.add_argument('--half', action='store_true', help='whether to use FP16 half-precision inference.') - + args = parser.parse_args() LOGGER.info(args) return args @@ -59,7 +59,7 @@ def run(weights=osp.join(ROOT, 'yolov6s.pt'), half=False, ): """ Inference process - + This function is the main process of inference, supporting image files or dirs containing images. Args: diff --git a/tools/train.py b/tools/train.py index 3f1a592a..927d997b 100644 --- a/tools/train.py +++ b/tools/train.py @@ -7,7 +7,7 @@ import torch.distributed as dist import sys -ROOT = os.getcwd() +ROOT = os.getcwd() if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) @@ -20,12 +20,12 @@ def get_args_parser(add_help=True): parser = argparse.ArgumentParser(description='YOLOv6 PyTorch Training', add_help=add_help) parser.add_argument('--data-path', default='./data/coco.yaml', type=str, help='dataset path') - parser.add_argument('--conf-file', default='./configs/yolov6s.py', type=str, help='experiment description file') + parser.add_argument('--conf-file', default='./configs/yolov6s.py', type=str, help='experiment description file') parser.add_argument('--img-size', type=int, default=640, help='train, val image size (pixels)') parser.add_argument('--batch-size', default=32, type=int, help='total batch size for all GPUs') parser.add_argument('--epochs', default=400, type=int, help='number of total epochs to run') parser.add_argument('--workers', default=8, type=int, help='number of data loading workers (default: 8)') - parser.add_argument('--device', default='0', type=str, help='cuda device, i.e. 0 or 0,1,2,3 or cpu') + parser.add_argument('--device', default='0', type=str, help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') parser.add_argument('--noval', action='store_true', help='only evaluate in final epoch') parser.add_argument('--check-images', action='store_true', help='check images when initializing datasets') parser.add_argument('--check-labels', action='store_true', help='check label files when initializing datasets') @@ -34,7 +34,7 @@ def get_args_parser(add_help=True): parser.add_argument('--dist_url', type=str, default="tcp://127.0.0.1:8888") parser.add_argument('--gpu_count', type=int, default=0) parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify') - + return parser @@ -71,11 +71,11 @@ def main(args): LOGGER.info('Initializing process group... ') dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo", \ init_method=args.dist_url, rank=args.local_rank, world_size=args.world_size) - + # Start trainer = Trainer(args, cfg, device) trainer.train() - + # End if args.world_size > 1 and args.rank == 0: LOGGER.info('Destroying process group... ') @@ -84,4 +84,4 @@ def main(args): if __name__ == '__main__': args = get_args_parser().parse_args() - main(args) \ No newline at end of file + main(args) diff --git a/yolov6/core/engine.py b/yolov6/core/engine.py index 22cc9d1b..927523ed 100644 --- a/yolov6/core/engine.py +++ b/yolov6/core/engine.py @@ -28,7 +28,7 @@ def __init__(self, args, cfg, device): self.args = args self.cfg = cfg self.device = device - + self.rank = args.rank self.local_rank = args.local_rank self.world_size = args.world_size @@ -60,10 +60,10 @@ def train(self): self.train_before_loop() for self.epoch in range(self.start_epoch, self.max_epoch): self.train_in_loop() - + except Exception as _: LOGGER.error('ERROR in training loop or eval/save model.') - raise + raise finally: self.train_after_loop() @@ -73,7 +73,7 @@ def train_in_loop(self): self.prepare_for_steps() for self.step, self.batch_data in self.pbar: self.train_in_steps() - self.print_details() + self.print_details() except Exception as _: LOGGER.error('ERROR in training steps.') raise @@ -87,15 +87,15 @@ def train_in_loop(self): def train_in_steps(self): images, targets = self.prepro_data(self.batch_data, self.device) # forward - with amp.autocast(enabled=self.device != 'cpu'): - preds = self.model(images) + with amp.autocast(enabled=self.device != 'cpu'): + preds = self.model(images) total_loss, loss_items = self.compute_loss(preds, targets) if self.rank != -1: total_loss *= self.world_size # backward self.scaler.scale(total_loss).backward() self.loss_items = loss_items - self.update_optimizer() + self.update_optimizer() def eval_and_save(self): epoch_sub = self.max_epoch - self.epoch @@ -115,7 +115,7 @@ def eval_and_save(self): 'optimizer': self.optimizer.state_dict(), 'epoch': self.epoch, } - + save_ckpt_dir = osp.join(self.save_dir, 'weights') save_checkpoint(ckpt, (is_val_epoch) and (self.ap == self.best_ap), save_ckpt_dir, model_name='last_ckpt') del ckpt @@ -132,8 +132,8 @@ def eval_model(self): task='train') LOGGER.info(f"Epoch: {self.epoch} | mAP@0.5: {results[0]} | mAP@0.50:0.95: {results[1]}") - self.evaluate_results = results[:2] - + self.evaluate_results = results[:2] + def train_before_loop(self): LOGGER.info('Training start...') self.start_time = time.time() @@ -151,22 +151,22 @@ def prepare_for_steps(self): self.scheduler.step() self.model.train() if self.rank != -1: - self.train_loader.sampler.set_epoch(self.epoch) + self.train_loader.sampler.set_epoch(self.epoch) self.mean_loss = torch.zeros(4, device=self.device) 
self.optimizer.zero_grad() - + LOGGER.info(('\n' + '%10s' * 5) % ('Epoch', 'iou_loss', 'l1_loss', 'obj_loss', 'cls_loss')) self.pbar = enumerate(self.train_loader) if self.main_process: self.pbar = tqdm(self.pbar, total=self.max_stepnum, ncols=NCOLS, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}') - + # Print loss after each steps def print_details(self): if self.main_process: self.mean_loss = (self.mean_loss * self.step + self.loss_items) / (self.step + 1) self.pbar.set_description(('%10s' + '%10.4g' * 4) % (f'{self.epoch}/{self.max_epoch - 1}', \ *(self.mean_loss))) - + # Empty cache if training finished def train_after_loop(self): if self.main_process: @@ -205,7 +205,7 @@ def get_data_loader(args, cfg, data_dict): # create train dataloader train_loader = create_dataloader(train_path, args.img_size, args.batch_size // args.world_size, grid_size, hyp=dict(cfg.data_aug), augment=True, rect=False, rank=args.local_rank, - workers=args.workers, shuffle=True, check_images=args.check_images, + workers=args.workers, shuffle=True, check_images=args.check_images, check_labels=args.check_labels, class_names=class_names, task='train')[0] # create val dataloader val_loader = None @@ -243,7 +243,7 @@ def parallel_model(args, model, device): # If DDP mode ddp_mode = device.type != 'cpu' and args.rank != -1 - if ddp_mode: + if ddp_mode: model = DDP(model, device_ids=[args.local_rank], output_device=args.local_rank) return model diff --git a/yolov6/core/evaler.py b/yolov6/core/evaler.py index e96d4fca..16b2e50e 100644 --- a/yolov6/core/evaler.py +++ b/yolov6/core/evaler.py @@ -49,7 +49,7 @@ def init_model(self, model, weights, task): self.stride = int(model.stride.max()) if self.device.type != 'cpu': model(torch.zeros(1, 3, self.img_size, self.img_size).to(self.device).type_as(next(model.parameters()))) - # switch to deploy + # switch to deploy from yolov6.layers.common import RepVGGBlock for layer in model.modules(): if isinstance(layer, RepVGGBlock): @@ -61,18 +61,18 @@ def init_model(self, model, weights, task): def init_data(self, dataloader, task): '''Initialize dataloader. - + Returns a dataloader for task val or speed. ''' self.is_coco = isinstance(self.data.get('val'), str) and 'coco' in self.data['val'] # COCO dataset self.ids = self.coco80_to_coco91_class() if self.is_coco else list(range(1000)) if task != 'train': pad = 0.0 if task == 'speed' else 0.5 - dataloader = create_dataloader(self.data[task if task in ('train', 'val', 'test') else 'val'], - self.img_size, self.batch_size, self.stride, pad=pad, rect=True, + dataloader = create_dataloader(self.data[task if task in ('train', 'val', 'test') else 'val'], + self.img_size, self.batch_size, self.stride, pad=pad, rect=True, class_names=self.data['names'], task=task)[0] return dataloader - + def predict_model(self, model, dataloader, task): '''Model prediction Predicts the whole dataset and gets the prediced results and inference time. 
@@ -87,7 +87,7 @@ def predict_model(self, model, dataloader, task): imgs = imgs.half() if self.half else imgs.float() imgs /= 255 self.speed_result[1] += time_sync() - t1 # pre-process time - + # Inference t2 = time_sync() outputs = model(imgs) @@ -98,11 +98,11 @@ def predict_model(self, model, dataloader, task): outputs = non_max_suppression(outputs, self.conf_thres, self.iou_thres, multi_label=True) self.speed_result[3] += time_sync() - t3 # post-process time self.speed_result[0] += len(outputs) - + # save result pred_results.extend(self.convert_to_coco_format(outputs, imgs, paths, shapes, self.ids)) return pred_results - + def eval_model(self, pred_results, model, dataloader, task): '''Evaluate current model For task speed, this function only evaluates the speed of model and output inference time. @@ -125,14 +125,14 @@ def eval_model(self, pred_results, model, dataloader, task): LOGGER.info(f'Saving {pred_json}...') with open(pred_json, 'w') as f: json.dump(pred_results, f) - + anno = COCO(anno_json) pred = anno.loadRes(pred_json) cocoEval = COCOeval(anno, pred, 'bbox') if self.is_coco: - imgIds = [int(os.path.basename(x).split(".")[0]) + imgIds = [int(os.path.basename(x).split(".")[0]) for x in dataloader.dataset.img_paths] - cocoEval.params.imgIds = imgIds + cocoEval.params.imgIds = imgIds cocoEval.evaluate() cocoEval.accumulate() cocoEval.summarize() @@ -140,7 +140,7 @@ def eval_model(self, pred_results, model, dataloader, task): # Return results model.float() # for training if task != 'train': - LOGGER.info(f"Results saved to {self.save_dir}") + LOGGER.info(f"Results saved to {self.save_dir}") return (map50, map) return (0.0, 0.0) @@ -148,7 +148,7 @@ def eval_speed(self, task): '''Evaluate the speed of model.''' if task != 'train': n_samples = self.speed_result[0].item() - pre_time, inf_time, nms_time = 1000 * self.speed_result[1:].cpu().numpy() / n_samples + pre_time, inf_time, nms_time = 1000 * self.speed_result[1:].cpu().numpy() / n_samples for n, v in zip(["pre-process", "inference", "NMS"],[pre_time, inf_time, nms_time]): LOGGER.info("Average {} time: {:.2f} ms".format(n, v)) @@ -182,7 +182,7 @@ def scale_coords(self, img1_shape, coords, img0_shape, ratio_pad=None): coords[:, [0, 2]] = coords[:, [0, 2]].clip(0, img0_shape[1]) # x1, x2 coords[:, [1, 3]] = coords[:, [1, 3]].clip(0, img0_shape[0]) # y1, y2 return coords - + def convert_to_coco_format(self, outputs, imgs, paths, shapes, ids): pred_results = [] for i, pred in enumerate(outputs): @@ -207,12 +207,12 @@ def convert_to_coco_format(self, outputs, imgs, paths, shapes, ids): } pred_results.append(pred_data) return pred_results - + @staticmethod def check_task(task): if task not in ['train','val','speed']: raise Exception("task argument error: only support 'train' / 'val' / 'speed' task.") - + @staticmethod def reload_thres(conf_thres, iou_thres, task): '''Sets conf and iou thres for task val/speed''' @@ -223,7 +223,7 @@ def reload_thres(conf_thres, iou_thres, task): conf_thres = 0.25 iou_thres = 0.45 return conf_thres, iou_thres - + @staticmethod def reload_device(device, model, task): # device = 'cpu' or '0' or '0,1,2,3' @@ -238,8 +238,8 @@ def reload_device(device, model, task): cuda = device != 'cpu' and torch.cuda.is_available() device = torch.device('cuda:0' if cuda else 'cpu') return device - - @staticmethod + + @staticmethod def reload_dataset(data): with open(data, errors='ignore') as yaml_file: data = yaml.safe_load(yaml_file) @@ -248,12 +248,12 @@ def reload_dataset(data): raise Exception('Dataset not found.') 
return data - @staticmethod + @staticmethod def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/ - x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, - 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, - 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, - 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, + x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, + 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] return x diff --git a/yolov6/data/data_augment.py b/yolov6/data/data_augment.py index 1825e7d3..e4acea61 100644 --- a/yolov6/data/data_augment.py +++ b/yolov6/data/data_augment.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -*- coding:utf-8 -*- -# This code is based on +# This code is based on # https://github.com/ultralytics/yolov5/blob/master/utils/dataloaders.py import math @@ -73,7 +73,7 @@ def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16): # def random_affine(img, labels=(), degrees=10, translate=.1, scale=.1, shear=10, new_shape=(640,640)): - + n = len(labels) height,width = new_shape @@ -125,7 +125,7 @@ def get_transform_matrix(img_shape,new_shape,degrees,scale,shear,translate): # Shear S = np.eye(3) S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) - S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) + S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) # Translation T = np.eye(3) @@ -140,7 +140,7 @@ def get_transform_matrix(img_shape,new_shape,degrees,scale,shear,translate): def mosaic_augmentation(img_size, imgs, hs, ws, labels, hyp): assert len(imgs)==4, "Mosaic augmentaion of current version only supports 4 images." 
- + labels4 = [] s = img_size yc, xc = (int(random.uniform(s//2, 3*s//2)) for _ in range(2)) # mosaic center x, y @@ -181,7 +181,7 @@ def mosaic_augmentation(img_size, imgs, hs, ws, labels, hyp): # Concat/clip labels labels4 = np.concatenate(labels4, 0) for x in (labels4[:, 1:]): - np.clip(x, 0, 2 * s, out=x) + np.clip(x, 0, 2 * s, out=x) # Augment img4, labels4 = random_affine(img4, labels4, diff --git a/yolov6/data/data_load.py b/yolov6/data/data_load.py index fd245805..e8e273ca 100644 --- a/yolov6/data/data_load.py +++ b/yolov6/data/data_load.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -*- coding:utf-8 -*- -# This code is based on +# This code is based on # https://github.com/ultralytics/yolov5/blob/master/utils/dataloaders.py import os @@ -19,7 +19,7 @@ def create_dataloader(path, img_size, batch_size, stride, hyp=None, augment=Fals if rect and shuffle: LOGGER.warning('WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False') shuffle = False - with torch_distributed_zero_first(rank): + with torch_distributed_zero_first(rank): dataset = TrainValDataset(path, img_size, batch_size, augment=augment, hyp=hyp, @@ -74,4 +74,4 @@ def __init__(self, sampler): def __iter__(self): while True: - yield from iter(self.sampler) \ No newline at end of file + yield from iter(self.sampler) diff --git a/yolov6/data/datasets.py b/yolov6/data/datasets.py index 2bcbea26..3a0e204b 100644 --- a/yolov6/data/datasets.py +++ b/yolov6/data/datasets.py @@ -21,7 +21,7 @@ from yolov6.utils.events import LOGGER # Parameters -IMG_FORMATS = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp', 'mpo'] +IMG_FORMATS = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp', 'mpo'] # Get orientation exif tag for k,v in ExifTags.TAGS.items(): if v == 'Orientation': @@ -58,7 +58,7 @@ def __getitem__(self, index): if self.augment and random.random() < self.hyp['mosaic']: img,labels = self.get_mosaic(index) shapes = None - + # MixUp augmentation if random.random() < self.hyp['mixup']: img_other, labels_other = self.get_mosaic(random.randint(0,len(self.img_paths)-1)) @@ -74,7 +74,7 @@ def __getitem__(self, index): shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling labels = self.labels[index].copy() - if labels.size: + if labels.size: w *= ratio h *= ratio # new boxes @@ -86,20 +86,20 @@ def __getitem__(self, index): labels[:,1:] = boxes if self.augment: - img, labels = random_affine(img, + img, labels = random_affine(img, labels, degrees=self.hyp['degrees'], translate=self.hyp['translate'], scale=self.hyp['scale'], shear=self.hyp['shear'], new_shape=(self.img_size,self.img_size)) - + if len(labels): h,w = img.shape[:2] labels[:, [1, 3]] = labels[:, [1, 3]].clip(0, w - 1E-3) # x1, x2 labels[:, [2, 4]] = labels[:, [2, 4]].clip(0, h - 1E-3) # y1, y2 - + boxes = np.copy(labels[:,1:]) boxes[:, 0] = ((labels[:, 1] + labels[:, 3]) / 2) / w # x center boxes[:, 1] = ((labels[:, 2] + labels[:, 4]) / 2) / h # y center @@ -109,7 +109,7 @@ def __getitem__(self, index): if self.augment: img, labels = self.general_augment(img,labels) - + labels_out = torch.zeros((len(labels), 6)) if len(labels): labels_out[:, 1:] = torch.from_numpy(labels) @@ -119,7 +119,7 @@ def __getitem__(self, index): img = np.ascontiguousarray(img) return torch.from_numpy(img), labels_out, self.img_paths[index], shapes - + def load_image(self, index): '''Load image. This function loads image by cv2, resize original image to target shape(img_size) with keeping ratio. 
@@ -128,37 +128,37 @@ def load_image(self, index): Image, original shape of image, resized image shape ''' path = self.img_paths[index] - im = cv2.imread(path) + im = cv2.imread(path) assert im is not None, f'Image Not Found {path}, workdir: {os.getcwd()}' - + h0, w0 = im.shape[:2] # origin shape - r = self.img_size / max(h0, w0) + r = self.img_size / max(h0, w0) if r != 1: im = cv2.resize(im, (int(w0 * r), int(h0 * r)), interpolation=cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR) return im, (h0, w0), im.shape[:2] - + @staticmethod def collate_fn(batch): '''Merges a list of samples to form a mini-batch of Tensor(s)''' - img, label, path, shapes = zip(*batch) + img, label, path, shapes = zip(*batch) for i, l in enumerate(label): l[:, 0] = i # add target image index for build_targets() return torch.stack(img, 0), torch.cat(label, 0), path, shapes - + def get_imgs_labels(self, img_dir): - + assert osp.exists(img_dir), f'{img_dir} is an invalid directory path!' valid_img_record = osp.join(osp.dirname(img_dir), '.'+osp.basename(img_dir)+'.json') img_info = {} - NUM_THREADS = min(8, os.cpu_count()) + NUM_THREADS = min(8, os.cpu_count()) # check images if (self.check_images or not osp.exists(valid_img_record)) and self.main_process: img_paths = glob.glob(osp.join(img_dir, '*'), recursive=True) img_paths = sorted(p for p in img_paths if p.split('.')[-1].lower() in IMG_FORMATS) assert img_paths, f'No images found in {img_dir}.' - + nc, msgs = 0, [] # number corrupt, messages LOGGER.info(f"{self.task}: Checking formats of images with {NUM_THREADS} process(es): ") with Pool(NUM_THREADS) as pool: @@ -173,12 +173,12 @@ def get_imgs_labels(self, img_dir): pbar.close() if msgs: LOGGER.info('\n'.join(msgs)) - + # save valid image paths. with open(valid_img_record, 'w') as f: json.dump(img_info,f) - - + + # check and load anns label_dir = osp.join(osp.dirname(osp.dirname(img_dir)), 'labels', osp.basename(img_dir)) assert osp.exists(label_dir), f'{label_dir} is an invalid directory path!' @@ -210,14 +210,14 @@ def get_imgs_labels(self, img_dir): if self.main_process: pbar.close() with open(valid_img_record, 'w') as f: - json.dump(img_info,f) + json.dump(img_info,f) if msgs: LOGGER.info('\n'.join(msgs)) if nf == 0: LOGGER.warning(f'WARNING: No labels found in {osp.dirname(self.img_paths[0])}. ') else: with open(valid_img_record) as f: - img_info = json.load(f) + img_info = json.load(f) if self.task.lower() == 'val': assert self.class_names, 'Class names is required when converting labels to coco format for evaluating.' save_dir = osp.join(osp.dirname(osp.dirname(img_dir)), 'annotations') @@ -226,12 +226,12 @@ def get_imgs_labels(self, img_dir): save_path = osp.join(save_dir, 'instances_' + osp.basename(img_dir) + '.json') if not osp.exists(save_path): TrainValDataset.generate_coco_format_labels(img_info, self.class_names, save_path) - + img_paths, labels = list(zip(*[(img_path, np.array(info['labels'], dtype=np.float32) if info['labels'] else np.zeros((0,5), dtype=np.float32)) for img_path, info in img_info.items()])) self.img_info = img_info LOGGER.info(f'{self.task}: Final numbers of valid images: {len(img_paths)}/ labels: {len(labels)}. 
') return img_paths, labels - + def get_mosaic(self, index): '''Gets images and labels after mosaic augments ''' indices = [index] + random.choices(range(0, len(self.img_paths)), k=3) # 3 additional image indices @@ -246,12 +246,12 @@ def get_mosaic(self, index): labels.append(labels_per_img) img, labels = mosaic_augmentation(self.img_size, imgs, hs, ws, labels, self.hyp) return img, labels - + def general_augment(self, img, labels): '''Gets images and labels after general augment This function applies hsv, random ud-flip and random lr-flips augments. ''' - nl = len(labels) + nl = len(labels) # HSV color-space augment_hsv(img, hgain=self.hyp['hsv_h'], sgain=self.hyp['hsv_s'], vgain=self.hyp['hsv_v']) @@ -267,9 +267,9 @@ def general_augment(self, img, labels): img = np.fliplr(img) if nl: labels[:, 1] = 1 - labels[:, 1] - + return img, labels - + def sort_files_shapes(self): # Sort by aspect ratio batch_num = self.batch_indices[-1]+1 @@ -291,7 +291,7 @@ def sort_files_shapes(self): elif mini > 1: shapes[i] = [1, 1 / mini] self.batch_shapes = np.ceil(np.array(shapes) * self.img_size / self.stride + self.pad).astype(np.int) * self.stride - + @staticmethod def check_image(im_file): # verify an image. @@ -303,9 +303,9 @@ def check_image(im_file): im_exif = im._getexif() if im_exif and ORIENTATION in im_exif: rotation = im_exif[ORIENTATION] - if rotation in (6,8): + if rotation in (6,8): shape = (shape[1], shape[0]) - + assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels' assert im.format.lower() in IMG_FORMATS, f'invalid image format {im.format}' if im.format.lower() in ('jpg', 'jpeg'): @@ -319,7 +319,7 @@ def check_image(im_file): nc = 1 msg = f'WARNING: {im_file}: ignoring corrupt image: {e}' return im_file, None, nc, msg - + @staticmethod def check_label_files(args): img_path, lb_path = args @@ -334,7 +334,7 @@ def check_label_files(args): assert all(len(l) == 5 for l in labels), f'{lb_path}: wrong label format.' assert (labels >= 0).all(), f'{lb_path}: Label values error: all values in label file must > 0' assert (labels[:, 1:] <= 1).all(), f'{lb_path}: Label values error: all coordinates must be normalized' - + _, indices = np.unique(labels, axis=0, return_index=True) if len(indices) < len(labels): # duplicate row check labels = labels[indices] # remove duplicates @@ -346,20 +346,20 @@ def check_label_files(args): else: nm = 1 # label missing labels = [] - + return img_path,labels, nc, nm, nf, ne, msg except Exception as e: nc = 1 msg = f'WARNING: {lb_path}: ignoring invalid labels: {e}' return None, None, nc, nm, nf, ne, msg - + @staticmethod def generate_coco_format_labels(img_info,class_names,save_path): # for evaluation with pycocotools dataset = {'categories': [], 'annotations': [], 'images': []} for i, class_name in enumerate(class_names): dataset['categories'].append({'id': i, 'name': class_name, 'supercategory': ''}) - + ann_id = 0 LOGGER.info(f'Convert to COCO format') for i, (img_path,info) in enumerate(tqdm(img_info.items())): @@ -379,7 +379,7 @@ def generate_coco_format_labels(img_info,class_names,save_path): x2 = (x + w / 2) * img_w y2 = (y + h / 2) * img_h # cls_id starts from 0 - cls_id = int(c) + cls_id = int(c) w = max(0, x2 - x1) h = max(0, y2 - y1) dataset['annotations'].append({ @@ -396,4 +396,4 @@ def generate_coco_format_labels(img_info,class_names,save_path): with open(save_path, 'w') as f: json.dump(dataset, f) - LOGGER.info(f'Convert to COCO format finished. 
Resutls saved in {save_path}') \ No newline at end of file + LOGGER.info(f'Convert to COCO format finished. Results saved in {save_path}') diff --git a/yolov6/layers/common.py b/yolov6/layers/common.py index 5aadb89a..800659e8 100644 --- a/yolov6/layers/common.py +++ b/yolov6/layers/common.py @@ -13,7 +13,7 @@ class SiLU(nn.Module): '''Activation of SiLU''' @staticmethod def forward(x): - return x * torch.sigmoid(x) + return x * torch.sigmoid(x) class Conv(nn.Module): @@ -252,7 +252,7 @@ def switch_to_deploy(self): self.deploy = True -class DetectBackend(nn.Module): +class DetectBackend(nn.Module): def __init__(self, weights='yolov6s.pt', device=None, dnn=True): super().__init__() diff --git a/yolov6/models/effidehead.py b/yolov6/models/effidehead.py index 487fbea9..1e69491d 100644 --- a/yolov6/models/effidehead.py +++ b/yolov6/models/effidehead.py @@ -77,11 +77,11 @@ def forward(self, x): if self.inplace: y[..., 0:2] = (y[..., 0:2] + self.grid[i]) * self.stride[i] # xy y[..., 2:4] = torch.exp(y[..., 2:4]) * self.stride[i] # wh - else: + else: xy = (y[..., 0:2] + self.grid[i]) * self.stride[i] # xy wh = torch.exp(y[..., 2:4]) * self.stride[i] # wh y = torch.cat((xy, wh, y[..., 4:]), -1) - z.append(y.view(bs, -1, self.num_outputs)) + z.append(y.view(bs, -1, self.num_outputs)) return x if self.training else torch.cat(z, 1) diff --git a/yolov6/models/loss.py b/yolov6/models/loss.py index 9cf9cc6d..b86e4830 100644 --- a/yolov6/models/loss.py +++ b/yolov6/models/loss.py @@ -27,22 +27,22 @@ def __init__(self, n_anchors=1, iou_type='ciou' ): - + self.reg_weight = reg_weight self.iou_weight = iou_weight self.cls_weight = cls_weight - + self.center_radius = center_radius self.eps = eps self.n_anchors = n_anchors self.strides = strides self.grids = [torch.zeros(1)] * len(in_channels) - + # Define criteria self.l1_loss = nn.L1Loss(reduction="none") self.bcewithlog_loss = nn.BCEWithLogitsLoss(reduction="none") self.iou_loss = IOUloss(iou_type=iou_type, reduction="none") - + def __call__( self, outputs, @@ -53,29 +53,29 @@ def __call__( loss_cls, loss_obj, loss_iou, loss_l1 = torch.zeros(1, device=device), torch.zeros(1, device=device), \ torch.zeros(1, device=device), torch.zeros(1, device=device) num_classes = outputs[0].shape[-1] - 5 - + outputs, outputs_origin, gt_bboxes_scale, xy_shifts, expanded_strides = self.get_outputs_and_grids( outputs, self.strides, dtype, device) - + total_num_anchors = outputs.shape[1] bbox_preds = outputs[:, :, :4] # [batch, n_anchors_all, 4] bbox_preds_org = outputs_origin[:, :, :4] # [batch, n_anchors_all, 4] obj_preds = outputs[:, :, 4].unsqueeze(-1) # [batch, n_anchors_all, 1] cls_preds = outputs[:, :, 5:] # [batch, n_anchors_all, n_cls] - + # targets batch_size = bbox_preds.shape[0] - targets_list = np.zeros((batch_size, 1, 5)).tolist() + targets_list = np.zeros((batch_size, 1, 5)).tolist() for i, item in enumerate(targets.cpu().numpy().tolist()): targets_list[int(item[0])].append(item[1:]) max_len = max((len(l) for l in targets_list)) - + targets = torch.from_numpy(np.array(list(map(lambda l:l + [[-1,0,0,0,0]]*(max_len - len(l)), targets_list)))[:,1:,:]).to(targets.device) num_targets_list = (targets.sum(dim=2) > 0).sum(dim=1) # number of objects - + num_fg, num_gts = 0, 0 cls_targets, reg_targets, l1_targets, obj_targets, fg_masks = [], [], [], [], [] - + for batch_idx in range(batch_size): num_gt = int(num_targets_list[batch_idx]) num_gts += num_gt @@ -86,12 +86,12 @@ def __call__( obj_target = outputs.new_zeros((total_num_anchors, 1)) fg_mask =
outputs.new_zeros(total_num_anchors).bool() else: - + gt_bboxes_per_image = targets[batch_idx, :num_gt, 1:5].mul_(gt_bboxes_scale) gt_classes = targets[batch_idx, :num_gt, 0] bboxes_preds_per_image = bbox_preds[batch_idx] - cls_preds_per_image = cls_preds[batch_idx] - obj_preds_per_image = obj_preds[batch_idx] + cls_preds_per_image = cls_preds[batch_idx] + obj_preds_per_image = obj_preds[batch_idx] try: ( @@ -108,12 +108,12 @@ def __call__( gt_classes, bboxes_preds_per_image, cls_preds_per_image, - obj_preds_per_image, + obj_preds_per_image, expanded_strides, xy_shifts, num_classes ) - + except RuntimeError: print( "OOM RuntimeError is raised due to the huge memory cost during label assignment. \ @@ -121,14 +121,14 @@ def __call__( try to reduce the batch size or image size." ) torch.cuda.empty_cache() - print("------------CPU Mode for This Batch-------------") - + print("------------CPU Mode for This Batch-------------") + _gt_bboxes_per_image = gt_bboxes_per_image.cpu().float() _gt_classes = gt_classes.cpu().float() _bboxes_preds_per_image = bboxes_preds_per_image.cpu().float() _cls_preds_per_image = cls_preds_per_image.cpu().float() _obj_preds_per_image = obj_preds_per_image.cpu().float() - + _expanded_strides = expanded_strides.cpu().float() _xy_shifts = xy_shifts.cpu() @@ -146,7 +146,7 @@ def __call__( _gt_classes, _bboxes_preds_per_image, _cls_preds_per_image, - _obj_preds_per_image, + _obj_preds_per_image, _expanded_strides, _xy_shifts, num_classes @@ -156,7 +156,7 @@ def __call__( fg_mask = fg_mask.cuda() pred_ious_this_matching = pred_ious_this_matching.cuda() matched_gt_inds = matched_gt_inds.cuda() - + torch.cuda.empty_cache() num_fg += num_fg_img if num_fg_img > 0: @@ -165,37 +165,37 @@ def __call__( ) * pred_ious_this_matching.unsqueeze(-1) obj_target = fg_mask.unsqueeze(-1) reg_target = gt_bboxes_per_image[matched_gt_inds] - + l1_target = self.get_l1_target( outputs.new_zeros((num_fg_img, 4)), gt_bboxes_per_image[matched_gt_inds], expanded_strides[0][fg_mask], xy_shifts=xy_shifts[0][fg_mask], ) - + cls_targets.append(cls_target) reg_targets.append(reg_target) obj_targets.append(obj_target) l1_targets.append(l1_target) fg_masks.append(fg_mask) - + cls_targets = torch.cat(cls_targets, 0) reg_targets = torch.cat(reg_targets, 0) obj_targets = torch.cat(obj_targets, 0) l1_targets = torch.cat(l1_targets, 0) fg_masks = torch.cat(fg_masks, 0) - + num_fg = max(num_fg, 1) # loss loss_iou += (self.iou_loss(bbox_preds.view(-1, 4)[fg_masks].T, reg_targets)).sum() / num_fg loss_l1 += (self.l1_loss(bbox_preds_org.view(-1, 4)[fg_masks], l1_targets)).sum() / num_fg - + loss_obj += (self.bcewithlog_loss(obj_preds.view(-1, 1), obj_targets*1.0)).sum() / num_fg loss_cls += (self.bcewithlog_loss(cls_preds.view(-1, num_classes)[fg_masks], cls_targets)).sum() / num_fg - + total_losses = self.reg_weight * loss_iou + loss_l1 + loss_obj + loss_cls return total_losses, torch.cat((self.reg_weight * loss_iou, loss_l1, loss_obj, loss_cls)).detach() - + def decode_output(self, output, k, stride, dtype, device): grid = self.grids[k].to(device) batch_size = output.shape[0] @@ -204,51 +204,51 @@ def decode_output(self, output, k, stride, dtype, device): yv, xv = torch.meshgrid([torch.arange(hsize), torch.arange(wsize)]) grid = torch.stack((xv, yv), 2).view(1, 1, hsize, wsize, 2).type(dtype).to(device) self.grids[k] = grid - + output = output.reshape(batch_size, self.n_anchors * hsize * wsize, -1) output_origin = output.clone() grid = grid.view(1, -1, 2) - + output[..., :2] = (output[..., :2] + grid) * stride 
output[..., 2:4] = torch.exp(output[..., 2:4]) * stride - + return output, output_origin, grid, hsize, wsize - + def get_outputs_and_grids(self, outputs, strides, dtype, device): xy_shifts = [] expanded_strides = [] outputs_new = [] outputs_origin = [] - + for k, output in enumerate(outputs): output, output_origin, grid, feat_h, feat_w = self.decode_output( output, k, strides[k], dtype, device) - + xy_shift = grid expanded_stride = torch.full((1, grid.shape[1], 1), strides[k], dtype=grid.dtype, device=grid.device) - + xy_shifts.append(xy_shift) expanded_strides.append(expanded_stride) outputs_new.append(output) outputs_origin.append(output_origin) - + xy_shifts = torch.cat(xy_shifts, 1) # [1, n_anchors_all, 2] expanded_strides = torch.cat(expanded_strides, 1) # [1, n_anchors_all, 1] outputs_origin = torch.cat(outputs_origin, 1) outputs = torch.cat(outputs_new, 1) - + feat_h *= strides[-1] feat_w *= strides[-1] - gt_bboxes_scale = torch.Tensor([[feat_w, feat_h, feat_w, feat_h]]).type_as(outputs) - + gt_bboxes_scale = torch.Tensor([[feat_w, feat_h, feat_w, feat_h]]).type_as(outputs) + return outputs, outputs_origin, gt_bboxes_scale, xy_shifts, expanded_strides - + def get_l1_target(self, l1_target, gt, stride, xy_shifts, eps=1e-8): - + l1_target[:, 0:2] = gt[:, 0:2] / stride - xy_shifts l1_target[:, 2:4] = torch.log(gt[:, 2:4] / stride + eps) return l1_target - + @torch.no_grad() def get_assignments( self, @@ -259,12 +259,12 @@ def get_assignments( gt_classes, bboxes_preds_per_image, cls_preds_per_image, - obj_preds_per_image, + obj_preds_per_image, expanded_strides, xy_shifts, num_classes ): - + fg_mask, is_in_boxes_and_center = self.get_in_boxes_info( gt_bboxes_per_image, expanded_strides, @@ -272,23 +272,23 @@ def get_assignments( total_num_anchors, num_gt, ) - + bboxes_preds_per_image = bboxes_preds_per_image[fg_mask] cls_preds_ = cls_preds_per_image[fg_mask] obj_preds_ = obj_preds_per_image[fg_mask] num_in_boxes_anchor = bboxes_preds_per_image.shape[0] - + # cost pair_wise_ious = pairwise_bbox_iou(gt_bboxes_per_image, bboxes_preds_per_image, box_format='xywh') pair_wise_ious_loss = -torch.log(pair_wise_ious + 1e-8) - + gt_cls_per_image = ( F.one_hot(gt_classes.to(torch.int64), num_classes) .float() .unsqueeze(1) .repeat(1, num_in_boxes_anchor, 1) ) - + with torch.cuda.amp.autocast(enabled=False): cls_preds_ = ( cls_preds_.float().sigmoid_().unsqueeze(0).repeat(num_gt, 1, 1) @@ -304,7 +304,7 @@ def get_assignments( + self.iou_weight * pair_wise_ious_loss + 100000.0 * (~is_in_boxes_and_center) ) - + ( num_fg, gt_matched_classes, @@ -385,7 +385,7 @@ def dynamic_k_matching(self, cost, pair_wise_ious, gt_classes, num_gt, fg_mask): topk_ious, _ = torch.topk(ious_in_boxes_matrix, n_candidate_k, dim=1) dynamic_ks = torch.clamp(topk_ious.sum(1).int(), min=1) dynamic_ks = dynamic_ks.tolist() - + for gt_idx in range(num_gt): _, pos_idx = torch.topk( cost[gt_idx], k=dynamic_ks[gt_idx], largest=False diff --git a/yolov6/models/reppan.py b/yolov6/models/reppan.py index ee12036f..2571d687 100644 --- a/yolov6/models/reppan.py +++ b/yolov6/models/reppan.py @@ -106,8 +106,3 @@ def forward(self, input): outputs = [pan_out2, pan_out1, pan_out0] return outputs - - - - - diff --git a/yolov6/solver/build.py b/yolov6/solver/build.py index 601e9610..c18c97bb 100644 --- a/yolov6/solver/build.py +++ b/yolov6/solver/build.py @@ -16,13 +16,13 @@ def build_optimizer(cfg, model): g_bnw.append(v.weight) elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): g_w.append(v.weight) - + assert cfg.solver.optim 
== 'SGD' or 'Adam', 'ERROR: unknown optimizer, use SGD defaulted' if cfg.solver.optim == 'SGD': optimizer = torch.optim.SGD(g_bnw, lr=cfg.solver.lr0, momentum=cfg.solver.momentum, nesterov=True) elif cfg.solver.optim == 'Adam': optimizer = torch.optim.Adam(g_bnw, lr=cfg.solver.lr0, betas=(cfg.solver.momentum, 0.999)) - + optimizer.add_param_group({'params': g_w, 'weight_decay': cfg.solver.weight_decay}) optimizer.add_param_group({'params': g_b}) @@ -33,9 +33,9 @@ def build_optimizer(cfg, model): def build_lr_scheduler(cfg, optimizer, epochs): """Build learning rate scheduler from cfg file.""" if cfg.solver.lr_scheduler == 'Cosine': - lf = lambda x: ((1 - math.cos(x * math.pi / epochs)) / 2) * (cfg.solver.lrf - 1) + 1 + lf = lambda x: ((1 - math.cos(x * math.pi / epochs)) / 2) * (cfg.solver.lrf - 1) + 1 else: LOGGER.error('unknown lr scheduler, use Cosine defaulted') scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) - return scheduler, lf \ No newline at end of file + return scheduler, lf diff --git a/yolov6/utils/checkpoint.py b/yolov6/utils/checkpoint.py index bf4dc05f..1cc6a333 100644 --- a/yolov6/utils/checkpoint.py +++ b/yolov6/utils/checkpoint.py @@ -25,9 +25,9 @@ def load_checkpoint(weights, map_location=None, inplace=True, fuse=True): model = ckpt['ema' if ckpt.get('ema') else 'model'].float() if fuse: LOGGER.info("\nFusing model...") - model = fuse_model(model).eval() + model = fuse_model(model).eval() else: - model = model.eval() + model = model.eval() return model @@ -40,7 +40,7 @@ def save_checkpoint(ckpt, is_best, save_dir, model_name=""): if is_best: best_filename = osp.join(save_dir, 'best_ckpt.pt') shutil.copyfile(filename, best_filename) - + def strip_optimizer(ckpt_dir): for s in ['best', 'last']: @@ -57,6 +57,3 @@ def strip_optimizer(ckpt_dir): for p in ckpt['model'].parameters(): p.requires_grad = False torch.save(ckpt, ckpt_path) - - - diff --git a/yolov6/utils/config.py b/yolov6/utils/config.py index d88fbe86..7f9c13a3 100644 --- a/yolov6/utils/config.py +++ b/yolov6/utils/config.py @@ -31,7 +31,7 @@ def __getattr__(self, name): class Config(object): - + @staticmethod def _file2dict(filename): filename = str(filename) @@ -56,7 +56,7 @@ def _file2dict(filename): cfg_text += f.read() return cfg_dict, cfg_text - + @staticmethod def fromfile(filename): cfg_dict, cfg_text = Config._file2dict(filename) @@ -99,4 +99,3 @@ def __setattr__(self, name, value): if isinstance(value, dict): value = ConfigDict(value) self._cfg_dict.__setattr__(name, value) - diff --git a/yolov6/utils/ema.py b/yolov6/utils/ema.py index 4b7877bf..3bdaf29d 100644 --- a/yolov6/utils/ema.py +++ b/yolov6/utils/ema.py @@ -19,7 +19,7 @@ class ModelEMA: def __init__(self, model, decay=0.9999, updates=0): self.ema = deepcopy(model.module if is_parallel(model) else model).eval() # FP32 EMA - self.updates = updates + self.updates = updates self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) for param in self.ema.parameters(): param.requires_grad_(False) @@ -55,4 +55,4 @@ def is_parallel(model): def de_parallel(model): # De-parallelize a model. Return single-GPU model if model's type is DP or DDP. 
- return model.module if is_parallel(model) else model \ No newline at end of file + return model.module if is_parallel(model) else model diff --git a/yolov6/utils/envs.py b/yolov6/utils/envs.py index bfcae5c5..2e04d532 100644 --- a/yolov6/utils/envs.py +++ b/yolov6/utils/envs.py @@ -21,9 +21,9 @@ def select_device(device): Args: device: a string, like 'cpu' or '1,2,3,4' Returns: - torch.device + torch.device """ - if device == 'cpu': + if device == 'cpu': os.environ['CUDA_VISIBLE_DEVICES'] = '-1' LOGGER.info('Using CPU for training... ') elif device: @@ -51,7 +51,3 @@ def set_random_seed(seed, deterministic=False): else: cudnn.deterministic = False cudnn.benchmark = True - - - - diff --git a/yolov6/utils/events.py b/yolov6/utils/events.py index ec2f15a6..3fee8c91 100644 --- a/yolov6/utils/events.py +++ b/yolov6/utils/events.py @@ -31,11 +31,8 @@ def write_tblog(tblogger, epoch, results, losses): """Display mAP and loss information to log.""" tblogger.add_scalar("val/mAP@0.5", results[0], epoch + 1) tblogger.add_scalar("val/mAP@0.50:0.95", results[1], epoch + 1) - + tblogger.add_scalar("train/iou_loss", losses[0], epoch + 1) tblogger.add_scalar("train/l1_loss", losses[1], epoch + 1) tblogger.add_scalar("train/obj_loss", losses[2], epoch + 1) tblogger.add_scalar("train/cls_loss", losses[3], epoch + 1) - - - diff --git a/yolov6/utils/figure_iou.py b/yolov6/utils/figure_iou.py index 2f12ec50..f3a0f3f7 100644 --- a/yolov6/utils/figure_iou.py +++ b/yolov6/utils/figure_iou.py @@ -5,7 +5,7 @@ class IOUloss: - """ Calculate IoU loss. + """ Calculate IoU loss. """ def __init__(self, box_format='xywh', iou_type='ciou', reduction='none', eps=1e-7): """ Setting of the class. @@ -13,7 +13,7 @@ def __init__(self, box_format='xywh', iou_type='ciou', reduction='none', eps=1e- box_format: (string), must be one of 'xywh' or 'xyxy'. iou_type: (string), can be one of 'ciou', 'diou', 'giou' or 'siou' reduction: (string), specifies the reduction to apply to the output, must be one of 'none', 'mean','sum'. - eps: (float), a value to avoid devide by zero error. + eps: (float), a value to avoid divide by zero error. """ self.box_format = box_format self.iou_type = iou_type.lower() diff --git a/yolov6/utils/nms.py b/yolov6/utils/nms.py index dc8c1417..9c61b7cc 100644 --- a/yolov6/utils/nms.py +++ b/yolov6/utils/nms.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -*- coding:utf-8 -*- -# The code is based on +# The code is based on # https://github.com/ultralytics/yolov5/blob/master/utils/general.py import os @@ -35,7 +35,7 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non prediction: (tensor), with shape [N, 5 + num_classes], N is the number of bboxes. conf_thres: (float) confidence threshold. iou_thres: (float) iou threshold. - classes: (None or list[int]), if a list is provided, nms only keep the classes you provide. + classes: (None or list[int]), if a list is provided, nms only keep the classes you provide. agnostic: (bool), when it is set to True, we do class-independent nms, otherwise, different class would do nms respectively. multi_label: (bool), when it is set to True, one box can have multi labels, otherwise, one box only huave one label. max_det:(int), max number of output bboxes. @@ -104,7 +104,3 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non break # time limit exceeded return output - - - -
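The whitespace and end-of-file fixes throughout this patch are exactly what the newly added `.pre-commit-config.yaml` hooks (`check-yaml`, `end-of-file-fixer`, `trailing-whitespace`) enforce. As a rough sketch of the intended workflow, assuming the standard `pre-commit` CLI (which this patch configures but does not document), a contributor would run:

```shell
# Install the pre-commit tool and register the hooks defined in .pre-commit-config.yaml
pip install pre-commit
pre-commit install

# One-off pass over the whole repository, reproducing the cleanup in this diff
pre-commit run --all-files
```

Once installed, the same checks run automatically on every `git commit`, so trailing whitespace and missing final newlines like those removed here should not reappear.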