
Commit

update fasterrcnn comments
wz authored and wz committed Jun 10, 2020
1 parent 77482c2 commit 5349fdd
Showing 3 changed files with 17 additions and 14 deletions.
@@ -239,7 +239,7 @@ class FasterRCNN(FasterRCNNBase):

def __init__(self, backbone, num_classes=None,
# transform parameter
-min_size=800, max_size=1333,  # minimum and maximum sizes enforced when resizing during preprocessing
+min_size=800, max_size=1344,  # minimum and maximum sizes enforced when resizing during preprocessing
image_mean=None, image_std=None,  # mean and std used when normalizing during preprocessing
# RPN parameters
rpn_anchor_generator=None, rpn_head=None,
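These two parameters feed the resize step in transform.py shown further below. As a quick sketch of the rule they encode (pick_scale is an illustrative helper name, not from the repo):

    def pick_scale(h, w, min_size=800, max_size=1344):
        # scale the smaller side up (or down) to min_size...
        scale = min_size / min(h, w)
        # ...unless that would push the larger side past max_size
        if scale * max(h, w) > max_size:
            scale = max_size / max(h, w)
        return scale

    # e.g. a 600x1200 image: 800/600 = 1.33 would give a 1600-pixel long side,
    # so the factor is clamped to 1344/1200 = 1.12
    print(pick_scale(600, 1200))  # 1.12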
@@ -10,7 +10,7 @@

@torch.jit.unused
def _onnx_get_num_anchors_and_pre_nms_top_n(ob, orig_pre_nms_top_n):
-# type: (Tensor, int) -> Tuple[int, int]
+# type: (Tensor, int) -> Tuple[int, Tensor]
from torch.onnx import operators
num_anchors = operators.shape_as_tensor(ob)[1].unsqueeze(0)
# TODO : remove cast to IntTensor/num_anchors.dtype when
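The corrected annotation reflects that under ONNX tracing the clamped top-n value is computed with tensor ops (torch.min over tensors returns a Tensor), so it never becomes a Python int. In plain eager terms the helper boils down to something like this sketch:

    def get_num_anchors_and_pre_nms_top_n(ob, orig_pre_nms_top_n):
        # ob: [batch, anchors, ...]; read the anchor count from dim 1
        num_anchors = ob.shape[1]
        # never request more pre-NMS proposals than anchors exist
        return num_anchors, min(orig_pre_nms_top_n, num_anchors)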
27 changes: 15 additions & 12 deletions pytorch_object_detection/faster_rcnn/network_files/transform.py
@@ -1,5 +1,5 @@
import torch
-from torch import nn
+from torch import nn, Tensor
import random
import math
from network_files.image_list import ImageList
@@ -33,10 +33,11 @@ def normalize(self, image):
dtype, device = image.dtype, image.device
mean = torch.as_tensor(self.image_mean, dtype=dtype, device=device)
std = torch.as_tensor(self.image_std, dtype=dtype, device=device)
+# [:, None, None]: shape [3] -> [3, 1, 1]
return (image - mean[:, None, None]) / std[:, None, None]

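The new comment is the whole trick of normalize: mean and std have shape [3] while the image is [3, H, W], so lifting them to [3, 1, 1] lets them broadcast per channel. A standalone sketch (the ImageNet mean/std constants are assumed here for illustration):

    import torch

    image = torch.rand(3, 4, 4)                 # [C, H, W]
    mean = torch.tensor([0.485, 0.456, 0.406])  # shape [3]
    std = torch.tensor([0.229, 0.224, 0.225])   # shape [3]
    out = (image - mean[:, None, None]) / std[:, None, None]  # [3,1,1] broadcasts over H, W
    print(out.shape)  # torch.Size([3, 4, 4])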
def torch_choice(self, l):
-# type: (List[int])
+# type: (List[int]) -> int
"""
Implements `random.choice` via torch ops so it can be compiled with
TorchScript. Remove if https://github.com/pytorch/pytorch/issues/25803
@@ -46,7 +47,7 @@ def torch_choice(self, l):
return l[index]

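The body of torch_choice is collapsed above; presumably it draws a uniform float and truncates it to an index, as in the torchvision original. A sketch of that TorchScript-friendly replacement for random.choice:

    import torch

    def torch_choice(l):
        # uniform float in [0, len(l)), truncated to an integer index
        index = int(torch.empty(1).uniform_(0., float(len(l))).item())
        return l[index]

    print(torch_choice([480, 576, 688, 800]))  # one of the listed sizes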
def resize(self, image, target):
-# type: (Tensor, Optional[Dict[str, Tensor]])
+# type: (Tensor, Optional[Dict[str, Tensor]]) -> (Tensor, Optional[Dict[str, Tensor]])
"""
Resize the image into the given size range and scale the bboxes accordingly
Args:
@@ -63,10 +64,10 @@ def resize(self, image, target):
min_size = float(torch.min(im_shape))  # smaller of the image's two sides
max_size = float(torch.max(im_shape))  # larger of the image's two sides
if self.training:
-size = float(self.torch_choice(self.min_size))  # specified minimum side length of the input image
+size = float(self.torch_choice(self.min_size))  # specified minimum side length of the input image; note this is self.min_size, not min_size
else:
# FIXME assume for now that testing uses the largest scale
-size = float(self.min_size[-1])  # specified minimum side length of the input image
+size = float(self.min_size[-1])  # specified minimum side length of the input image; note this is self.min_size, not min_size
scale_factor = size / min_size  # scale factor from the specified minimum side length and the image's smaller side

# if the image's larger side, scaled by this factor, would exceed the specified maximum side length
@@ -75,6 +76,7 @@

# interpolate resizes the image via interpolation
# image[None] prepends a batch dimension: [C, H, W] -> [N, C, H, W]
+# bilinear mode only supports 4D tensors
image = torch.nn.functional.interpolate(
image[None], scale_factor=scale_factor, mode='bilinear', align_corners=False)[0]

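The new bilinear note is why image[None] appears at all: F.interpolate in bilinear mode expects an [N, C, H, W] input. A standalone sketch with illustrative numbers:

    import torch
    import torch.nn.functional as F

    image = torch.rand(3, 400, 1000)                 # [C, H, W]
    resized = F.interpolate(image[None],             # -> [1, C, H, W]
                            scale_factor=1.344,
                            mode='bilinear',
                            align_corners=False)[0]  # back to [C, H, W]
    print(resized.shape)  # torch.Size([3, 537, 1344])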
@@ -122,7 +124,7 @@ def max_by_axis(self, the_list):
return maxes

def batch_images(self, images, size_divisible=32):
-# type: (List[Tensor], int)
+# type: (List[Tensor], int) -> Tensor
"""
Pack a list of images into one batch and return it (note: every tensor in the batch has the same shape)
Args:
@@ -142,7 +144,7 @@ def batch_images(self, images, size_divisible=32):
max_size = self.max_by_axis([list(img.shape) for img in images])

stride = float(size_divisible)
-max_size = list(max_size)
+# max_size = list(max_size)
# round height up to the nearest multiple of stride
max_size[1] = int(math.ceil(float(max_size[1]) / stride) * stride)
# round width up to the nearest multiple of stride
@@ -156,12 +158,13 @@
for img, pad_img in zip(images, batched_imgs):
# copy each image in the input list into the corresponding image of the new batched_imgs, aligned at the top-left corner so the bbox coordinates stay valid
# this guarantees that every image in a batch fed to the network has the same shape
+# copy_: Copies the elements from src into self tensor and returns self
pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)

return batched_imgs

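Put together, batch_images pads every image to the rounded-up common size and copies each one into the top-left corner. A self-contained sketch (zero padding assumed, as in the torchvision original):

    import math
    import torch

    images = [torch.rand(3, 500, 700), torch.rand(3, 600, 400)]
    stride = 32.0
    max_size = [3, 600, 700]                                     # per-axis maxima
    max_size[1] = int(math.ceil(max_size[1] / stride) * stride)  # 600 -> 608
    max_size[2] = int(math.ceil(max_size[2] / stride) * stride)  # 700 -> 704
    batched = images[0].new_zeros([len(images)] + max_size)
    for img, pad_img in zip(images, batched):
        pad_img[:img.shape[0], :img.shape[1], :img.shape[2]].copy_(img)
    print(batched.shape)  # torch.Size([2, 3, 608, 704])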
def postprocess(self, result, image_shapes, original_image_sizes):
-# type: (List[Dict[str, Tensor]], List[Tuple[int, int]], List[Tuple[int, int]])
+# type: (List[Dict[str, Tensor]], List[Tuple[int, int]], List[Tuple[int, int]]) -> List[Dict[str, Tensor]]
"""
Post-process the network's predictions (mainly map the bboxes back onto the original image scale)
Args:
@@ -181,6 +184,7 @@ def postprocess(self, result, image_shapes, original_image_sizes):
return result

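The visible part of postprocess is only its signature and docstring; its core is presumably a loop that calls resize_boxes (defined at the end of this file) per image, along these lines:

    # sketch only: map each image's predicted boxes back to the original scale
    for i, (pred, im_s, o_im_s) in enumerate(zip(result, image_shapes, original_image_sizes)):
        boxes = pred["boxes"]
        boxes = resize_boxes(boxes, im_s, o_im_s)  # resized size -> original size
        result[i]["boxes"] = boxes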
def __repr__(self):
"""自定义输出实例化对象的信息,可通过print打印实例信息"""
format_string = self.__class__.__name__ + '('
_indent = '\n '
format_string += "{0}Normalize(mean={1}, std={2})".format(_indent, self.image_mean, self.image_std)
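With the new docstring, print(transform) renders the instance via this method. The output looks roughly like the following (the class name and the Resize line are assumed from the format strings above and the collapsed lines below):

    # GeneralizedRCNNTransform(
    #     Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    #     Resize(min_size=(800,), max_size=1344, mode='bilinear')
    # )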
@@ -204,9 +208,11 @@ def forward(self, images, targets=None):
if targets is not None and target_index is not None:
targets[i] = target_index

+# record the image sizes after resizing
image_sizes = [img.shape[-2:] for img in images]
images = self.batch_images(images)  # pack the images into a single batch
image_sizes_list = torch.jit.annotate(List[Tuple[int, int]], [])

for image_size in image_sizes:
assert len(image_size) == 2
image_sizes_list.append((image_size[0], image_size[1]))
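The new comment marks a subtle ordering point: the true per-image sizes must be recorded before batch_images pads everything to one shape, since the padded batch no longer reveals them. A sketch with illustrative sizes:

    import torch

    resized = [torch.rand(3, 537, 1344), torch.rand(3, 800, 1216)]
    image_sizes = [img.shape[-2:] for img in resized]  # [537, 1344] and [800, 1216]
    # after batching, both live in one padded [2, 3, 800, 1344] tensor;
    # downstream code (RPN, postprocess) reads the real sizes from image_sizes.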
@@ -216,7 +222,7 @@


def resize_boxes(boxes, original_size, new_size):
-# type: (Tensor, List[int], List[int])
+# type: (Tensor, List[int], List[int]) -> Tensor
"""
Scale the boxes according to how the image was resized
@@ -246,6 +252,3 @@ def resize_boxes(boxes, original_size, new_size):
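The body of resize_boxes is collapsed, but from the signature and its callers it scales x and y coordinates by independent width/height ratios. A hedged sketch (named resize_boxes_sketch to mark it as illustrative):

    import torch

    def resize_boxes_sketch(boxes, original_size, new_size):
        ratio_h = float(new_size[0]) / float(original_size[0])
        ratio_w = float(new_size[1]) / float(original_size[1])
        xmin, ymin, xmax, ymax = boxes.unbind(1)  # boxes: [N, 4] as (x1, y1, x2, y2)
        return torch.stack([xmin * ratio_w, ymin * ratio_h,
                            xmax * ratio_w, ymax * ratio_h], dim=1)

    boxes = torch.tensor([[10., 20., 110., 220.]])
    print(resize_boxes_sketch(boxes, (400, 1000), (537, 1344)))
    # tensor([[ 13.4400,  26.8500, 147.8400, 295.3500]])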





