Add training on multi-GPU

realleewq · Jun 27, 2020 · ee00ae5 · ee00ae5
1 parent 83ffe1e
commit ee00ae5
Show file tree

Hide file tree

Showing 13 changed files with 572 additions and 55 deletions.
diff --git a/.gitignore b/.gitignore
@@ -14,3 +14,4 @@ flower_data
 checkpoint
 data
 VOCdevkit
+ssd_resnet50_v1_fpn_shared_box_predictor
diff --git a/pytorch_object_detection/RetinaNet/src/loss.py b/pytorch_object_detection/RetinaNet/src/loss.py
@@ -7,7 +7,7 @@ class Loss(nn.Module):
         Implements the loss as the sum of the followings:
         1. Confidence Loss: All labels, with hard negative mining
         2. Localization Loss: Only on positive labels
-        Suppose input dboxes has the shape 8732x4
+        Suppose input dboxes has the shape 76725x4
     """
     def __init__(self, dboxes):
         super(Loss, self).__init__()
@@ -39,42 +39,128 @@ def _location_vec(self, loc):
     def forward(self, ploc, plabel, gloc, glabel):
         # type: (Tensor, Tensor, Tensor, Tensor)
         """
-            ploc, plabel: Nx4x8732, Nxlabel_numx8732
+            ploc, plabel: Nx4x76725, Nxlabel_numx76725
                 predicted location and labels
 
-            gloc, glabel: Nx4x8732, Nx8732
+            gloc, glabel: Nx4x76725, Nx76725
                 ground truth location and labels
         """
-        # 获取正样本的mask  Tensor: [N, 8732]
+        # 获取正样本的mask  Tensor: [N, 76725]
         mask = glabel > 0
         # mask1 = torch.nonzero(glabel)
         # 计算一个batch中的每张图片的正样本个数 Tensor: [N]
         pos_num = mask.sum(dim=1)
 
-        # 计算gt的location回归参数 Tensor: [N, 4, 8732]
+        # 计算gt的location回归参数 Tensor: [N, 4, 76725]
         vec_gd = self._location_vec(gloc)
 
         # sum on four coordinates, and mask
         # 计算定位损失(只有正样本)
-        loc_loss = self.location_loss(ploc, vec_gd).sum(dim=1)  # Tensor: [N, 8732]
+        loc_loss = self.location_loss(ploc, vec_gd).sum(dim=1)  # Tensor: [N, 76725]
         loc_loss = (mask.float() * loc_loss).sum(dim=1)  # Tenosr: [N]
 
-        # hard negative mining Tenosr: [N, 8732]
+        # hard negative mining Tensor: [N, 76725]
         con = self.confidence_loss(plabel, glabel)
 
         # positive mask will never selected
         # 获取负样本
         con_neg = con.clone()
         con_neg[mask] = torch.tensor(0.0)
-        # 按照confidence_loss降序排列 con_idx(Tensor: [N, 8732])
+        # 按照confidence_loss降序排列 con_idx(Tensor: [N, 76725])
         _, con_idx = con_neg.sort(dim=1, descending=True)
         _, con_rank = con_idx.sort(dim=1)  # 这个步骤比较巧妙
 
         # number of negative three times positive
         # 用于损失计算的负样本数是正样本的3倍（在原论文Hard negative mining部分），
-        # 但不能超过总样本数8732
+        # 但不能超过总样本数
         neg_num = torch.clamp(3 * pos_num, max=mask.size(1)).unsqueeze(-1)
-        neg_mask = con_rank < neg_num  # Tensor [N, 8732]
+        neg_mask = con_rank < neg_num  # Tensor [N, 76725]
+
+        # confidence最终loss使用选取的正样本loss+选取的负样本loss
+        con_loss = (con * (mask.float() + neg_mask.float())).sum(dim=1)  # Tensor [N]
+
+        # avoid no object detected
+        # 避免出现图像中没有GTBOX的情况
+        total_loss = loc_loss + con_loss
+        num_mask = (pos_num > 0).float()  # 统计一个batch中的每张图像中是否存在GTBOX
+        pos_num = pos_num.float().clamp(min=1e-6)  # 防止出现分母为零的情况
+        ret = (total_loss * num_mask / pos_num).mean(dim=0)  # 只计算存在GTBOX的图像损失
+        return ret
+
+
+class FocalLoss(nn.Module):
+    """
+        Implements the loss as the sum of the followings:
+        1. Confidence Loss: All labels, with hard negative mining
+        2. Localization Loss: Only on positive labels
+        Suppose input dboxes has the shape 76725x4
+    """
+    def __init__(self, dboxes):
+        super(FocalLoss, self).__init__()
+        self.scale_xy = 1.0 / dboxes.scale_xy
+        self.scale_wh = 1.0 / dboxes.scale_wh
+
+        self.location_loss = nn.SmoothL1Loss(reduction='none')
+        # self.location_loss = nn.SmoothL1Loss(reduce=False)
+        self.dboxes = nn.Parameter(dboxes(order="xywh").transpose(0, 1).unsqueeze(dim=0),
+                                   requires_grad=False)
+
+        # Two factor are from following links
+        # http://jany.st/post/2017-11-05-single-shot-detector-ssd-from-scratch-in-tensorflow.html
+        self.confidence_loss = nn.CrossEntropyLoss(reduction='none')
+        # self.confidence_loss = nn.CrossEntropyLoss(reduce=False)
+
+    def _location_vec(self, loc):
+        # type: (Tensor)
+        """
+        Generate Location Vectors
+        计算ground truth相对anchors的回归参数
+        :param loc:
+        :return:
+        """
+        gxy = self.scale_xy * (loc[:, :2, :] - self.dboxes[:, :2, :]) / self.dboxes[:, 2:, :]
+        gwh = self.scale_wh * (loc[:, 2:, :] / self.dboxes[:, 2:, :]).log()
+        return torch.cat((gxy, gwh), dim=1).contiguous()
+
+    def forward(self, ploc, plabel, gloc, glabel):
+        # type: (Tensor, Tensor, Tensor, Tensor)
+        """
+            ploc, plabel: Nx4x76725, Nxlabel_numx76725
+                predicted location and labels
+
+            gloc, glabel: Nx4x76725, Nx76725
+                ground truth location and labels
+        """
+        # 获取正样本的mask  Tensor: [N, 76725]
+        mask = glabel > 0
+        # mask1 = torch.nonzero(glabel)
+        # 计算一个batch中的每张图片的正样本个数 Tensor: [N]
+        pos_num = mask.sum(dim=1)
+
+        # 计算gt的location回归参数 Tensor: [N, 4, 76725]
+        vec_gd = self._location_vec(gloc)
+
+        # sum on four coordinates, and mask
+        # 计算定位损失(只有正样本)
+        loc_loss = self.location_loss(ploc, vec_gd).sum(dim=1)  # Tensor: [N, 76725]
+        loc_loss = (mask.float() * loc_loss).sum(dim=1)  # Tenosr: [N]
+
+        # hard negative mining Tensor: [N, 76725]
+        con = self.confidence_loss(plabel, glabel)
+
+        # positive mask will never selected
+        # 获取负样本
+        con_neg = con.clone()
+        con_neg[mask] = torch.tensor(0.0)
+        # 按照confidence_loss降序排列 con_idx(Tensor: [N, 76725])
+        _, con_idx = con_neg.sort(dim=1, descending=True)
+        _, con_rank = con_idx.sort(dim=1)  # 这个步骤比较巧妙
+
+        # number of negative three times positive
+        # 用于损失计算的负样本数是正样本的3倍（在原论文Hard negative mining部分），
+        # 但不能超过总样本数
+        neg_num = torch.clamp(3 * pos_num, max=mask.size(1)).unsqueeze(-1)
+        neg_mask = con_rank < neg_num  # Tensor [N, 76725]
 
         # confidence最终loss使用选取的正样本loss+选取的负样本loss
         con_loss = (con * (mask.float() + neg_mask.float())).sum(dim=1)  # Tensor [N]

diff --git a/pytorch_object_detection/RetinaNet/src/res50_backbone.py b/pytorch_object_detection/RetinaNet/src/res50_backbone.py
@@ -185,7 +185,7 @@ def __init__(self, in_channels_list, out_channels=256, extra_blocks=None):
         self.projection_blocks = nn.ModuleList()
         # 对调整后的特征矩阵使用3x3的卷积核来得到对应的预测特征矩阵
         self.smoothing_blocks = nn.ModuleList()
-        for in_channels in in_channels_list:
+        for in_channels in in_channels_list:  # [512, 1024, 2048]
             projection_block = nn.Conv2d(in_channels, out_channels, 1)
             self.projection_blocks.append(projection_block)
 
@@ -271,8 +271,8 @@ def forward(self, x):
             # 将上一层的特征矩阵上采样到当前层大小
             inner_top_down = F.interpolate(last_projection, size=feat_shape, mode="nearest")
             # add
-            last_projection = projection_lateral + inner_top_down
-            last_projection = self.get_result_from_smoothing_blocks(last_projection, idx)
+            last_projection_t = projection_lateral + inner_top_down
+            last_projection = self.get_result_from_smoothing_blocks(last_projection_t, idx)
             results.insert(0, last_projection)
 
         # 在layer4对应的预测特征层基础上生成预测特征矩阵P6和P7
@@ -311,7 +311,7 @@ def __init__(self, backbone, return_layers, in_channels_list, out_channels):
         super(BackboneWithFPN, self).__init__()
         self.body = IntermediateLayerGetter(backbone, return_layers=return_layers)
         self.fpn = FeaturePyramidNetwork(
-            in_channels_list=in_channels_list,
+            in_channels_list=in_channels_list,   # [512, 1024, 2048]
             out_channels=out_channels,
             extra_blocks=LastLevelMaxPool(in_channels=out_channels),
             )

diff --git a/pytorch_object_detection/RetinaNet/src/retina_model.py b/pytorch_object_detection/RetinaNet/src/retina_model.py
@@ -61,7 +61,7 @@ def __init__(self, num_features=5, in_channels=256,
         self.shared_box_tower_conv = nn.ModuleList()
         self.shared_class_tower_conv = nn.ModuleList()
         # 构建共享的predictor tower权重
-        for i in range(num_layers_before_predictor):
+        for i in range(num_layers_before_predictor):  # [0, 1, 2, 3]
             self.shared_box_tower_conv.append(nn.Conv2d(in_channels, in_channels, 3, 1, 1, bias=False))
             self.shared_class_tower_conv.append(nn.Conv2d(in_channels, in_channels, 3, 1, 1, bias=False))
 
@@ -71,13 +71,13 @@ def __init__(self, num_features=5, in_channels=256,
         self.unshared_box_tower_relu6 = nn.ModuleList()
         self.unshared_class_tower_relu6 = nn.ModuleList()
         # 每个预测特征层的bn和activation都不共享
-        for i in range(num_features):
+        for i in range(num_features):  # [0, 1, 2, 3, 4]
             box_bn_every_layer = nn.ModuleList()
             box_relu6_every_layer = nn.ModuleList()
             class_bn_every_layer = nn.ModuleList()
             class_relu6_every_layer = nn.ModuleList()
 
-            for j in range(num_layers_before_predictor):
+            for j in range(num_layers_before_predictor):  # [0, 1, 2, 3]
                 box_bn_every_layer.append(nn.BatchNorm2d(in_channels))
                 box_relu6_every_layer.append(nn.ReLU6(inplace=True))
 
@@ -96,17 +96,21 @@ def __init__(self, num_features=5, in_channels=256,
 
     def _init_weights(self):
         layers = [*self.shared_box_tower_conv, *self.shared_class_tower_conv,
-                  self.box_predictor, self.class_predictor]
+                  self.box_predictor]
         for layer in layers:
             if isinstance(layer, nn.Conv2d):
                 nn.init.xavier_uniform_(layer.weight)
                 if layer.bias is not None:
                     nn.init.constant_(layer.bias, 0)
 
+        # 参考tf初始化方法
+        nn.init.normal_(self.class_predictor.weight, mean=0.0, std=0.01)
+        nn.init.constant_(self.class_predictor.bias, -4.6)
+
     def forward(self, features):
         class_outputs = torch.jit.annotate(List[Tensor], [])
         box_outputs = torch.jit.annotate(List[Tensor], [])
-        for i in range(self.num_features):
+        for i in range(self.num_features):  # [0, 1, 2, 3, 4]
             feature = features[i]
             box_output = feature
             class_output = feature