update ssd code

wymmd · Jun 6, 2020 · 350f7bc · 350f7bc
1 parent 67bb0b7
commit 350f7bc
Show file tree

Hide file tree

Showing 12 changed files with 431 additions and 120 deletions.
diff --git a/.gitignore b/.gitignore
@@ -5,9 +5,11 @@ __pycache__
 flower_data
 *.h5
 *.pth
+*.pt
 *.jpg
 *.ckpt.*
 *.ckpt
 *.config
 checkpoint
 data
+VOCdevkit
diff --git a/pytorch_object_detection/faster_rcnn/network_files/boxes.py b/pytorch_object_detection/faster_rcnn/network_files/boxes.py
@@ -74,9 +74,10 @@ def batched_nms(boxes, scores, idxs, iou_threshold):
     max_coordinate = boxes.max()
 
     # to(): Performs Tensor dtype and/or device conversion
-    # 为每一层预测特征层生成一个很大的偏移量
+    # 为每一个类别生成一个很大的偏移量
+    # 这里的to只是让生成tensor的dtype和device与boxes保持一致
     offsets = idxs.to(boxes) * (max_coordinate + 1)
-    # boxes加上对应层的偏移量后，保证不同层之间boxes不会有重合的现象
+    # boxes加上对应类别的偏移量后，保证不同类别之间boxes不会有重合的现象
     boxes_for_nms = boxes + offsets[:, None]
     keep = nms(boxes_for_nms, scores, iou_threshold)
     return keep

diff --git a/pytorch_object_detection/faster_rcnn/network_files/rpn_function.py b/pytorch_object_detection/faster_rcnn/network_files/rpn_function.py
@@ -258,6 +258,7 @@ def permute_and_flatten(layer, N, A, C, H, W):
     # view和reshape功能是一样的，先展平所有元素再按照给定shape排列
     # view函数只能用于内存中连续存储的tensor，permute等操作会使tensor在内存中变得不再连续，此时就不能再调用view函数
     # reshape则不需要依赖目标tensor是否在内存中是连续的
+    # [batch_size, anchors_num_per_position * (C or 4), height, width]
     layer = layer.view(N, -1, C,  H, W)
     # 调换tensor维度
     layer = layer.permute(0, 3, 4, 1, 2)  # [N, H, W, -1, C]
@@ -280,6 +281,7 @@ def concat_box_prediction_layers(box_cls, box_regression):
     box_cls_flattened = []
     box_regression_flattened = []
 
+    # 遍历每个预测特征层
     for box_cls_per_level, box_regression_per_level in zip(box_cls, box_regression):
         # [batch_size, anchors_num_per_position * classes_num, height, width]
         # 注意，当计算RPN中的proposal时，classes_num=1,只区分目标和背景

diff --git a/pytorch_object_detection/ssd/README.md b/pytorch_object_detection/ssd/README.md
@@ -0,0 +1 @@
+# 代码完善中，敬请期待...
diff --git a/pytorch_object_detection/ssd/plot_curve.py b/pytorch_object_detection/ssd/plot_curve.py
@@ -0,0 +1,43 @@
+import matplotlib.pyplot as plt
+
+
+def plot_loss_and_lr(train_loss, learning_rate):
+    """Plot the training loss and learning rate on one figure and save it.
+
+    The loss is drawn in red against the left y-axis and the learning rate
+    against a twin right y-axis; both share the "step" x-axis. The figure is
+    written to ``./loss_and_lr.png``.
+
+    Args:
+        train_loss: sequence of loss values; its length defines the x-axis.
+        learning_rate: sequence of learning-rate values plotted against the
+            same x positions as ``train_loss``.
+
+    Any exception raised while plotting or saving is printed rather than
+    propagated, so a plotting failure does not interrupt the caller.
+    """
+    fig = None
+    try:
+        x = list(range(len(train_loss)))
+        fig, ax1 = plt.subplots(1, 1)
+        ax1.plot(x, train_loss, 'r', label='loss')
+        ax1.set_xlabel("step")
+        ax1.set_ylabel("loss")
+        ax1.set_title("Train Loss and lr")
+
+        ax2 = ax1.twinx()
+        ax2.plot(x, learning_rate, label='lr')
+        ax2.set_ylabel("learning rate")
+        ax2.set_xlim(0, len(train_loss))  # x-axis spans all recorded steps
+
+        # Draw a single legend listing the curves of both axes. Per-axis
+        # legends would leave a second, redundant "loss" legend on ax1.
+        handles1, labels1 = ax1.get_legend_handles_labels()
+        handles2, labels2 = ax2.get_legend_handles_labels()
+        ax2.legend(handles1 + handles2, labels1 + labels2, loc='upper right')
+
+        fig.subplots_adjust(right=0.8)  # avoid the saved image being cut off on the right
+        fig.savefig('./loss_and_lr.png')
+        print("successful save loss curve! ")
+    except Exception as e:
+        print(e)
+    finally:
+        # Release the figure so repeated calls do not accumulate open figures
+        # or leave this one behind as pyplot's "current" figure.
+        if fig is not None:
+            plt.close(fig)
+
+
+def plot_map(mAP):
+    """Plot the evaluation mAP curve and save it to ``./mAP.png``.
+
+    Args:
+        mAP: sequence of mAP values; its length defines the "epoch" x-axis.
+
+    The curve is drawn on a figure created here instead of pyplot's implicit
+    current figure, so curves left over from an earlier plot cannot end up
+    in the saved image. Any exception raised while plotting or saving is
+    printed rather than propagated.
+    """
+    fig = None
+    try:
+        x = list(range(len(mAP)))
+        fig, ax = plt.subplots(1, 1)
+        ax.plot(x, mAP, label='mAp')
+        ax.set_xlabel('epoch')
+        ax.set_ylabel('mAP')
+        ax.set_title('Eval mAP')
+        ax.set_xlim(0, len(mAP))
+        ax.legend(loc='best')
+        fig.savefig('./mAP.png')
+        print("successful save mAP curve!")
+    except Exception as e:
+        print(e)
+    finally:
+        # Close the figure so repeated calls do not accumulate open figures.
+        if fig is not None:
+            plt.close(fig)
diff --git a/pytorch_object_detection/ssd/src/ssd_model.py b/pytorch_object_detection/ssd/src/ssd_model.py
@@ -49,8 +49,8 @@ def __init__(self, backbone=None, num_classes=21):
             location_extractors.append(nn.Conv2d(oc, nd * 4, kernel_size=3, padding=1))
             confidence_extractors.append(nn.Conv2d(oc, nd * self.num_classes, kernel_size=3, padding=1))
 
-        self.location_extractors = nn.ModuleList(location_extractors)
-        self.confidence_extractors = nn.ModuleList(confidence_extractors)
+        self.loc = nn.ModuleList(location_extractors)
+        self.conf = nn.ModuleList(confidence_extractors)
         self._init_weights()
 
         default_box = dboxes300_coco()
@@ -80,7 +80,7 @@ def _build_additional_features(self, input_size):
         self.additional_blocks = nn.ModuleList(additional_blocks)
 
     def _init_weights(self):
-        layers = [*self.additional_blocks, *self.location_extractors, *self.confidence_extractors]
+        layers = [*self.additional_blocks, *self.loc, *self.conf]
         for layer in layers:
             for param in layer.parameters():
                 if param.dim() > 1:
@@ -91,13 +91,11 @@ def bbox_view(self, features, loc_extractor, conf_extractor):
         locs = []
         confs = []
         for f, l, c in zip(features, loc_extractor, conf_extractor):
+            # [batch, n*4, feat_size, feat_size] -> [batch, 4, -1]
             locs.append(l(f).view(f.size(0), 4, -1))
             confs.append(c(f).view(f.size(0), self.num_classes, -1))
-            # locs.append(l(f).view(f.size(0), -1, 4))
-            # confs.append(c(f).view(f.size(0), -1, self.num_classes))
 
         locs, confs = torch.cat(locs, 2).contiguous(), torch.cat(confs, 2).contiguous()
-        # locs, confs = torch.cat(locs, 1).contiguous(), torch.cat(confs, 1).contiguous()
         return locs, confs
 
     def forward(self, image, targets):
@@ -110,16 +108,19 @@ def forward(self, image, targets):
             detection_features.append(x)
 
         # Feature Map 38x38x4, 19x19x6, 10x10x6, 5x5x6, 3x3x4, 1x1x4
-        locs, confs = self.bbox_view(detection_features, self.location_extractors, self.confidence_extractors)
+        locs, confs = self.bbox_view(detection_features, self.loc, self.conf)
 
         # For SSD 300, shall return nbatch x 8732 x {nlabels, nlocs} results
         # 38x38x4 + 19x19x6 + 10x10x6 + 5x5x6 + 3x3x4 + 1x1x4 = 8732
 
         if self.training:
             # bboxes_out (Tensor 8732 x 4), labels_out (Tensor 8732)
             bboxes_out = targets['boxes']
-            bboxes_out = bboxes_out.transpose(1, 2)
+            bboxes_out = bboxes_out.transpose(1, 2).contiguous()
+            # print(bboxes_out.is_contiguous())
             labels_out = targets['labels']
+            # print(labels_out.is_contiguous())
+
             # ploc, plabel, gloc, glabel
             loss = self.compute_loss(locs, confs, bboxes_out, labels_out)
             return {"total_losses": loss}
@@ -142,12 +143,14 @@ def __init__(self, dboxes):
         self.scale_wh = 1.0 / dboxes.scale_wh
 
         self.location_loss = nn.SmoothL1Loss(reduction='none')
-        self.dboxes = nn.Parameter(dboxes(order="xywh").transpose(0, 1).unsqueeze(dim=0), requires_grad=False)
-        # self.dboxes = nn.Parameter(dboxes(order="xywh").unsqueeze(dim=0), requires_grad=False)
+        # self.location_loss = nn.SmoothL1Loss(reduce=False)
+        self.dboxes = nn.Parameter(dboxes(order="xywh").transpose(0, 1).unsqueeze(dim=0),
+                                   requires_grad=False)
 
         # Two factor are from following links
         # http://jany.st/post/2017-11-05-single-shot-detector-ssd-from-scratch-in-tensorflow.html
         self.confidence_loss = nn.CrossEntropyLoss(reduction='none')
+        # self.confidence_loss = nn.CrossEntropyLoss(reduce=False)
 
     def _location_vec(self, loc):
         """
@@ -158,10 +161,7 @@ def _location_vec(self, loc):
         """
         gxy = self.scale_xy * (loc[:, :2, :] - self.dboxes[:, :2, :]) / self.dboxes[:, 2:, :]
         gwh = self.scale_wh * (loc[:, 2:, :] / self.dboxes[:, 2:, :]).log()
-        # gxy = self.scale_xy * (loc[:, :, :2] - self.dboxes[:, :, :2]) / self.dboxes[:, :, 2:]
-        # gwh = self.scale_wh * (loc[:, :, 2:] / self.dboxes[:, :, 2:]).log()
         return torch.cat((gxy, gwh), dim=1).contiguous()
-        # return torch.cat((gxy, gwh), dim=2).contiguous()
 
     def forward(self, ploc, plabel, gloc, glabel):
         """