Skip to content

Commit

Permalink
Merge pull request WZMIAOMIAO#39 from WZMIAOMIAO/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
WZMIAOMIAO authored Jul 24, 2020
2 parents 1e7e3aa + 606a8d2 commit 0636048
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 38 deletions.
2 changes: 1 addition & 1 deletion pytorch_object_detection/ssd/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
* 若要使用多GPU训练,使用 "python -m torch.distributed.launch --nproc_per_node=8 --use_env train_multi_GPU.py" 指令,nproc_per_node参数为使用GPU数量

## 如果对SSD算法原理不是很理解可参考我的bilibili
* https://b23.tv/GJnkOD
* https://www.bilibili.com/video/BV1fT4y1L7Gi

## 进一步了解该项目,以及对SSD算法代码的分析可参考我的bilibili
* https://www.bilibili.com/video/BV1vK411H771/
Expand Down
22 changes: 11 additions & 11 deletions pytorch_object_detection/ssd/src/ssd_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,29 +146,28 @@ class Loss(nn.Module):
"""
def __init__(self, dboxes):
super(Loss, self).__init__()
self.scale_xy = 1.0 / dboxes.scale_xy
self.scale_wh = 1.0 / dboxes.scale_wh
# These two scaling factors are taken from the following link:
# http://jany.st/post/2017-11-05-single-shot-detector-ssd-from-scratch-in-tensorflow.html
self.scale_xy = 1.0 / dboxes.scale_xy # 10
self.scale_wh = 1.0 / dboxes.scale_wh # 5

self.location_loss = nn.SmoothL1Loss(reduction='none')
# self.location_loss = nn.SmoothL1Loss(reduce=False)
# [num_anchors, 4] -> [4, num_anchors] -> [1, 4, num_anchors]
self.dboxes = nn.Parameter(dboxes(order="xywh").transpose(0, 1).unsqueeze(dim=0),
requires_grad=False)

# These two scaling factors are taken from the following link:
# http://jany.st/post/2017-11-05-single-shot-detector-ssd-from-scratch-in-tensorflow.html
self.confidence_loss = nn.CrossEntropyLoss(reduction='none')
# self.confidence_loss = nn.CrossEntropyLoss(reduce=False)

def _location_vec(self, loc):
# type: (Tensor)
"""
Generate Location Vectors
计算ground truth相对anchors的回归参数
:param loc:
:param loc: anchor匹配到的对应GTBOX Nx4x8732
:return:
"""
gxy = self.scale_xy * (loc[:, :2, :] - self.dboxes[:, :2, :]) / self.dboxes[:, 2:, :]
gwh = self.scale_wh * (loc[:, 2:, :] / self.dboxes[:, 2:, :]).log()
gxy = self.scale_xy * (loc[:, :2, :] - self.dboxes[:, :2, :]) / self.dboxes[:, 2:, :] # Nx2x8732
gwh = self.scale_wh * (loc[:, 2:, :] / self.dboxes[:, 2:, :]).log() # Nx2x8732
return torch.cat((gxy, gwh), dim=1).contiguous()

def forward(self, ploc, plabel, gloc, glabel):
Expand Down Expand Up @@ -217,8 +216,9 @@ def forward(self, ploc, plabel, gloc, glabel):
# avoid no object detected
# 避免出现图像中没有GTBOX的情况
total_loss = loc_loss + con_loss
num_mask = (pos_num > 0).float() # 统计一个batch中的每张图像中是否存在GTBOX
# eg. [15, 3, 5, 0] -> [1.0, 1.0, 1.0, 0.0]
num_mask = (pos_num > 0).float() # 统计一个batch中的每张图像中是否存在正样本
pos_num = pos_num.float().clamp(min=1e-6) # 防止出现分母为零的情况
ret = (total_loss * num_mask / pos_num).mean(dim=0) # 只计算存在GTBOX的图像损失
ret = (total_loss * num_mask / pos_num).mean(dim=0) # 只计算存在正样本的图像损失
return ret

67 changes: 41 additions & 26 deletions pytorch_object_detection/ssd/src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,18 +339,23 @@ def decode_single(self, bboxes_in, scores_in, criteria, max_output, max_num=200)

class DefaultBoxes(object):
def __init__(self, fig_size, feat_size, steps, scales, aspect_ratios, scale_xy=0.1, scale_wh=0.2):
self.fig_size = fig_size # 输入网络的图像大小
self.fig_size = fig_size # 输入网络的图像大小 300
# [38, 19, 10, 5, 3, 1]
self.feat_size = feat_size # 每个预测层的feature map尺寸

self.scale_xy_ = scale_xy
self.scale_wh_ = scale_wh

# Follows https://github.com/weiliu89/caffe;
# the calculation method differs slightly from the paper.
# [8, 16, 32, 64, 100, 300]
self.steps = steps # 每个特征层上的一个cell在原图上的跨度

# [21, 45, 99, 153, 207, 261, 315]
self.scales = scales # 每个特征层上预测的default box的scale

fk = fig_size / np.array(steps) # 计算每层特征层的fk
# [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
self.aspect_ratios = aspect_ratios # 每个预测特征层上预测的default box的ratios

self.default_boxes = []
Expand All @@ -376,17 +381,18 @@ def __init__(self, fig_size, feat_size, steps, scales, aspect_ratios, scale_xy=0
cx, cy = (j + 0.5) / fk[idx], (i + 0.5) / fk[idx]
self.default_boxes.append((cx, cy, w, h))

self.dboxes = torch.tensor(self.default_boxes, dtype=torch.float32) # 这里不转类型会报错
self.dboxes.clamp_(min=0, max=1) # 将坐标(x, y, w, h)都限制在0-1之间
# 将default_boxes转为tensor格式
self.dboxes = torch.tensor(self.default_boxes, dtype=torch.float32) # 这里不转类型会报错
self.dboxes.clamp_(min=0, max=1) # 将坐标(x, y, w, h)都限制在0-1之间

# For IoU calculation
# ltrb is left top coordinate and right bottom coordinate
# 将(x, y, w, h)转换成(xmin, ymin, xmax, ymax),方便后续计算IoU(匹配正负样本时)
self.dboxes_ltrb = self.dboxes.clone()
self.dboxes_ltrb[:, 0] = self.dboxes[:, 0] - 0.5 * self.dboxes[:, 2]
self.dboxes_ltrb[:, 1] = self.dboxes[:, 1] - 0.5 * self.dboxes[:, 3]
self.dboxes_ltrb[:, 2] = self.dboxes[:, 0] + 0.5 * self.dboxes[:, 2]
self.dboxes_ltrb[:, 3] = self.dboxes[:, 1] + 0.5 * self.dboxes[:, 3]
# For IoU calculation
# ltrb is left top coordinate and right bottom coordinate
# 将(x, y, w, h)转换成(xmin, ymin, xmax, ymax),方便后续计算IoU(匹配正负样本时)
self.dboxes_ltrb = self.dboxes.clone()
self.dboxes_ltrb[:, 0] = self.dboxes[:, 0] - 0.5 * self.dboxes[:, 2] # xmin
self.dboxes_ltrb[:, 1] = self.dboxes[:, 1] - 0.5 * self.dboxes[:, 3] # ymin
self.dboxes_ltrb[:, 2] = self.dboxes[:, 0] + 0.5 * self.dboxes[:, 2] # xmax
self.dboxes_ltrb[:, 3] = self.dboxes[:, 1] + 0.5 * self.dboxes[:, 3] # ymax

@property
def scale_xy(self):
Expand Down Expand Up @@ -498,27 +504,32 @@ def batched_nms(boxes, scores, idxs, iou_threshold):
class PostProcess(nn.Module):
def __init__(self, dboxes):
super(PostProcess, self).__init__()
# [num_anchors, 4] -> [1, num_anchors, 4]
self.dboxes_xywh = nn.Parameter(dboxes(order='xywh').unsqueeze(dim=0),
requires_grad=False)
self.scale_xy = dboxes.scale_xy
self.scale_wh = dboxes.scale_wh
self.scale_xy = dboxes.scale_xy # 0.1
self.scale_wh = dboxes.scale_wh # 0.2

self.criteria = 0.5
self.max_output = 100

def scale_back_batch(self, bboxes_in, scores_in):
# type: (Tensor, Tensor)
"""
将box格式从xywh转换回ltrb, 将预测目标score通过softmax处理
1)通过预测的boxes回归参数得到最终预测坐标
2)将box格式从xywh转换回ltrb
3)将预测目标score通过softmax处理
Scale the predictions and convert the boxes from xywh to ltrb.
Expected input shapes: N x 4 x num_bbox | N x label_num x num_bbox
bboxes_in: 是网络预测的xywh回归参数
scores_in: 是预测的每个default box的各目标概率
bboxes_in: [N, 4, 8732]是网络预测的xywh回归参数
scores_in: [N, label_num, 8732]是预测的每个default box的各目标概率
"""

# Returns a view of the original tensor with its dimensions permuted.
# [batch, 4, 8732] -> [batch, 8732, 4]
bboxes_in = bboxes_in.permute(0, 2, 1)
# [batch, label_num, 8732] -> [batch, 8732, label_num]
scores_in = scores_in.permute(0, 2, 1)
# print(bboxes_in.is_contiguous())

Expand All @@ -540,6 +551,7 @@ def scale_back_batch(self, bboxes_in, scores_in):
bboxes_in[:, :, 2] = r # xmax
bboxes_in[:, :, 3] = b # ymax

# scores_in: [batch, 8732, label_num]
return bboxes_in, F.softmax(scores_in, dim=-1)

def decode_single_new(self, bboxes_in, scores_in, criteria, num_output):
Expand All @@ -562,27 +574,28 @@ def decode_single_new(self, bboxes_in, scores_in, criteria, num_output):

# create labels for each prediction
labels = torch.arange(num_classes, device=device)
# [num_classes] -> [8732, num_classes]
labels = labels.view(1, -1).expand_as(scores_in)

# remove prediction with the background label
# 移除归为背景类别的概率信息
bboxes_in = bboxes_in[:, 1:, :]
scores_in = scores_in[:, 1:]
labels = labels[:, 1:]
bboxes_in = bboxes_in[:, 1:, :] # [8732, 21, 4] -> [8732, 20, 4]
scores_in = scores_in[:, 1:] # [8732, 21] -> [8732, 20]
labels = labels[:, 1:] # [8732, 21] -> [8732, 20]

# batch everything, by making every class prediction be a separate instance
bboxes_in = bboxes_in.reshape(-1, 4)
scores_in = scores_in.reshape(-1)
labels = labels.reshape(-1)
bboxes_in = bboxes_in.reshape(-1, 4) # [8732, 20, 4] -> [8732x20, 4]
scores_in = scores_in.reshape(-1) # [8732, 20] -> [8732x20]
labels = labels.reshape(-1) # [8732, 20] -> [8732x20]

# remove low scoring boxes
# 移除低概率目标,self.scores_thresh=0.05
inds = torch.nonzero(scores_in > 0.05).squeeze(1)
bboxes_in, scores_in, labels = bboxes_in[inds], scores_in[inds], labels[inds]
bboxes_in, scores_in, labels = bboxes_in[inds, :], scores_in[inds], labels[inds]

# remove empty boxes
ws, hs = bboxes_in[:, 2] - bboxes_in[:, 0], bboxes_in[:, 3] - bboxes_in[:, 1]
keep = (ws >= 0.1 / 300) & (hs >= 0.1 / 300)
keep = (ws >= 1 / 300) & (hs >= 1 / 300)
keep = keep.nonzero().squeeze(1)
bboxes_in, scores_in, labels = bboxes_in[keep], scores_in[keep], labels[keep]

Expand All @@ -598,12 +611,14 @@ def decode_single_new(self, bboxes_in, scores_in, criteria, num_output):
return bboxes_out, labels_out, scores_out

def forward(self, bboxes_in, scores_in):
# 将box格式从xywh转换回ltrb(方便后面非极大值抑制时求iou), 将预测目标score通过softmax处理
# 通过预测的boxes回归参数得到最终预测坐标, 将预测目标score通过softmax处理
bboxes, probs = self.scale_back_batch(bboxes_in, scores_in)

outputs = torch.jit.annotate(List[Tuple[Tensor, Tensor, Tensor]], [])
# 遍历一个batch中的每张image数据
for bbox, prob in zip(bboxes.split(1, 0), probs.split(1, 0)):
# bboxes: [batch, 8732, 4]
for bbox, prob in zip(bboxes.split(1, 0), probs.split(1, 0)): # split_size, split_dim
# bbox: [1, 8732, 4]
bbox = bbox.squeeze(0)
prob = prob.squeeze(0)
outputs.append(self.decode_single_new(bbox, prob, self.criteria, self.max_output))
Expand Down

0 comments on commit 0636048

Please sign in to comment.