Skip to content

Commit

Permalink
add training on multi-GPU
Browse files Browse the repository at this point in the history
  • Loading branch information
wz authored and wz committed Jun 27, 2020
1 parent 83ffe1e commit ee00ae5
Show file tree
Hide file tree
Showing 13 changed files with 572 additions and 55 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ flower_data
checkpoint
data
VOCdevkit
ssd_resnet50_v1_fpn_shared_box_predictor
106 changes: 96 additions & 10 deletions pytorch_object_detection/RetinaNet/src/loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ class Loss(nn.Module):
Implements the loss as the sum of the followings:
1. Confidence Loss: All labels, with hard negative mining
2. Localization Loss: Only on positive labels
Suppose input dboxes has the shape 8732x4
Suppose input dboxes has the shape 76725x4
"""
def __init__(self, dboxes):
super(Loss, self).__init__()
Expand Down Expand Up @@ -39,42 +39,128 @@ def _location_vec(self, loc):
def forward(self, ploc, plabel, gloc, glabel):
# type: (Tensor, Tensor, Tensor, Tensor)
"""
ploc, plabel: Nx4x8732, Nxlabel_numx8732
ploc, plabel: Nx4x76725, Nxlabel_numx76725
predicted location and labels
gloc, glabel: Nx4x8732, Nx8732
gloc, glabel: Nx4x76725, Nx76725
ground truth location and labels
"""
# 获取正样本的mask Tensor: [N, 8732]
# 获取正样本的mask Tensor: [N, 76725]
mask = glabel > 0
# mask1 = torch.nonzero(glabel)
# 计算一个batch中的每张图片的正样本个数 Tensor: [N]
pos_num = mask.sum(dim=1)

# 计算gt的location回归参数 Tensor: [N, 4, 8732]
# 计算gt的location回归参数 Tensor: [N, 4, 76725]
vec_gd = self._location_vec(gloc)

# sum on four coordinates, and mask
# 计算定位损失(只有正样本)
loc_loss = self.location_loss(ploc, vec_gd).sum(dim=1) # Tensor: [N, 8732]
loc_loss = self.location_loss(ploc, vec_gd).sum(dim=1) # Tensor: [N, 76725]
loc_loss = (mask.float() * loc_loss).sum(dim=1) # Tenosr: [N]

# hard negative mining Tenosr: [N, 8732]
# hard negative mining Tenosr: [N, 76725]
con = self.confidence_loss(plabel, glabel)

# positive mask will never selected
# 获取负样本
con_neg = con.clone()
con_neg[mask] = torch.tensor(0.0)
# 按照confidence_loss降序排列 con_idx(Tensor: [N, 8732])
# 按照confidence_loss降序排列 con_idx(Tensor: [N, 76725])
_, con_idx = con_neg.sort(dim=1, descending=True)
_, con_rank = con_idx.sort(dim=1) # 这个步骤比较巧妙

# number of negative three times positive
# 用于损失计算的负样本数是正样本的3倍(在原论文Hard negative mining部分),
# 但不能超过总样本数8732
# 但不能超过总样本数
neg_num = torch.clamp(3 * pos_num, max=mask.size(1)).unsqueeze(-1)
neg_mask = con_rank < neg_num # Tensor [N, 8732]
neg_mask = con_rank < neg_num # Tensor [N, 76725]

# confidence最终loss使用选取的正样本loss+选取的负样本loss
con_loss = (con * (mask.float() + neg_mask.float())).sum(dim=1) # Tensor [N]

# avoid no object detected
# 避免出现图像中没有GTBOX的情况
total_loss = loc_loss + con_loss
num_mask = (pos_num > 0).float() # 统计一个batch中的每张图像中是否存在GTBOX
pos_num = pos_num.float().clamp(min=1e-6) # 防止出现分母为零的情况
ret = (total_loss * num_mask / pos_num).mean(dim=0) # 只计算存在GTBOX的图像损失
return ret


class FocalLoss(nn.Module):
"""
Implements the loss as the sum of the followings:
1. Confidence Loss: All labels, with hard negative mining
2. Localization Loss: Only on positive labels
Suppose input dboxes has the shape 76725x4
"""
def __init__(self, dboxes):
super(FocalLoss, self).__init__()
self.scale_xy = 1.0 / dboxes.scale_xy
self.scale_wh = 1.0 / dboxes.scale_wh

self.location_loss = nn.SmoothL1Loss(reduction='none')
# self.location_loss = nn.SmoothL1Loss(reduce=False)
self.dboxes = nn.Parameter(dboxes(order="xywh").transpose(0, 1).unsqueeze(dim=0),
requires_grad=False)

# Two factor are from following links
# http://jany.st/post/2017-11-05-single-shot-detector-ssd-from-scratch-in-tensorflow.html
self.confidence_loss = nn.CrossEntropyLoss(reduction='none')
# self.confidence_loss = nn.CrossEntropyLoss(reduce=False)

def _location_vec(self, loc):
# type: (Tensor)
"""
Generate Location Vectors
计算ground truth相对anchors的回归参数
:param loc:
:return:
"""
gxy = self.scale_xy * (loc[:, :2, :] - self.dboxes[:, :2, :]) / self.dboxes[:, 2:, :]
gwh = self.scale_wh * (loc[:, 2:, :] / self.dboxes[:, 2:, :]).log()
return torch.cat((gxy, gwh), dim=1).contiguous()

def forward(self, ploc, plabel, gloc, glabel):
# type: (Tensor, Tensor, Tensor, Tensor)
"""
ploc, plabel: Nx4x76725, Nxlabel_numx76725
predicted location and labels
gloc, glabel: Nx4x76725, Nx76725
ground truth location and labels
"""
# 获取正样本的mask Tensor: [N, 76725]
mask = glabel > 0
# mask1 = torch.nonzero(glabel)
# 计算一个batch中的每张图片的正样本个数 Tensor: [N]
pos_num = mask.sum(dim=1)

# 计算gt的location回归参数 Tensor: [N, 4, 76725]
vec_gd = self._location_vec(gloc)

# sum on four coordinates, and mask
# 计算定位损失(只有正样本)
loc_loss = self.location_loss(ploc, vec_gd).sum(dim=1) # Tensor: [N, 76725]
loc_loss = (mask.float() * loc_loss).sum(dim=1) # Tenosr: [N]

# hard negative mining Tenosr: [N, 76725]
con = self.confidence_loss(plabel, glabel)

# positive mask will never selected
# 获取负样本
con_neg = con.clone()
con_neg[mask] = torch.tensor(0.0)
# 按照confidence_loss降序排列 con_idx(Tensor: [N, 76725])
_, con_idx = con_neg.sort(dim=1, descending=True)
_, con_rank = con_idx.sort(dim=1) # 这个步骤比较巧妙

# number of negative three times positive
# 用于损失计算的负样本数是正样本的3倍(在原论文Hard negative mining部分),
# 但不能超过总样本数
neg_num = torch.clamp(3 * pos_num, max=mask.size(1)).unsqueeze(-1)
neg_mask = con_rank < neg_num # Tensor [N, 76725]

# confidence最终loss使用选取的正样本loss+选取的负样本loss
con_loss = (con * (mask.float() + neg_mask.float())).sum(dim=1) # Tensor [N]
Expand Down
8 changes: 4 additions & 4 deletions pytorch_object_detection/RetinaNet/src/res50_backbone.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def __init__(self, in_channels_list, out_channels=256, extra_blocks=None):
self.projection_blocks = nn.ModuleList()
# 对调整后的特征矩阵使用3x3的卷积核来得到对应的预测特征矩阵
self.smoothing_blocks = nn.ModuleList()
for in_channels in in_channels_list:
for in_channels in in_channels_list: # [512, 1024, 2048]
projection_block = nn.Conv2d(in_channels, out_channels, 1)
self.projection_blocks.append(projection_block)

Expand Down Expand Up @@ -271,8 +271,8 @@ def forward(self, x):
# 将上一层的特征矩阵上采样到当前层大小
inner_top_down = F.interpolate(last_projection, size=feat_shape, mode="nearest")
# add
last_projection = projection_lateral + inner_top_down
last_projection = self.get_result_from_smoothing_blocks(last_projection, idx)
last_projection_t = projection_lateral + inner_top_down
last_projection = self.get_result_from_smoothing_blocks(last_projection_t, idx)
results.insert(0, last_projection)

# 在layer4对应的预测特征层基础上生成预测特征矩阵P6和P7
Expand Down Expand Up @@ -311,7 +311,7 @@ def __init__(self, backbone, return_layers, in_channels_list, out_channels):
super(BackboneWithFPN, self).__init__()
self.body = IntermediateLayerGetter(backbone, return_layers=return_layers)
self.fpn = FeaturePyramidNetwork(
in_channels_list=in_channels_list,
in_channels_list=in_channels_list, # [512, 1024, 2048]
out_channels=out_channels,
extra_blocks=LastLevelMaxPool(in_channels=out_channels),
)
Expand Down
14 changes: 9 additions & 5 deletions pytorch_object_detection/RetinaNet/src/retina_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def __init__(self, num_features=5, in_channels=256,
self.shared_box_tower_conv = nn.ModuleList()
self.shared_class_tower_conv = nn.ModuleList()
# 构建共享的predictor tower权重
for i in range(num_layers_before_predictor):
for i in range(num_layers_before_predictor): # [0, 1, 2, 3]
self.shared_box_tower_conv.append(nn.Conv2d(in_channels, in_channels, 3, 1, 1, bias=False))
self.shared_class_tower_conv.append(nn.Conv2d(in_channels, in_channels, 3, 1, 1, bias=False))

Expand All @@ -71,13 +71,13 @@ def __init__(self, num_features=5, in_channels=256,
self.unshared_box_tower_relu6 = nn.ModuleList()
self.unshared_class_tower_relu6 = nn.ModuleList()
# 每个预测特征层的bn和activation都不共享
for i in range(num_features):
for i in range(num_features): # [0, 1, 2, 3, 4]
box_bn_every_layer = nn.ModuleList()
box_relu6_every_layer = nn.ModuleList()
class_bn_every_layer = nn.ModuleList()
class_relu6_every_layer = nn.ModuleList()

for j in range(num_layers_before_predictor):
for j in range(num_layers_before_predictor): # [0, 1, 2, 3]
box_bn_every_layer.append(nn.BatchNorm2d(in_channels))
box_relu6_every_layer.append(nn.ReLU6(inplace=True))

Expand All @@ -96,17 +96,21 @@ def __init__(self, num_features=5, in_channels=256,

def _init_weights(self):
layers = [*self.shared_box_tower_conv, *self.shared_class_tower_conv,
self.box_predictor, self.class_predictor]
self.box_predictor]
for layer in layers:
if isinstance(layer, nn.Conv2d):
nn.init.xavier_uniform_(layer.weight)
if layer.bias is not None:
nn.init.constant_(layer.bias, 0)

# 参考tf初始化方法
nn.init.normal_(self.class_predictor.weight, mean=0.0, std=0.01)
nn.init.constant_(self.class_predictor.bias, -4.6)

def forward(self, features):
class_outputs = torch.jit.annotate(List[Tensor], [])
box_outputs = torch.jit.annotate(List[Tensor], [])
for i in range(self.num_features):
for i in range(self.num_features): # [0, 1, 2, 3, 4]
feature = features[i]
box_output = feature
class_output = feature
Expand Down
Loading

0 comments on commit ee00ae5

Please sign in to comment.