forked from fyu/dilation
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
change network generation; adding testing code
- Loading branch information
Showing
5 changed files
with
465 additions
and
43 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
|
||
from __future__ import print_function, division | ||
from caffe import layers as L | ||
from caffe import params as P | ||
|
||
|
||
__author__ = 'Fisher Yu' | ||
__copyright__ = 'Copyright (c) 2016, Fisher Yu' | ||
__email__ = '[email protected]' | ||
__license__ = 'MIT' | ||
|
||
|
||
def make_image_label_data(image_list_path, label_list_path, batch_size,
                          mirror, crop_size, mean_pixel,
                          label_stride=8, margin=186):
    """Create an ImageLabelData layer producing (data, label) tops.

    Args:
        image_list_path: path to the text file listing input images.
        label_list_path: path to the text file listing label images.
        batch_size: number of image/label pairs per batch.
        mirror: whether to apply random horizontal mirroring.
        crop_size: side length of the square crop taken from each image.
        mean_pixel: per-channel mean value(s) subtracted from the input.
        label_stride: subsampling stride of the label slice.
        margin: pixels cropped off each side of the label before slicing.

    Returns:
        (data, label) layer tops.
    """
    # BUG FIX: this previously divided by a hard-coded 8, silently ignoring
    # a non-default ``label_stride``. Use the parameter so the slice
    # dimension stays consistent with the configured stride (default 8
    # preserves the old behavior).
    label_dim = (crop_size - margin * 2) // label_stride
    data, label = L.ImageLabelData(
        transform_param=dict(mirror=mirror, mean_value=mean_pixel,
                             crop_size=crop_size),
        image_label_data_param=dict(
            image_list_path=image_list_path, label_list_path=label_list_path,
            shuffle=True, batch_size=batch_size,
            padding=P.ImageLabelData.REFLECT,
            label_slice=dict(dim=[label_dim, label_dim],
                             stride=[label_stride, label_stride],
                             offset=[margin, margin])),
        ntop=2)
    return data, label
|
||
|
||
def make_input_data(dim, batch=1, channels=3):
    """Create an Input layer with shape (batch, channels, dim, dim).

    The defaults (batch=1, channels=3) reproduce the original fixed
    single-image RGB input, so existing callers are unaffected; the new
    keyword parameters generalize the layer to other batch sizes and
    channel counts.
    """
    shape = dict(dim=[batch, channels, dim, dim])
    return L.Input(input_param=dict(shape=shape))
|
||
|
||
def make_softmax_loss(bottom, label):
    """Softmax loss over ``bottom`` vs ``label``.

    Label value 255 is ignored and the loss is normalized over the
    remaining (valid) pixels only.
    """
    loss_param = dict(ignore_label=255, normalization=P.Loss.VALID)
    return L.SoftmaxWithLoss(bottom, label, loss_param=loss_param)
|
||
|
||
def make_accuracy(bottom, label):
    """Per-pixel accuracy of ``bottom`` against ``label``, skipping label 255."""
    acc_param = dict(ignore_label=255)
    return L.Accuracy(bottom, label, accuracy_param=acc_param)
|
||
|
||
def make_prob(bottom):
    """Softmax layer converting class scores into per-pixel probabilities."""
    prob = L.Softmax(bottom)
    return prob
|
||
|
||
def make_upsample(bottom, num_classes):
    """Frozen bilinear deconvolution head upsampling ``bottom`` by 8x.

    Uses a 16x16 kernel with stride 8 and pad 4, one group per class,
    no bias, bilinear weight filler, and lr_mult=0 so the weights stay
    fixed during training.
    """
    deconv_param = dict(bias_term=False,
                        num_output=num_classes,
                        kernel_size=16,
                        stride=8,
                        group=num_classes,
                        pad=4,
                        weight_filler=dict(type="bilinear"))
    return L.Deconvolution(bottom,
                           param=[dict(lr_mult=0, decay_mult=0)],
                           convolution_param=deconv_param)
|
||
|
||
def build_frontend_vgg(net, bottom, num_classes):
    """Build the dilated VGG-16 front end on ``net`` starting from ``bottom``.

    Five convolution groups (conv1_* .. conv5_*) with ReLUs; groups whose
    dilation entry is 0 are followed by 2x2 max pooling, later groups use
    dilated convolutions instead of pooling. Then fc6 (7x7, dilated),
    fc7 (1x1), both with dropout, and a 1x1 ``final`` classifier.

    Returns:
        (final_layer, 'final') — the last layer top and its name.
    """
    prev_layer = bottom
    num_convolutions = [2, 2, 3, 3, 3]
    dilations = [0, 0, 0, 0, 2, 4]
    for l in range(5):
        # Channel width doubles per group, capped at 512 (VGG-16 layout).
        num_outputs = min(64 * 2 ** l, 512)
        for i in range(num_convolutions[l]):
            conv_name = 'conv{0}_{1}'.format(l + 1, i + 1)
            relu_name = 'relu{0}_{1}'.format(l + 1, i + 1)
            # Deduplicated: the original if/else repeated the whole layer
            # construction just to add ``dilation``; build the param dict
            # once and add dilation only when non-zero.
            conv_param = dict(num_output=num_outputs, kernel_size=3)
            if dilations[l] != 0:
                conv_param['dilation'] = dilations[l]
            setattr(net, conv_name,
                    L.Convolution(
                        prev_layer,
                        param=[dict(lr_mult=1, decay_mult=1),
                               dict(lr_mult=2, decay_mult=0)],
                        convolution_param=conv_param))
            setattr(net, relu_name,
                    L.ReLU(getattr(net, conv_name), in_place=True))
            prev_layer = getattr(net, relu_name)
        # Pool only before groups that are not dilated (dilation replaces
        # downsampling to preserve resolution).
        if dilations[l + 1] == 0:
            pool_name = 'pool{0}'.format(l + 1)
            setattr(net, pool_name, L.Pooling(
                prev_layer, pool=P.Pooling.MAX, kernel_size=2, stride=2))
            prev_layer = getattr(net, pool_name)

    net.fc6 = L.Convolution(
        prev_layer,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        convolution_param=dict(num_output=4096, kernel_size=7,
                               dilation=dilations[5]))
    net.relu6 = L.ReLU(net.fc6, in_place=True)
    net.drop6 = L.Dropout(net.relu6, in_place=True, dropout_ratio=0.5)
    net.fc7 = L.Convolution(
        net.drop6,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        convolution_param=dict(num_output=4096, kernel_size=1))
    net.relu7 = L.ReLU(net.fc7, in_place=True)
    net.drop7 = L.Dropout(net.relu7, in_place=True, dropout_ratio=0.5)
    # Classifier initialized with small gaussian weights and zero bias.
    net.final = L.Convolution(
        net.drop7,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        convolution_param=dict(
            num_output=num_classes, kernel_size=1,
            weight_filler=dict(type='gaussian', std=0.001),
            bias_filler=dict(type='constant', value=0)))
    return net.final, 'final'
|
||
|
||
def build_context(net, bottom, num_classes, levels=8):
    """Attach the dilated context module head to ``net``.

    Layout: two 3x3 pad-1 convolutions ('ctx_conv1_1', 'ctx_conv1_2'),
    a pyramid of 3x3 convolutions with exponentially growing dilation
    ('ctx_conv2_1' .. 'ctx_conv{levels-3}_1', dilation 2**(level-1) with
    matching pad), then 'ctx_fc1' (3x3, pad 1) and the 1x1 'ctx_final'
    classifier.

    Returns:
        (final_layer, 'ctx_final') — the last layer top and its name.
    """
    def lr_params():
        # Fresh standard weight/bias learning-rate multipliers per layer.
        return [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]

    top = bottom
    multiplier = 1

    for idx in (1, 2):
        conv = 'ctx_conv1_{}'.format(idx)
        relu = 'ctx_relu1_{}'.format(idx)
        # ``bottom`` may be None (deploy-style nets); pass no bottoms then.
        bottoms = [] if top is None else [top]
        setattr(net, conv,
                L.Convolution(
                    *bottoms,
                    param=lr_params(),
                    convolution_param=dict(
                        num_output=num_classes * multiplier,
                        kernel_size=3, pad=1)))
        setattr(net, relu, L.ReLU(getattr(net, conv), in_place=True))
        top = getattr(net, relu)

    for level in range(2, levels - 2):
        rate = 2 ** (level - 1)
        multiplier = 1
        conv = 'ctx_conv{}_1'.format(level)
        relu = 'ctx_relu{}_1'.format(level)
        setattr(net, conv,
                L.Convolution(
                    top,
                    param=lr_params(),
                    convolution_param=dict(
                        num_output=num_classes * multiplier,
                        kernel_size=3, dilation=rate, pad=rate)))
        setattr(net, relu, L.ReLU(getattr(net, conv), in_place=True))
        top = getattr(net, relu)

    net.ctx_fc1 = L.Convolution(
        top,
        param=lr_params(),
        convolution_param=dict(
            num_output=num_classes * multiplier, kernel_size=3, pad=1))
    net.ctx_fc1_relu = L.ReLU(net.ctx_fc1, in_place=True)
    net.ctx_final = L.Convolution(
        net.ctx_fc1_relu,
        param=lr_params(),
        convolution_param=dict(num_output=num_classes, kernel_size=1))
    return net.ctx_final, 'ctx_final'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,30 +11,14 @@ | |
from os.path import dirname, exists, join, splitext | ||
import sys | ||
|
||
import util | ||
|
||
__author__ = 'Fisher Yu' | ||
__copyright__ = 'Copyright (c) 2016, Fisher Yu' | ||
__email__ = '[email protected]' | ||
__license__ = 'MIT' | ||
|
||
|
||
@numba.jit(nopython=False)  # JIT-compiled; may fall back to object mode
def interp_map(prob, zoom, width, height):
    """Bilinearly upsample a (C, h, w) probability map to (C, height, width).

    Each output pixel (h, w) samples the coarse-grid cell around
    (h / zoom, w / zoom) and blends the four neighbors bilinearly.

    NOTE(review): r1/c1 can reach one past the last coarse row/column when
    height (width) approaches zoom * prob.shape[1] (zoom * prob.shape[2]) —
    assumes the caller provides enough margin in ``prob``; confirm against
    callers.
    """
    zoom_prob = np.zeros((prob.shape[0], height, width), dtype=np.float32)
    for c in range(prob.shape[0]):
        for h in range(height):
            for w in range(width):
                # Top-left coarse cell containing this output pixel.
                r0 = h // zoom
                r1 = r0 + 1
                c0 = w // zoom
                c1 = c0 + 1
                # Fractional position inside the cell (row/column weights).
                rt = float(h) / zoom - r0
                ct = float(w) / zoom - c0
                # Interpolate vertically at both columns, then horizontally.
                v0 = rt * prob[c, r1, c0] + (1 - rt) * prob[c, r0, c0]
                v1 = rt * prob[c, r1, c1] + (1 - rt) * prob[c, r0, c1]
                zoom_prob[c, h, w] = (1 - ct) * v0 + ct * v1
    return zoom_prob
|
||
|
||
class Dataset(object): | ||
def __init__(self, dataset_name): | ||
self.work_dir = dirname(__file__) | ||
|
@@ -109,7 +93,7 @@ def predict(dataset_name, input_path, output_path): | |
prediction.append(col_prediction) | ||
prob = np.concatenate(prediction, axis=1) | ||
if dataset.zoom > 1: | ||
prob = interp_map(prob, dataset.zoom, image_size[1], image_size[0]) | ||
prob = util.interp_map(prob, dataset.zoom, image_size[1], image_size[0]) | ||
prediction = np.argmax(prob.transpose([1, 2, 0]), axis=2) | ||
color_image = dataset.palette[prediction.ravel()].reshape(image_size) | ||
color_image = cv2.cvtColor(color_image, cv2.COLOR_RGB2BGR) | ||
|
Oops, something went wrong.