forked from fyu/dilation
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
change network generation; adding testing code
- Loading branch information
Showing
5 changed files
with
465 additions
and
43 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
|
||
from __future__ import print_function, division | ||
from caffe import layers as L | ||
from caffe import params as P | ||
|
||
|
||
__author__ = 'Fisher Yu' | ||
__copyright__ = 'Copyright (c) 2016, Fisher Yu' | ||
__email__ = '[email protected]' | ||
__license__ = 'MIT' | ||
|
||
|
||
def make_image_label_data(image_list_path, label_list_path, batch_size,
                          mirror, crop_size, mean_pixel,
                          label_stride=8, margin=186):
    """Create an ImageLabelData layer producing (data, label) tops.

    Args:
        image_list_path: path to the text file listing input images.
        label_list_path: path to the text file listing label images.
        batch_size: number of image/label pairs per batch.
        mirror: whether to apply random horizontal mirroring.
        crop_size: side length of the square crop taken from each image.
        mean_pixel: per-channel mean value(s) subtracted from the input.
        label_stride: subsampling stride of the label slice.
        margin: pixels cropped off each side of the label before slicing.

    Returns:
        (data, label) layer tops.
    """
    # BUG FIX: this previously divided by a hard-coded 8, silently ignoring
    # a non-default ``label_stride``. Use the parameter so the slice
    # dimension stays consistent with the configured stride (default 8
    # preserves the old behavior).
    label_dim = (crop_size - margin * 2) // label_stride
    data, label = L.ImageLabelData(
        transform_param=dict(mirror=mirror, mean_value=mean_pixel,
                             crop_size=crop_size),
        image_label_data_param=dict(
            image_list_path=image_list_path, label_list_path=label_list_path,
            shuffle=True, batch_size=batch_size,
            padding=P.ImageLabelData.REFLECT,
            label_slice=dict(dim=[label_dim, label_dim],
                             stride=[label_stride, label_stride],
                             offset=[margin, margin])),
        ntop=2)
    return data, label
|
||
|
||
def make_input_data(dim, batch=1, channels=3):
    """Create an Input layer with shape (batch, channels, dim, dim).

    The defaults (batch=1, channels=3) reproduce the original fixed
    single-image RGB input, so existing callers are unaffected; the new
    keyword parameters generalize the layer to other batch sizes and
    channel counts.
    """
    shape = dict(dim=[batch, channels, dim, dim])
    return L.Input(input_param=dict(shape=shape))
|
||
|
||
def make_softmax_loss(bottom, label):
    """Softmax loss over ``bottom`` vs ``label``.

    Label value 255 is ignored and the loss is normalized over the
    remaining (valid) pixels only.
    """
    loss_param = dict(ignore_label=255, normalization=P.Loss.VALID)
    return L.SoftmaxWithLoss(bottom, label, loss_param=loss_param)
|
||
|
||
def make_accuracy(bottom, label):
    """Per-pixel accuracy of ``bottom`` against ``label``, skipping label 255."""
    acc_param = dict(ignore_label=255)
    return L.Accuracy(bottom, label, accuracy_param=acc_param)
|
||
|
||
def make_prob(bottom):
    """Softmax layer converting class scores into per-pixel probabilities."""
    prob = L.Softmax(bottom)
    return prob
|
||
|
||
def make_upsample(bottom, num_classes):
    """Frozen bilinear deconvolution head upsampling ``bottom`` by 8x.

    Uses a 16x16 kernel with stride 8 and pad 4, one group per class,
    no bias, bilinear weight filler, and lr_mult=0 so the weights stay
    fixed during training.
    """
    deconv_param = dict(bias_term=False,
                        num_output=num_classes,
                        kernel_size=16,
                        stride=8,
                        group=num_classes,
                        pad=4,
                        weight_filler=dict(type="bilinear"))
    return L.Deconvolution(bottom,
                           param=[dict(lr_mult=0, decay_mult=0)],
                           convolution_param=deconv_param)
|
||
|
||
def build_frontend_vgg(net, bottom, num_classes):
    """Build the dilated VGG-16 front end on ``net`` starting from ``bottom``.

    Five convolution groups (conv1_* .. conv5_*) with ReLUs; groups whose
    dilation entry is 0 are followed by 2x2 max pooling, later groups use
    dilated convolutions instead of pooling. Then fc6 (7x7, dilated),
    fc7 (1x1), both with dropout, and a 1x1 ``final`` classifier.

    Returns:
        (final_layer, 'final') — the last layer top and its name.
    """
    prev_layer = bottom
    num_convolutions = [2, 2, 3, 3, 3]
    dilations = [0, 0, 0, 0, 2, 4]
    for l in range(5):
        # Channel width doubles per group, capped at 512 (VGG-16 layout).
        num_outputs = min(64 * 2 ** l, 512)
        for i in range(num_convolutions[l]):
            conv_name = 'conv{0}_{1}'.format(l + 1, i + 1)
            relu_name = 'relu{0}_{1}'.format(l + 1, i + 1)
            # Deduplicated: the original if/else repeated the whole layer
            # construction just to add ``dilation``; build the param dict
            # once and add dilation only when non-zero.
            conv_param = dict(num_output=num_outputs, kernel_size=3)
            if dilations[l] != 0:
                conv_param['dilation'] = dilations[l]
            setattr(net, conv_name,
                    L.Convolution(
                        prev_layer,
                        param=[dict(lr_mult=1, decay_mult=1),
                               dict(lr_mult=2, decay_mult=0)],
                        convolution_param=conv_param))
            setattr(net, relu_name,
                    L.ReLU(getattr(net, conv_name), in_place=True))
            prev_layer = getattr(net, relu_name)
        # Pool only before groups that are not dilated (dilation replaces
        # downsampling to preserve resolution).
        if dilations[l + 1] == 0:
            pool_name = 'pool{0}'.format(l + 1)
            setattr(net, pool_name, L.Pooling(
                prev_layer, pool=P.Pooling.MAX, kernel_size=2, stride=2))
            prev_layer = getattr(net, pool_name)

    net.fc6 = L.Convolution(
        prev_layer,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        convolution_param=dict(num_output=4096, kernel_size=7,
                               dilation=dilations[5]))
    net.relu6 = L.ReLU(net.fc6, in_place=True)
    net.drop6 = L.Dropout(net.relu6, in_place=True, dropout_ratio=0.5)
    net.fc7 = L.Convolution(
        net.drop6,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        convolution_param=dict(num_output=4096, kernel_size=1))
    net.relu7 = L.ReLU(net.fc7, in_place=True)
    net.drop7 = L.Dropout(net.relu7, in_place=True, dropout_ratio=0.5)
    # Classifier initialized with small gaussian weights and zero bias.
    net.final = L.Convolution(
        net.drop7,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        convolution_param=dict(
            num_output=num_classes, kernel_size=1,
            weight_filler=dict(type='gaussian', std=0.001),
            bias_filler=dict(type='constant', value=0)))
    return net.final, 'final'
|
||
|
||
def build_context(net, bottom, num_classes, levels=8):
    """Attach the dilated context module head to ``net``.

    Layout: two 3x3 pad-1 convolutions ('ctx_conv1_1', 'ctx_conv1_2'),
    a pyramid of 3x3 convolutions with exponentially growing dilation
    ('ctx_conv2_1' .. 'ctx_conv{levels-3}_1', dilation 2**(level-1) with
    matching pad), then 'ctx_fc1' (3x3, pad 1) and the 1x1 'ctx_final'
    classifier.

    Returns:
        (final_layer, 'ctx_final') — the last layer top and its name.
    """
    def lr_params():
        # Fresh standard weight/bias learning-rate multipliers per layer.
        return [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]

    top = bottom
    multiplier = 1

    for idx in (1, 2):
        conv = 'ctx_conv1_{}'.format(idx)
        relu = 'ctx_relu1_{}'.format(idx)
        # ``bottom`` may be None (deploy-style nets); pass no bottoms then.
        bottoms = [] if top is None else [top]
        setattr(net, conv,
                L.Convolution(
                    *bottoms,
                    param=lr_params(),
                    convolution_param=dict(
                        num_output=num_classes * multiplier,
                        kernel_size=3, pad=1)))
        setattr(net, relu, L.ReLU(getattr(net, conv), in_place=True))
        top = getattr(net, relu)

    for level in range(2, levels - 2):
        rate = 2 ** (level - 1)
        multiplier = 1
        conv = 'ctx_conv{}_1'.format(level)
        relu = 'ctx_relu{}_1'.format(level)
        setattr(net, conv,
                L.Convolution(
                    top,
                    param=lr_params(),
                    convolution_param=dict(
                        num_output=num_classes * multiplier,
                        kernel_size=3, dilation=rate, pad=rate)))
        setattr(net, relu, L.ReLU(getattr(net, conv), in_place=True))
        top = getattr(net, relu)

    net.ctx_fc1 = L.Convolution(
        top,
        param=lr_params(),
        convolution_param=dict(
            num_output=num_classes * multiplier, kernel_size=3, pad=1))
    net.ctx_fc1_relu = L.ReLU(net.ctx_fc1, in_place=True)
    net.ctx_final = L.Convolution(
        net.ctx_fc1_relu,
        param=lr_params(),
        convolution_param=dict(num_output=num_classes, kernel_size=1))
    return net.ctx_final, 'ctx_final'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,30 +11,14 @@ | |
from os.path import dirname, exists, join, splitext | ||
import sys | ||
|
||
import util | ||
|
||
__author__ = 'Fisher Yu' | ||
__copyright__ = 'Copyright (c) 2016, Fisher Yu' | ||
__email__ = '[email protected]' | ||
__license__ = 'MIT' | ||
|
||
|
||
@numba.jit(nopython=False)  # JIT-compiled; may fall back to object mode
def interp_map(prob, zoom, width, height):
    """Bilinearly upsample a (C, h, w) probability map to (C, height, width).

    Each output pixel (h, w) samples the coarse-grid cell around
    (h / zoom, w / zoom) and blends the four neighbors bilinearly.

    NOTE(review): r1/c1 can reach one past the last coarse row/column when
    height (width) approaches zoom * prob.shape[1] (zoom * prob.shape[2]) —
    assumes the caller provides enough margin in ``prob``; confirm against
    callers.
    """
    zoom_prob = np.zeros((prob.shape[0], height, width), dtype=np.float32)
    for c in range(prob.shape[0]):
        for h in range(height):
            for w in range(width):
                # Top-left coarse cell containing this output pixel.
                r0 = h // zoom
                r1 = r0 + 1
                c0 = w // zoom
                c1 = c0 + 1
                # Fractional position inside the cell (row/column weights).
                rt = float(h) / zoom - r0
                ct = float(w) / zoom - c0
                # Interpolate vertically at both columns, then horizontally.
                v0 = rt * prob[c, r1, c0] + (1 - rt) * prob[c, r0, c0]
                v1 = rt * prob[c, r1, c1] + (1 - rt) * prob[c, r0, c1]
                zoom_prob[c, h, w] = (1 - ct) * v0 + ct * v1
    return zoom_prob
|
||
|
||
class Dataset(object): | ||
def __init__(self, dataset_name): | ||
self.work_dir = dirname(__file__) | ||
|
@@ -109,7 +93,7 @@ def predict(dataset_name, input_path, output_path): | |
prediction.append(col_prediction) | ||
prob = np.concatenate(prediction, axis=1) | ||
if dataset.zoom > 1: | ||
prob = interp_map(prob, dataset.zoom, image_size[1], image_size[0]) | ||
prob = util.interp_map(prob, dataset.zoom, image_size[1], image_size[0]) | ||
prediction = np.argmax(prob.transpose([1, 2, 0]), axis=2) | ||
color_image = dataset.palette[prediction.ravel()].reshape(image_size) | ||
color_image = cv2.cvtColor(color_image, cv2.COLOR_RGB2BGR) | ||
|
Oops, something went wrong.