#!/usr/bin/env python
from __future__ import division
import argparse
import numpy as np
import os
import tempfile
import time
parser = argparse.ArgumentParser(
description='Train and evaluate a net on the MIT mini-places dataset.')
parser.add_argument('--image_root', default='./images/',
help='Directory where images are stored')
parser.add_argument('--crop', type=int, default=96,
    help=('The edge length of the random image crops '
          '(defaults to 96 for 96x96 crops)'))
parser.add_argument('--disp', type=int, default=10,
help='Print loss/accuracy every --disp training iterations')
parser.add_argument('--snapshot_dir', default='./snapshot',
help='Path to directory where snapshots are saved')
parser.add_argument('--snapshot_prefix', default='place_net',
help='Snapshot filename prefix')
parser.add_argument('--iters', type=int, default=50*1000,
help='Total number of iterations to train the network')
parser.add_argument('--batch', type=int, default=256,
help='The batch size to use for training')
parser.add_argument('--iter_size', type=int, default=1,
    help=('The number of iterations (batches) over which to average the '
          'gradient computation. Effectively increases the batch size '
          '(--batch) by this factor, but without increasing memory use.'))
parser.add_argument('--lr', type=float, default=0.01,
help='The initial learning rate')
parser.add_argument('--gamma', type=float, default=0.1,
help='Factor by which to drop the learning rate')
parser.add_argument('--stepsize', type=int, default=10*1000,
help='Drop the learning rate every N iters -- this specifies N')
parser.add_argument('--momentum', type=float, default=0.9,
help='The momentum hyperparameter to use for momentum SGD')
parser.add_argument('--decay', type=float, default=5e-4,
help='The L2 weight decay coefficient')
parser.add_argument('--seed', type=int, default=1,
help='Seed for the random number generator')
parser.add_argument('--cudnn', action='store_true',
help='Use CuDNN at training time -- usually faster, but non-deterministic')
parser.add_argument('--gpu', type=int, default=0,
help='GPU ID to use for training and inference (-1 for CPU)')
args = parser.parse_args()
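# Example invocation (illustrative; the values shown are just the defaults):
#   python train_places_net.py --gpu 0 --batch 256 --lr 0.01 --iters 50000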
# disable most Caffe logging (unless env var $GLOG_minloglevel is already set)
key = 'GLOG_minloglevel'
if not os.environ.get(key, ''):
os.environ[key] = '3'
import caffe
from caffe.proto import caffe_pb2
from caffe import layers as L
from caffe import params as P
if args.gpu >= 0:
caffe.set_mode_gpu()
caffe.set_device(args.gpu)
else:
caffe.set_mode_cpu()
def to_tempfile(file_content):
"""Serialize a Python protobuf object str(proto), dump to a temporary file,
and return its filename."""
with tempfile.NamedTemporaryFile(delete=False) as f:
f.write(file_content)
return f.name
weight_param = dict(lr_mult=1, decay_mult=1)
bias_param = dict(lr_mult=2, decay_mult=0)
learned_param = [weight_param, bias_param]
frozen_param = [dict(lr_mult=0)] * 2
zero_filler = dict(type='constant', value=0)
msra_filler = dict(type='msra')
uniform_filler = dict(type='uniform', min=-0.1, max=0.1)
fc_filler = dict(type='gaussian', std=0.005)
# The original AlexNet used the Gaussian initialization commented out below;
# we'll use the "MSRA" initialization instead, which scales the standard
# deviation of the Gaussian based on each convolutional filter's fan-in.
# conv_filler = dict(type='gaussian', std=0.01)
conv_filler = dict(type='msra')
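# (Caffe's MSRA filler draws weights from a zero-mean Gaussian with
# std = sqrt(2 / fan_in), following He et al., "Delving Deep into Rectifiers".)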
def conv_relu(bottom, ks, nout, stride=1, pad=0, group=1,
param=learned_param,
weight_filler=conv_filler, bias_filler=zero_filler,
train=False):
    # Use the CAFFE engine to avoid CuDNN convolution, whose results are
    # non-deterministic.
    engine = {}
    if train and not args.cudnn:
        engine.update(engine=P.Convolution.CAFFE)
conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
num_output=nout, pad=pad, group=group, param=param,
weight_filler=weight_filler, bias_filler=bias_filler,
**engine)
return conv, L.ReLU(conv, in_place=True)
def fc_relu(bottom, nout, param=learned_param,
weight_filler=fc_filler, bias_filler=zero_filler):
fc = L.InnerProduct(bottom, num_output=nout, param=param,
weight_filler=weight_filler, bias_filler=bias_filler)
return fc, L.ReLU(fc, in_place=True)
def max_pool(bottom, ks, stride=1, train=False):
    # Use the CAFFE engine to avoid CuDNN pooling, whose results are
    # non-deterministic.
engine = {}
if train and not args.cudnn:
engine.update(engine=P.Pooling.CAFFE)
return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride,
**engine)
def minialexnet(data, labels=None, train=False, param=learned_param,
num_classes=100, with_labels=True):
"""
    Returns the path to a prototxt file specifying a variant of AlexNet,
    following the original specification
    (<caffe>/models/bvlc_alexnet/train_val.prototxt).
The changes with respect to the original AlexNet are:
- LRN (local response normalization) layers are not included
- The Fully Connected (FC) layers (fc6 and fc7) have smaller dimensions
due to the lower resolution of mini-places images (128x128) compared
with ImageNet images (usually resized to 256x256)
"""
n = caffe.NetSpec()
n.data = data
conv_kwargs = dict(param=param, train=train)
n.conv1, n.relu1 = conv_relu(n.data, 11, 96, stride=4, **conv_kwargs)
n.pool1 = max_pool(n.relu1, 3, stride=2, train=train)
n.conv2, n.relu2 = conv_relu(n.pool1, 5, 256, pad=2, group=2, **conv_kwargs)
n.pool2 = max_pool(n.relu2, 3, stride=2, train=train)
n.conv3, n.relu3 = conv_relu(n.pool2, 3, 384, pad=1, **conv_kwargs)
n.conv4, n.relu4 = conv_relu(n.relu3, 3, 384, pad=1, group=2, **conv_kwargs)
n.conv5, n.relu5 = conv_relu(n.relu4, 3, 256, pad=1, group=2, **conv_kwargs)
n.pool5 = max_pool(n.relu5, 3, stride=2, train=train)
n.fc6, n.relu6 = fc_relu(n.pool5, 1024, param=param)
n.drop6 = L.Dropout(n.relu6, in_place=True)
n.fc7, n.relu7 = fc_relu(n.drop6, 1024, param=param)
n.drop7 = L.Dropout(n.relu7, in_place=True)
preds = n.fc8 = L.InnerProduct(n.drop7, num_output=num_classes, param=param)
if not train:
# Compute the per-label probabilities at test/inference time.
preds = n.probs = L.Softmax(n.fc8)
if with_labels:
n.label = labels
n.loss = L.SoftmaxWithLoss(n.fc8, n.label)
n.accuracy_at_1 = L.Accuracy(preds, n.label)
n.accuracy_at_5 = L.Accuracy(preds, n.label,
accuracy_param=dict(top_k=5))
else:
n.ignored_label = labels
n.silence_label = L.Silence(n.ignored_label, ntop=0)
return to_tempfile(str(n.to_proto()))
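# A rough trace of the feature-map sizes with the default 96x96 crops (Caffe
# convolutions round spatial dims down; poolings round up): conv1 -> 22x22,
# pool1 -> 11x11, conv2 -> 11x11, pool2 -> 5x5, conv3-5 -> 5x5, pool5 -> 2x2,
# so fc6 sees a 256 * 2 * 2 = 1024-dimensional input per image.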
def get_split(split):
filename = './development_kit/data/%s.txt' % split
if not os.path.exists(filename):
        raise IOError('Split data file not found: %s' % filename)
return filename
def miniplaces_net(source, train=False, with_labels=True):
mean = [104, 117, 123] # per-channel mean of the BGR image pixels
transform_param = dict(mirror=train, crop_size=args.crop, mean_value=mean)
batch_size = args.batch if train else 100
places_data, places_labels = L.ImageData(transform_param=transform_param,
source=source, root_folder=args.image_root, shuffle=train,
batch_size=batch_size, ntop=2)
return minialexnet(data=places_data, labels=places_labels, train=train,
with_labels=with_labels)
def snapshot_prefix():
return os.path.join(args.snapshot_dir, args.snapshot_prefix)
def snapshot_at_iteration(iteration):
return '%s_iter_%d.caffemodel' % (snapshot_prefix(), iteration)
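# For example, with the default arguments, snapshot_at_iteration(50000)
# returns './snapshot/place_net_iter_50000.caffemodel'.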
def miniplaces_solver(train_net_path, test_net_path=None):
s = caffe_pb2.SolverParameter()
# Specify locations of the train and (maybe) test networks.
s.train_net = train_net_path
if test_net_path is not None:
s.test_net.append(test_net_path)
# Test after every 1000 training iterations.
s.test_interval = 1000
# Set `test_iter` to test on 100 batches each time we test.
# With test batch size 100, this covers the entire validation set of
# 10K images (100 * 100 = 10K).
s.test_iter.append(100)
else:
s.test_interval = args.iters + 1 # don't test during training
# The number of batches over which to average the gradient.
# Effectively boosts the training batch size by the given factor, without
# affecting memory utilization.
s.iter_size = args.iter_size
# Solve using the stochastic gradient descent (SGD) algorithm.
# Other choices include 'Adam' and 'RMSProp'.
s.type = 'SGD'
    # The following settings (base_lr, lr_policy, gamma, stepsize, max_iter)
    # define the learning rate schedule. With the defaults (base_lr=0.01,
    # gamma=0.1, stepsize=10K, max_iter=50K), the rate starts at 0.01 and is
    # multiplied by 0.1 every 10K iterations:
    #   Iterations [  0, 10K) -> learning rate 0.01  = base_lr
    #   Iterations [10K, 20K) -> learning rate 0.001 = base_lr * gamma
    #   ...down to base_lr * gamma^4 for iterations [40K, 50K).
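    # In general, the 'step' policy sets:
    #   lr(iter) = base_lr * gamma ** (iter // stepsize)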
# Set the initial learning rate for SGD.
s.base_lr = args.lr
# Set `lr_policy` to define how the learning rate changes during training.
# Here, we 'step' the learning rate by multiplying it by a factor `gamma`
# every `stepsize` iterations.
s.lr_policy = 'step'
s.gamma = args.gamma
s.stepsize = args.stepsize
# `max_iter` is the number of times to update the net (training iterations).
s.max_iter = args.iters
# Set other SGD hyperparameters. Setting a non-zero `momentum` takes a
# weighted average of the current gradient and previous gradients to make
# learning more stable. L2 weight decay regularizes learning, to help
# prevent the model from overfitting.
s.momentum = args.momentum
s.weight_decay = args.decay
# Display the current training loss and accuracy every `display` iterations.
    # This has no effect here, since this script disables Caffe's logging
    # (see the GLOG_minloglevel setting above).
s.display = args.disp
# Number of training iterations over which to smooth the displayed loss.
# The summed loss value (Iteration N, loss = X) will be averaged,
# but individual loss values (Train net output #K: my_loss = X) won't be.
s.average_loss = 10
# Seed the RNG for deterministic results.
# (May not be so deterministic if using CuDNN.)
s.random_seed = args.seed
# Snapshots are files used to store networks we've trained. Here, we'll
# snapshot twice per learning rate step to the location specified by the
# --snapshot_dir and --snapshot_prefix args.
s.snapshot = args.stepsize // 2
s.snapshot_prefix = snapshot_prefix()
# Create snapshot dir if it doesn't already exist.
if not os.path.exists(args.snapshot_dir):
os.makedirs(args.snapshot_dir)
return to_tempfile(str(s))
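# With the default arguments, the generated solver file reads roughly as
# follows (abridged; train_net points at a generated temporary file):
#   train_net: "/tmp/..."
#   base_lr: 0.01
#   lr_policy: "step"
#   gamma: 0.1
#   stepsize: 10000
#   max_iter: 50000
#   momentum: 0.9
#   weight_decay: 0.0005
#   snapshot: 5000
#   snapshot_prefix: "./snapshot/place_net"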
def train_net(with_val_net=False):
train_net_file = miniplaces_net(get_split('train'), train=True)
# Set with_val_net=True to test during training.
# Environment variable GLOG_minloglevel should be set to 0 to display
# Caffe output in this case; otherwise, the test result will not be
# displayed.
if with_val_net:
val_net_file = miniplaces_net(get_split('val'), train=False)
else:
val_net_file = None
solver_file = miniplaces_solver(train_net_file, val_net_file)
solver = caffe.get_solver(solver_file)
outputs = sorted(solver.net.outputs)
def str_output(output):
value = solver.net.blobs[output].data
if output.startswith('accuracy'):
valstr = '%5.2f%%' % (100 * value, )
else:
            valstr = '%.6f' % value
return '%s = %s' % (output, valstr)
def disp_outputs(iteration, iter_pad_len=len(str(args.iters))):
metrics = '; '.join(str_output(o) for o in outputs)
return 'Iteration %*d: %s' % (iter_pad_len, iteration, metrics)
    # We could just call `solver.solve()` rather than `step()`ing in a loop,
    # but stepping manually lets us print our own progress. (If we hadn't set
    # GLOG_minloglevel = 3 at the top of this file, Caffe would display
    # loss/accuracy information during training.)
previous_time = None
for iteration in xrange(args.iters):
solver.step(1)
if (args.disp > 0) and (iteration % args.disp == 0):
            current_time = time.time()  # wall clock (time.clock() is CPU time)
            if previous_time is None:
                benchmark = ''
            else:
                time_per_iter = (current_time - previous_time) / args.disp
                benchmark = ' (%.5f s/it)' % time_per_iter
            previous_time = current_time
print disp_outputs(iteration), benchmark
    # Print loss/accuracy for the final iteration.
    solver.net.forward()
    print disp_outputs(args.iters)
solver.net.save(snapshot_at_iteration(args.iters))
def eval_net(split, K=5):
print 'Running evaluation for split:', split
filenames = []
labels = []
split_file = get_split(split)
with open(split_file, 'r') as f:
        for line in f:
parts = line.split()
assert 1 <= len(parts) <= 2, 'malformed line'
filenames.append(parts[0])
if len(parts) > 1:
labels.append(int(parts[1]))
known_labels = (len(labels) > 0)
if known_labels:
assert len(labels) == len(filenames)
else:
# create file with 'dummy' labels (all 0s)
split_file = to_tempfile(''.join('%s 0\n' % name for name in filenames))
test_net_file = miniplaces_net(split_file, train=False, with_labels=False)
weights_file = snapshot_at_iteration(args.iters)
net = caffe.Net(test_net_file, weights_file, caffe.TEST)
top_k_predictions = np.zeros((len(filenames), K), dtype=np.int32)
if known_labels:
correct_label_probs = np.zeros(len(filenames))
offset = 0
while offset < len(filenames):
probs = net.forward()['probs']
for prob in probs:
top_k_predictions[offset] = (-prob).argsort()[:K]
if known_labels:
correct_label_probs[offset] = prob[labels[offset]]
offset += 1
if offset >= len(filenames):
break
if known_labels:
def accuracy_at_k(preds, labels, k):
assert len(preds) == len(labels)
num_correct = sum(l in p[:k] for p, l in zip(preds, labels))
return num_correct / len(preds)
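        # For example (hypothetical values): preds=[[3, 1, 2]] and labels=[1]
        # give accuracy_at_k(preds, labels, k=2) == 1.0, since label 1 appears
        # among the top-2 predictions [3, 1].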
for k in [1, K]:
accuracy = 100 * accuracy_at_k(top_k_predictions, labels, k)
print '\tAccuracy at %d = %4.2f%%' % (k, accuracy)
cross_ent_error = -np.log(correct_label_probs).mean()
print '\tSoftmax cross-entropy error = %.4f' % (cross_ent_error, )
else:
print 'Not computing accuracy; ground truth unknown for split:', split
filename = 'top_%d_predictions.%s.csv' % (K, split)
with open(filename, 'w') as f:
f.write(','.join(['image'] + ['label%d' % i for i in range(1, K+1)]))
f.write('\n')
f.write(''.join('%s,%s\n' % (image, ','.join(str(p) for p in preds))
for image, preds in zip(filenames, top_k_predictions)))
print 'Predictions for split %s dumped to: %s' % (split, filename)
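# The prediction CSV has a header row followed by one row per image, e.g.
# (illustrative filename and labels):
#   image,label1,label2,label3,label4,label5
#   val/00000001.jpg,65,3,88,21,7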
if __name__ == '__main__':
print 'Training net...\n'
train_net()
print '\nTraining complete. Evaluating...\n'
for split in ('train', 'val', 'test'):
eval_net(split)
print
print 'Evaluation complete.'