forked from keras-team/keras
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmnist_transfer_cnn.py
127 lines (103 loc) · 3.84 KB
/
mnist_transfer_cnn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
'''Transfer learning toy example:
1- Train a simple convnet on the MNIST dataset the first 5 digits [0..4].
2- Freeze convolutional layers and fine-tune dense layers
for the classification of digits [5..9].
Run on GPU: THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python mnist_transfer_cnn.py
Get to 99.8% test accuracy after 5 epochs
for the first five digits classifier
and 99.2% for the last five digits after transfer + fine-tuning.
'''
from __future__ import print_function
import numpy as np
import datetime
np.random.seed(1337) # for reproducibility
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from keras import backend as K
now = datetime.datetime.now
batch_size = 128
nb_classes = 5
nb_epoch = 5
# input image dimensions
img_rows, img_cols = 28, 28
# number of convolutional filters to use
nb_filters = 32
# size of pooling area for max pooling
pool_size = 2
# convolution kernel size
kernel_size = 3
if K.image_dim_ordering() == 'th':
input_shape = (1, img_rows, img_cols)
else:
input_shape = (img_rows, img_cols, 1)
def train_model(model, train, test, nb_classes):
X_train = train[0].reshape((train[0].shape[0],) + input_shape)
X_test = test[0].reshape((test[0].shape[0],) + input_shape)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(train[1], nb_classes)
Y_test = np_utils.to_categorical(test[1], nb_classes)
model.compile(loss='categorical_crossentropy',
optimizer='adadelta',
metrics=['accuracy'])
t = now()
model.fit(X_train, Y_train,
batch_size=batch_size, nb_epoch=nb_epoch,
verbose=1,
validation_data=(X_test, Y_test))
print('Training time: %s' % (now() - t))
score = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])
# the data, shuffled and split between train and test sets
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# create two datasets one with digits below 5 and one with 5 and above
X_train_lt5 = X_train[y_train < 5]
y_train_lt5 = y_train[y_train < 5]
X_test_lt5 = X_test[y_test < 5]
y_test_lt5 = y_test[y_test < 5]
X_train_gte5 = X_train[y_train >= 5]
y_train_gte5 = y_train[y_train >= 5] - 5 # make classes start at 0 for
X_test_gte5 = X_test[y_test >= 5] # np_utils.to_categorical
y_test_gte5 = y_test[y_test >= 5] - 5
# define two groups of layers: feature (convolutions) and classification (dense)
feature_layers = [
Convolution2D(nb_filters, kernel_size, kernel_size,
border_mode='valid',
input_shape=input_shape),
Activation('relu'),
Convolution2D(nb_filters, kernel_size, kernel_size),
Activation('relu'),
MaxPooling2D(pool_size=(pool_size, pool_size)),
Dropout(0.25),
Flatten(),
]
classification_layers = [
Dense(128),
Activation('relu'),
Dropout(0.5),
Dense(nb_classes),
Activation('softmax')
]
# create complete model
model = Sequential(feature_layers + classification_layers)
# train model for 5-digit classification [0..4]
train_model(model,
(X_train_lt5, y_train_lt5),
(X_test_lt5, y_test_lt5), nb_classes)
# freeze feature layers and rebuild model
for l in feature_layers:
l.trainable = False
# transfer: train dense layers for new classification task [5..9]
train_model(model,
(X_train_gte5, y_train_gte5),
(X_test_gte5, y_test_gte5), nb_classes)