#!/usr/bin/env python
# -*- coding: utf-8 -*-
# SVHN generator based on DCGAN and Conditional GAN with Tensorflow
# References:
# Radford, A., Metz, L., and Chintala, S. Unsupervised representation learning with deep convolutional generative adversarial networks. 2016.
# M. Mirza and S. Osindero. Conditional generative adversarial nets. CoRR, abs/1411.1784, 2014.
# S. Reed, Z. Akata, X. Yan, L. Logeswaran, B. Schiele, H. Lee. Generative Adversarial Text to Image Synthesis, ICML 2016
# This code is available under the MIT License.
# (c)2016 Nakatani Shuyo / Cybozu Labs Inc.
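# Typical invocations (inferred from the command-line options defined below):
#   python cdcgan-svhn.py -c cdcgan-svhn.ini     # train, resuming from an existing model
#   python cdcgan-svhn.py --init --cls           # train from scratch with the matching-aware discriminator
#   python cdcgan-svhn.py -t 5                   # generate 5 figures of test samples from a trained model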
import argparse, configparser, re
parser = argparse.ArgumentParser()
parser.add_argument('-c', '--config', help='config file', default="cdcgan-svhn.ini")
parser.add_argument('-s', '--section', help='section of config', default="DEFAULT")
parser.add_argument('--init', action="store_true", help='initialize parameters if model exists')
parser.add_argument('--cls', action="store_true", help='use matching-aware discriminator for label')
parser.add_argument('-t', type=int, help='generate test sample')
#parser.add_argument('-w', help='vectorize and pickle dump with word2vec')
args = parser.parse_args()
config = configparser.ConfigParser()
config.read(args.config)
param = config[args.section]
def ints(s):
    return [int(x.group(0)) for x in re.finditer(r'\d+', s)]
import numpy, math, time, os
import scipy.io
import tensorflow as tf
import matplotlib.pyplot as plt
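# The config file must define the keys read below; an illustrative section
# (the values here are assumptions, not taken from the original .ini) might look like:
#   [DEFAULT]
#   noise dim = 100
#   discriminator hidden units = 64, 128, 256
#   generator hidden units = 256, 128, 64
#   mini batch size = 128
#   alpha = 0.2
#   epoch = 100
#   number of labels = 10
#   SVHN path = train_32x32.mat
#   working directory = cdcgan-svhn
#   model filename = model.ckpt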
# model parameter
noise_dim = int(param["noise dim"]) # input noise size of Generator
Dhidden = ints(param["discriminator hidden units"]) # hidden units of Discriminator's network
Ghidden = ints(param["generator hidden units"]) # hidden units of Generator's network
mini_batch_size = int(param["mini batch size"])
alpha = float(param["alpha"])
samples = (8, 10) # size of the sample grid to draw (rows, columns)
nsamples = samples[0] * samples[1]
assert nsamples <= mini_batch_size
epoch = int(param["epoch"])
svhn = scipy.io.loadmat(param["SVHN path"])
num_labels = int(param["number of labels"])
train_data = svhn["X"]
train_labels = svhn["y"].flatten()
#train_data = train_data[:, :, :, :1024] # small dataset
#train_labels = train_labels[:1024]
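# SVHN labels the digit 0 as class 10; the line below maps it (and any label >= num_labels) back to 0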
train_labels[train_labels>=num_labels] = 0
fig_width, fig_height, n_channels, N = train_data.shape
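# flatten to (pixels, N), normalize each image into [0, 1] by its own min/max, then reshape to (N, width, height, channels)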
train_data = train_data.reshape(fig_width * fig_height * n_channels, N)
train_data -= train_data.min(axis=0)
train_data = (numpy.array(train_data, dtype=numpy.float32) / train_data.max(axis=0)).T.reshape(N, fig_width, fig_height, n_channels)
period = N // mini_batch_size
X = tf.placeholder(tf.float32, shape=(None, fig_width, fig_height, n_channels))
Y = tf.placeholder(tf.float32, shape=(None, num_labels))
Ywrong = tf.placeholder(tf.float32, shape=(None, num_labels))
Z = tf.placeholder(tf.float32, shape=(None, noise_dim))
keep_prob = tf.placeholder(tf.float32)
Lrate = tf.placeholder(tf.float32)
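# Generator: project (noise, one-hot label) into a fig_width//8 x fig_height//8 feature map
# (4x4 for 32x32 SVHN), then upsample with three stride-2 transposed convolutions to an image with tanh output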
with tf.variable_scope("G"):
    GW0 = tf.Variable(tf.random_normal([noise_dim, Ghidden[0]*4*4], stddev=0.01))
    GW0y = tf.Variable(tf.random_normal([num_labels, Ghidden[0]*4*4], stddev=0.01))
    Gb0 = tf.Variable(tf.zeros(Ghidden[0]))
    GW1 = tf.Variable(tf.random_normal([5, 5, Ghidden[1], Ghidden[0]], stddev=0.01))
    Gb1 = tf.Variable(tf.zeros(Ghidden[1]))
    GW2 = tf.Variable(tf.random_normal([5, 5, Ghidden[2], Ghidden[1]], stddev=0.01))
    Gb2 = tf.Variable(tf.zeros(Ghidden[2]))
    GW3 = tf.Variable(tf.random_normal([5, 5, n_channels, Ghidden[2]], stddev=0.01))
    Gb3 = tf.Variable(tf.zeros(n_channels))

    # batch normalization & relu
    def bn(u):
        mean, variance = tf.nn.moments(u, axes=[0, 1, 2])
        return tf.nn.relu(tf.nn.batch_normalization(u, mean, variance, None, None, 1e-5))

    Gh0 = bn(tf.nn.bias_add(tf.reshape(tf.matmul(Z, GW0) + tf.matmul(Y, GW0y), [-1, fig_width//8, fig_height//8, Ghidden[0]]), Gb0))
    Gh1 = bn(tf.nn.bias_add(tf.nn.conv2d_transpose(Gh0, GW1, [mini_batch_size, fig_width//4, fig_height//4, Ghidden[1]], [1, 2, 2, 1]), Gb1))
    Gh2 = bn(tf.nn.bias_add(tf.nn.conv2d_transpose(Gh1, GW2, [mini_batch_size, fig_width//2, fig_height//2, Ghidden[2]], [1, 2, 2, 1]), Gb2))
    G = tf.nn.tanh(tf.nn.bias_add(tf.nn.conv2d_transpose(Gh2, GW3, [mini_batch_size, fig_width, fig_height, n_channels], [1, 2, 2, 1]), Gb3))
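# Discriminator: three stride-2 5x5 convolutions (the label is mixed into the first
# feature map), followed by a fully connected sigmoid output giving p(real)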
with tf.variable_scope("D"):
    DW0 = tf.Variable(tf.random_normal([5, 5, n_channels, Dhidden[0]], stddev=0.01))
    DW0y = tf.Variable(tf.random_normal([num_labels, (fig_width//2)*(fig_height//2)*Dhidden[0]], stddev=0.01))
    Db0 = tf.Variable(tf.zeros(Dhidden[0]))
    DW1 = tf.Variable(tf.random_normal([5, 5, Dhidden[0], Dhidden[1]], stddev=0.01))
    Db1 = tf.Variable(tf.zeros(Dhidden[1]))
    DW2 = tf.Variable(tf.random_normal([5, 5, Dhidden[1], Dhidden[2]], stddev=0.01))
    Db2 = tf.Variable(tf.zeros(Dhidden[2]))
    DW3 = tf.Variable(tf.random_normal([(fig_width//8)*(fig_height//8)*Dhidden[2], 1], stddev=0.01))
    Db3 = tf.Variable(tf.zeros(1))

    # batch normalization & leaky relu
    def bnl(u, a=0.2):
        mean, variance = tf.nn.moments(u, axes=[0, 1, 2])
        b = tf.nn.batch_normalization(u, mean, variance, None, None, 1e-5)
        return tf.maximum(a * b, b)

    def discriminator(xx, yy):
        Dh0 = bnl(tf.nn.bias_add(tf.nn.conv2d(xx, DW0, [1, 2, 2, 1], padding='SAME') + tf.reshape(tf.matmul(yy, DW0y), [-1, fig_width//2, fig_height//2, Dhidden[0]]), Db0))
        Dh1 = bnl(tf.nn.bias_add(tf.nn.conv2d(Dh0, DW1, [1, 2, 2, 1], padding='SAME'), Db1))
        Dh2 = bnl(tf.nn.bias_add(tf.nn.conv2d(Dh1, DW2, [1, 2, 2, 1], padding='SAME'), Db2))
        return tf.nn.sigmoid(tf.matmul(tf.reshape(Dh2, [-1, (fig_width//8)*(fig_height//8)*Dhidden[2]]), DW3) + Db3)
DG = discriminator(G, Y)
if args.cls:
    Dloss = -tf.reduce_mean(tf.log(discriminator(X, Y) + 1e-99) + (tf.log(1 + 1e-99 - discriminator(X, Ywrong)) + tf.log(1 + 1e-99 - DG))/2)
else:
    Dloss = -tf.reduce_mean(tf.log(discriminator(X, Y) + 1e-99) + tf.log(1 + 1e-99 - DG))
#Gloss = tf.reduce_mean(tf.log(1 - DG))
Gloss = tf.reduce_mean(tf.log(1 + 1e-99 - DG) - tf.log(DG + 1e-99)) # the second term for stable learning
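# Gloss combines the original minimax term log(1 - D(G)) with the non-saturating
# -log(D(G)) term; the 1e-99 offsets keep the logarithms away from log(0)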
vars = tf.trainable_variables()
Dvars = [v for v in vars if v.name.startswith("D")]
Gvars = [v for v in vars if v.name.startswith("G")]
Doptimizer = tf.train.AdamOptimizer(learning_rate=Lrate).minimize(Dloss, var_list=Dvars)
Goptimizer = tf.train.AdamOptimizer(learning_rate=Lrate).minimize(Gloss, var_list=Gvars)
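# each network is updated by its own Adam optimizer, restricted via var_list to the variables of its scope ("D" or "G")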
work_dir = param["working directory"]
if not os.path.exists(work_dir): os.makedirs(work_dir)
model_path = os.path.join(work_dir, param["model filename"])
saver = tf.train.Saver()
sess = tf.Session()
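# restore an existing checkpoint unless --init is given or no model file exists yet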
if args.init or not os.path.exists(model_path):
    sess.run(tf.initialize_all_variables())
else:
    saver.restore(sess, model_path)
def save_figure(path, z, y):
    Gz = sess.run(G, feed_dict={Z: z, Y: y})
    fig = plt.gcf()
    fig.subplots_adjust(left=0, bottom=0, right=1, top=1)
    for i in range(nsamples):
        ax = fig.add_subplot(samples[0], samples[1], i + 1)
        ax.axis("off")
        ax.imshow(Gz[i, :, :, :])
    plt.savefig(path)
    return plt, Gz
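# test mode (-t N): draw N figures of generated samples, one label per column of
# the sample grid, and print the mean discriminator score of each row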
if args.t:
    D = discriminator(X, Y)
    ub = mini_batch_size // num_labels + 1
    y = numpy.tile(numpy.eye(num_labels), (ub, 1))[:mini_batch_size]
    for i in range(args.t):
        z = numpy.random.uniform(-1, 1, size=(ub, noise_dim)).repeat(num_labels, axis=0)[:mini_batch_size]
        _, g = save_figure(os.path.join(work_dir, "cdcgan-svhn-test-%03d.png" % i), z, y)
        d = sess.run(D, feed_dict={X: g, Y: y})
        for j in range(samples[0]):
            print(i, j, d[samples[1]*j:samples[1]*(j+1)].mean())  # mean discriminator score for the j-th row of the grid
else:
    t0 = time.time()
    # fixed noise and labels so the per-epoch figures are comparable
    drawz = numpy.random.uniform(-1, 1, size=(mini_batch_size//num_labels+1, noise_dim)).repeat(num_labels, axis=0)[:mini_batch_size]
    drawy = numpy.tile(numpy.eye(num_labels), (mini_batch_size//num_labels+1, 1))[:mini_batch_size]
    for e in range(epoch):
        index = numpy.random.permutation(N)
        dloss = gloss = 0.0
        for i in range(period):
            idx = index[i*mini_batch_size:(i+1)*mini_batch_size]
            x = train_data[idx, :]
            y = numpy.zeros((mini_batch_size, num_labels), dtype=numpy.float32)
            y[numpy.arange(mini_batch_size), train_labels[idx]] = 1
            z = numpy.random.uniform(-1, 1, size=(mini_batch_size, noise_dim))
            if args.cls:
                ywrong = numpy.zeros((mini_batch_size, num_labels), dtype=numpy.float32)
                # offset of 1..num_labels-1 so the mismatched label never equals the true label
                wrong_labels = (train_labels[idx] + numpy.random.randint(1, num_labels, mini_batch_size)) % num_labels
                ywrong[numpy.arange(mini_batch_size), wrong_labels] = 1
            else:
                ywrong = numpy.zeros((0, num_labels), dtype=numpy.float32)
            loss, _ = sess.run([Dloss, Doptimizer], feed_dict={X: x, Y: y, Ywrong: ywrong, Z: z, keep_prob: 0.5, Lrate: 1e-3})
            if math.isnan(loss):
                sess.run(tf.initialize_variables(Dvars)) # initialize & retry if NaN
                print("...initialize discriminator's parameters for nan...")
            else:
                dloss += loss
            # the generator takes many small steps (learning rate 1e-5) per discriminator step
            for ii in range(10):
                y = numpy.zeros((mini_batch_size, num_labels), dtype=numpy.float32)
                y[numpy.arange(mini_batch_size), numpy.random.randint(0, num_labels, mini_batch_size)] = 1
                z = numpy.random.uniform(-1, 1, size=(mini_batch_size, noise_dim))
                for jj in range(10):
                    loss, _ = sess.run([Gloss, Goptimizer], feed_dict={Y: y, Z: z, keep_prob: 1.0, Lrate: 1e-5})
                    if math.isnan(loss):
                        sess.run(tf.initialize_variables(Gvars)) # initialize & retry if NaN
                        print("...initialize generator's parameters for nan...")
                    else:
                        gloss += loss
        print("%d: dloss=%.5f, gloss=%.5f, time=%.1f" % (e+1, dloss / period, gloss / period, time.time()-t0))
        p, _ = save_figure(os.path.join(work_dir, "cdcgan-svhn-%03d.png" % (e+1)), drawz, drawy)
        p.draw()
        p.pause(0.01)
        saver.save(sess, model_path)