Skip to content

Commit

Permalink
Tracking gradients in different layers, and serializing some MNIST images to JSON
Browse files Browse the repository at this point in the history
  • Loading branch information
mnielsen committed Aug 23, 2014
1 parent ffce979 commit 4e8c4d9
Show file tree
Hide file tree
Showing 11 changed files with 156 additions and 16 deletions.
1 change: 1 addition & 0 deletions fig/data_1000.json

Large diffs are not rendered by default.

120 changes: 105 additions & 15 deletions fig/generate_gradient.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,113 @@
"""

import numpy as np

#### Libraries
# Standard library
import json
import math
import random
import shutil
import sys
sys.path.append("../code/")
import mnist_loader
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()

# My library
import mnist_loader
import network2
net = network2.Network([784, 30, 30, 10])
nabla_b_results = [net.backprop(x, y)[0] for x, y in training_data[:1000]]
def sum(a, b): return [x+y for (x, y) in zip(a, b)]
gradient = reduce(sum, nabla_b_results)
average_gradient = [(np.reshape(g, len(g))/1000).tolist() for g in gradient]
# Discard all but the first 6 terms in each layer, discard the output layer
abbreviated_gradient = [ag[:6] for ag in average_gradient[:-1]]

import json
f = open("initial_gradient.json", "w")
json.dump(abbreviated_gradient, f)
f.close()
# Third-party libraries
import matplotlib.pyplot as plt
import numpy as np

def main():
# Load the data
full_td, _, _ = mnist_loader.load_data_wrapper()
td = full_td[:1000] # Just use the first 1000 items of training data
epochs = 500 # Number of epochs to train for

print "\nTwo hidden layers:"
net = network2.Network([784, 30, 30, 10])
initial_norms(td, net)
abbreviated_gradient = [
ag[:6] for ag in get_average_gradient(net, td)[:-1]]
print "Saving the averaged gradient for the top six neurons in each "+\
"layer.\nWARNING: This will affect the look of the book, so be "+\
"sure to check the\nrelevant material (early chapter 5)."
f = open("initial_gradient.json", "w")
json.dump(abbreviated_gradient, f)
f.close()
shutil.copy("initial_gradient.json", "../../js/initial_gradient.json")
training(td, net, epochs, "norms_during_training_2_layers.json")
plot_training(
epochs, "norms_during_training_2_layers.json", 2)

print "\nThree hidden layers:"
net = network2.Network([784, 30, 30, 30, 10])
initial_norms(td, net)
training(td, net, epochs, "norms_during_training_3_layers.json")
plot_training(
epochs, "norms_during_training_3_layers.json", 3)

print "\nFour hidden layers:"
net = network2.Network([784, 30, 30, 30, 30, 10])
initial_norms(td, net)
training(td, net, epochs,
"norms_during_training_4_layers.json")
plot_training(
epochs, "norms_during_training_4_layers.json", 4)

def initial_norms(training_data, net):
average_gradient = get_average_gradient(net, training_data)
norms = [list_norm(avg) for avg in average_gradient[:-1]]
print "Average gradient for the hidden layers: "+str(norms)

def training(training_data, net, epochs, filename):
norms = []
for j in range(epochs):
average_gradient = get_average_gradient(net, training_data)
norms.append([list_norm(avg) for avg in average_gradient[:-1]])
print "Epoch: %s" % j
net.SGD(training_data, 1, 1000, 0.1, lmbda=5.0)
f = open(filename, "w")
json.dump(norms, f)
f.close()

def plot_training(epochs, filename, num_layers):
    """Plot the gradient-norm history saved by ``training``.

    Reads *filename* (a JSON list, one entry per epoch, each a list of
    per-hidden-layer norms), plots one log-scale curve per hidden layer,
    saves the figure as a PNG, copies it into the book's images
    directory, and shows it interactively.
    """
    # "with" guarantees the file handle is closed even if parsing fails.
    with open(filename, "r") as f:
        norms = json.load(f)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    colors = ["#2A6EA6", "#FFA933", "#FF5555", "#55FF55", "#5555FF"]
    for j in range(num_layers):
        ax.plot(np.arange(epochs),
                [n[j] for n in norms],
                color=colors[j],
                label="Hidden layer %s" % (j+1,))
    ax.set_xlim([0, epochs])
    ax.grid(True)
    ax.set_xlabel('Number of epochs of training')
    ax.set_title('Speed of learning: %s hidden layers' % num_layers)
    ax.set_yscale('log')
    plt.legend(loc="upper right")
    fig_filename = "training_speed_%s_layers.png" % num_layers
    plt.savefig(fig_filename)
    shutil.copy(fig_filename, "../../images/"+fig_filename)
    plt.show()

def get_average_gradient(net, training_data):
    """Return the bias gradient averaged over *training_data*, as one
    flat Python list per layer of *net*."""
    per_example = [net.backprop(x, y)[0] for x, y in training_data]
    layer_totals = list_sum(per_example)
    count = len(training_data)
    averaged = []
    for layer_total in layer_totals:
        flat = np.reshape(layer_total, len(layer_total))
        averaged.append((flat / count).tolist())
    return averaged

def zip_sum(a, b):
    """Return the elementwise sum of the sequences *a* and *b*
    (standard ``zip`` semantics: truncates to the shorter one)."""
    total = []
    for left, right in zip(a, b):
        total.append(left + right)
    return total

def list_sum(l):
    """Return the elementwise sum of *l*, a non-empty sequence of
    equal-length sequences (of numbers or numpy arrays).

    Equivalent to ``reduce(zip_sum, l)``, but written as an explicit
    fold so it does not rely on the ``reduce`` builtin, which exists
    only in Python 2 (it moved to ``functools`` in Python 3).  Raises
    TypeError on an empty sequence, matching ``reduce``.
    """
    iterator = iter(l)
    try:
        total = iterator.next() if hasattr(iterator, "next") else next(iterator)
    except StopIteration:
        raise TypeError("list_sum() of empty sequence")
    for item in iterator:
        total = [x + y for (x, y) in zip(total, item)]
    return total

def list_norm(l):
    """Return the Euclidean (L2) norm of the sequence of numbers *l*."""
    squared_total = 0.0
    for value in l:
        squared_total += value * value
    return math.sqrt(squared_total)

# Entry point: run the experiment only when executed as a script,
# not when imported as a module.
if __name__ == "__main__":
    main()
2 changes: 1 addition & 1 deletion fig/initial_gradient.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
[[-0.00015957744575933252, -7.109660645293893e-06, 0.00029705824697869363, -0.005433034945183055, -0.000601732153598837, -0.00031497136113071197], [-0.004743788813673901, -0.003335113231382309, -0.006826947354624844, 0.001668145239275299, -0.013916515462361398, 0.002312540509777085], [0.05364605271597593, -0.0057698230441689275, -0.010571727068813678, 0.07860259192197483, 0.014443898612513025, -0.019157824473129328]]
[[-0.003970677333144113, -0.0031684316985881185, 0.008103235909196014, 0.012598010584130365, -0.026465907331998335, 0.0017583319323150341], [0.04152906589960523, 0.044025552524932406, -0.009669682279354514, 0.046736871369353235, 0.03877302528270452, 0.012336459551975156]]
1 change: 1 addition & 0 deletions fig/norms_during_training_2_layers.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions fig/norms_during_training_3_layers.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions fig/norms_during_training_4_layers.json

Large diffs are not rendered by default.

46 changes: 46 additions & 0 deletions fig/serialize_images_to_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""
serialize_images_to_json
~~~~~~~~~~~~~~~~~~~~~~~~
Utility to serialize parts of the training and validation data to JSON,
for use with Javascript. """

#### Libraries
# Standard library
import json
import sys

# My library
sys.path.append('../code/')
import mnist_loader

# Third-party libraries
import numpy as np


# Number of training and validation data images to serialize
NTD = 1000
NVD = 100

training_data, validation_data, test_data = mnist_loader.load_data_wrapper()

def make_data_integer(td):
    """Scale the float image *td* (a (784, 1) numpy array) by 256 and
    return it as a flat list of 784 standard Python ints.

    ``astype`` truncates floats toward zero exactly as ``int(x)`` does,
    and ``tolist()`` on an integer array yields standard Python ints,
    so the whole conversion runs in C with no Python-level loop.
    """
    return (td*256).astype(np.int64).reshape(784).tolist()

# Build the JSON payload: the first NTD training images (with one-hot
# label vectors) and the first NVD validation images (with integer
# labels), each image flattened to a plain list of floats.
data = {"training": [
            {"x": [x[0] for x in training_data[j][0].tolist()],
             "y": [y[0] for y in training_data[j][1].tolist()]}
            for j in xrange(NTD)],
        "validation": [
            {"x": [x[0] for x in validation_data[j][0].tolist()],
             "y": validation_data[j][1]}
            for j in xrange(NVD)]}

# "with" guarantees the file is closed even if json.dump raises.
with open("data_1000.json", "w") as f:
    json.dump(data, f)


Binary file added fig/test.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added fig/training_speed_2_layers.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added fig/training_speed_3_layers.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added fig/training_speed_4_layers.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 4e8c4d9

Please sign in to comment.