
Commit

Add small changes
vkanade committed Jun 2, 2016
1 parent 2b8cab8 commit 2ef1f82
Showing 2 changed files with 75 additions and 8 deletions.
55 changes: 54 additions & 1 deletion src/mnist_loader.py
@@ -8,14 +8,16 @@
function usually called by our neural network code.
"""

# Libraries
# Standard library
import cPickle
import gzip

# Third-party libraries
import numpy as np


def load_data():
"""Return the MNIST data as a tuple containing the training data,
the validation data, and the test data.
@@ -44,6 +46,7 @@ def load_data():
f.close()
return (training_data, validation_data, test_data)


def load_data_wrapper():
"""Return a tuple containing ``(training_data, validation_data,
test_data)``. Based on ``load_data``, but the format is more
@@ -75,6 +78,40 @@ def load_data_wrapper():
test_data = zip(test_inputs, te_d[1])
return (training_data, validation_data, test_data)


def load_data_wrapper2():
"""
Return a tuple containing ``(training_data, validation_data,
test_data)``. Based on ``load_data``, but the format is more
convenient for use in our implementation of neural networks.
In particular, ``training_data`` is a list containing 50,000
2-tuples ``(x, y)``. ``x`` is a 784-dimensional numpy.ndarray
    containing the input image.  ``y`` is a 4-dimensional
    numpy.ndarray representing the binary encoding of the correct
    digit for ``x``.
``validation_data`` and ``test_data`` are lists containing 10,000
2-tuples ``(x, y)``. In each case, ``x`` is a 784-dimensional
    numpy.ndarray containing the input image, and ``y`` is the
    corresponding classification, i.e., the digit value (an integer)
    corresponding to ``x``.
Obviously, this means we're using slightly different formats for
the training data and the validation / test data. These formats
turn out to be the most convenient for use in our neural network
code."""
tr_d, va_d, te_d = load_data()
training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
training_results = [vectorized_result2(y) for y in tr_d[1]]
training_data = zip(training_inputs, training_results)
validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
validation_data = zip(validation_inputs, va_d[1])
test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
test_data = zip(test_inputs, te_d[1])
return (training_data, validation_data, test_data)


def vectorized_result(j):
"""Return a 10-dimensional unit vector with a 1.0 in the jth
position and zeroes elsewhere. This is used to convert a digit
@@ -83,3 +120,19 @@ def vectorized_result(j):
e = np.zeros((10, 1))
e[j] = 1.0
return e


def vectorized_result2(j):
"""Return a 4-dimensional vector with binary coding of j. This is used to
convert a digit (0...9) into a corresponding desired output from the neural
network."""
e = np.zeros((4, 1))
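    # Set e[i] to 1.0 when bit i of j is 1 (least significant bit first).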
if j % 2 >= 1:
e[0] = 1.0
if j % 4 >= 2:
e[1] = 1.0
if j % 8 >= 4:
e[2] = 1.0
if j % 16 >= 8:
e[3] = 1.0
return e
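
As an aside, here is a minimal, self-contained sketch of how the 4-bit encoding produced by ``vectorized_result2`` is meant to round-trip with the rounding-based decoding used in ``network.py``. The ``encode_digit`` and ``decode_digit`` names below are illustrative only and do not appear in this commit.

import numpy as np


def encode_digit(j):
    """Binary-encode a digit 0..9 as a (4, 1) array, least significant bit first."""
    e = np.zeros((4, 1))
    for i in range(4):
        e[i] = (j >> i) & 1
    return e


def decode_digit(a):
    """Recover the digit by rounding each entry to 0/1 and summing powers of two."""
    return sum(int(round(a[i, 0])) << i for i in range(4))


# Round-trip check over all ten digits.
assert all(decode_digit(encode_digit(j)) == j for j in range(10))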
28 changes: 21 additions & 7 deletions src/network.py
@@ -9,13 +9,14 @@
and omits many desirable features.
"""

# Libraries
# Standard library
import random

# Third-party libraries
import numpy as np


class Network(object):

def __init__(self, sizes):
@@ -51,7 +52,8 @@ def SGD(self, training_data, epochs, mini_batch_size, eta,
network will be evaluated against the test data after each
epoch, and partial progress printed out. This is useful for
tracking progress, but slows things down substantially."""
if test_data:
n_test = len(test_data)
n = len(training_data)
for j in xrange(epochs):
random.shuffle(training_data)
@@ -91,8 +93,8 @@ def backprop(self, x, y):
nabla_w = [np.zeros(w.shape) for w in self.weights]
# feedforward
activation = x
activations = [x] # list to store all the activations, layer by layer
zs = [] # list to store all the z vectors, layer by layer
for b, w in zip(self.biases, self.weights):
z = np.dot(w, activation)+b
zs.append(z)
@@ -122,20 +124,32 @@ def evaluate(self, test_data):
network outputs the correct result. Note that the neural
network's output is assumed to be the index of whichever
neuron in the final layer has the highest activation."""
        # Decode the 4-dimensional binary output back into a digit,
        # treating index 0 as the least significant bit.
        def toNumber(array):
            return int(round(array[0])) + int(round(array[1]))*2 + \
                int(round(array[2]))*4 + int(round(array[3]))*8

if self.sizes[-1] == 10:
test_results = [(np.argmax(self.feedforward(x)), y)
for (x, y) in test_data]
else:
test_results = [(toNumber(self.feedforward(x)), y)
for (x, y) in test_data]

return sum(int(x == y) for (x, y) in test_results)

def cost_derivative(self, output_activations, y):
"""Return the vector of partial derivatives \partial C_x /
\partial a for the output activations."""
return (output_activations-y)


# Miscellaneous functions
def sigmoid(z):
"""The sigmoid function."""
return 1.0/(1.0+np.exp(-z))


def sigmoid_prime(z):
"""Derivative of the sigmoid function."""
return sigmoid(z)*(1-sigmoid(z))
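
To see the two changed files working together, here is a rough usage sketch, assuming the repository's ``src`` directory is on the Python path; the hidden-layer size and the SGD hyperparameters are illustrative choices, not values taken from this commit.

# Illustrative usage of the new wrapper (Python 2, matching the repo's style).
import mnist_loader
import network

training_data, validation_data, test_data = mnist_loader.load_data_wrapper2()

# Four output neurons, one per bit of the binary digit encoding; evaluate()
# falls back to the toNumber decoding because sizes[-1] != 10.
net = network.Network([784, 30, 4])
net.SGD(training_data, epochs=30, mini_batch_size=10, eta=3.0,
        test_data=test_data)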
