diff --git a/README.md b/README.md
index 307ac416f..88e47e944 100644
--- a/README.md
+++ b/README.md
@@ -3,11 +3,9 @@
 This repository contains code samples for my book on ["Neural Networks
 and Deep Learning"](http://neuralnetworksanddeeplearning.com).
 
-The code is written for Python 2.6 or 2.7. Michal Daniel Dobrzanski
-has a repository for Python 3
-[here](https://github.com/MichalDanielDobrzanski/DeepLearningPython35). I
-will not be updating the current repository for Python 3
-compatibility.
+The code is written for Python 2.6 or 2.7. There is a version for
+Python 3.8-3.10 [here](https://github.com/unexploredtest/neural-networks-and-deep-learning).
+I will not be updating the current repository for Python 3 compatibility.
 
 The program `src/network3.py` uses version 0.6 or 0.7 of the Theano
 library.  It needs modification for compatibility with later versions
@@ -21,7 +19,7 @@ free to fork and modify the code.
 
 MIT License
 
-Copyright (c) 2012-2015 Michael Nielsen
+Copyright (c) 2012-2022 Michael Nielsen
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
diff --git a/fig/overfitting.py b/fig/overfitting.py
index 71688fde8..a37e474c0 100644
--- a/fig/overfitting.py
+++ b/fig/overfitting.py
@@ -35,9 +35,9 @@ def main(filename, num_epochs,
     """
     run_network(filename, num_epochs, training_set_size, lmbda)
     make_plots(filename, num_epochs, 
-               test_accuracy_xmin,
                training_cost_xmin,
-               test_accuracy_xmin, 
+               test_accuracy_xmin,
+               test_cost_xmin, 
                training_accuracy_xmin,
                training_set_size)
                        
diff --git a/requirements.txt b/requirements.txt
index 54aef7e95..88effd999 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-numpy
-scikit-learn
-scipy
-Theano
+numpy==1.13.3
+scikit-learn==0.19.0
+scipy==0.19.1
+Theano==0.7.0
diff --git a/src/old/blog/__init__.py b/src/old/blog/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/src/old/blog/common_knowledge.py b/src/old/blog/common_knowledge.py
deleted file mode 100644
index 788935d39..000000000
--- a/src/old/blog/common_knowledge.py
+++ /dev/null
@@ -1,90 +0,0 @@
-"""
-common_knowledge
-~~~~~~~~~~~~~~~~
-
-Try to determine whether or not it's possible to relate the
-descriptions given by two different autoencoders.
-
-"""
-
-#### Libraries
-# My libraries
-from backprop2 import Network, sigmoid_vec
-import mnist_loader
-
-# Third-party libraries
-import matplotlib
-import matplotlib.pyplot as plt
-import numpy as np
-
-
-#### Parameters
-# Size of the training sets.  May range from 1000 to 12,500.  Lower
-# will be faster, higher will give more accuracy.
-SIZE = 5000 
-# Number of hidden units in the autoencoder
-HIDDEN = 30
-
-print "\nGenerating training data"
-training_data, _, _ = mnist_loader.load_data_nn()
-td_1 = [(x, x) for x, _ in training_data[0:SIZE]]
-td_2 = [(x, x) for x, _ in training_data[12500:12500+SIZE]]
-td_3 = [x for x, _ in training_data[25000:25000+SIZE]]
-test = [x for x, _ in training_data[37500:37500+SIZE]]
-
-print "\nFinding first autoencoder"
-ae_1 = Network([784, HIDDEN, 784])
-ae_1.SGD(td_1, 4, 10, 0.01, 0.05)
-
-print "\nFinding second autoencoder"
-ae_2 = Network([784, HIDDEN, 784])
-ae_2.SGD(td_1, 4, 10, 0.01, 0.05)
-
-print "\nGenerating encoded training data"
-encoded_td_1 = [sigmoid_vec(np.dot(ae_1.weights[0], x)+ae_1.biases[0])
-                for x in td_3]
-encoded_td_2 = [sigmoid_vec(np.dot(ae_2.weights[0], x)+ae_2.biases[0])
-                for x in td_3]
-encoded_training_data = zip(encoded_td_1, encoded_td_2)
-
-print "\nFinding mapping between theories"
-net = Network([HIDDEN, HIDDEN])
-net.SGD(encoded_training_data, 6, 10, 0.01, 0.05)
-
-print """\nBaseline for comparison: decompress with the first autoencoder"""
-print """and compress with the second autoencoder"""
-encoded_test_1 = [sigmoid_vec(np.dot(ae_1.weights[0], x)+ae_1.biases[0])
-                  for x in test]
-encoded_test_2 = [sigmoid_vec(np.dot(ae_2.weights[0], x)+ae_2.biases[0])
-                  for x in test]
-test_data = zip(encoded_test_1, encoded_test_2)
-net_baseline = Network([HIDDEN, 784, HIDDEN])
-net_baseline.biases[0] = ae_1.biases[1]
-net_baseline.weights[0] = ae_1.weights[1]
-net_baseline.biases[1] = ae_2.biases[0]
-net_baseline.weights[1] = ae_2.weights[0]
-error_baseline = sum(np.linalg.norm(net_baseline.feedforward(x)-y, 1) 
-                     for (x, y) in test_data)
-print "Baseline average l1 error per training image: %s" % (error_baseline / SIZE,)
-
-print "\nComparing theories with a simple interconversion"
-print "Mean desired output activation: %s" % (
-    sum(y.mean() for _, y in test_data) / SIZE,)
-error = sum(np.linalg.norm(net.feedforward(x)-y, 1) for (x, y) in test_data)
-print "Average l1 error per training image: %s" % (error / SIZE,)
-
-print "\nComputing fiducial image inputs"
-fiducial_images_1 = [
-    ae_1.weights[0][j,:].reshape(28,28)/np.linalg.norm(net.weights[0][j,:])
-    for j in range(HIDDEN)]
-fiducial_images_2 = [
-    ae_2.weights[0][j,:].reshape(28,28)/np.linalg.norm(net.weights[0][j,:])
-    for j in range(HIDDEN)]
-image = np.concatenate([np.concatenate(fiducial_images_1, axis=1), 
-                        np.concatenate(fiducial_images_2, axis=1)])
-fig = plt.figure()
-ax = fig.add_subplot(111)
-ax.matshow(image, cmap = matplotlib.cm.binary)
-plt.xticks(np.array([]))
-plt.yticks(np.array([]))
-plt.show()
diff --git a/src/old/cost_vs_iterations.png b/src/old/cost_vs_iterations.png
deleted file mode 100644
index 74a21934c..000000000
Binary files a/src/old/cost_vs_iterations.png and /dev/null differ
diff --git a/src/old/cost_vs_iterations_trapped.png b/src/old/cost_vs_iterations_trapped.png
deleted file mode 100644
index 4b24b4820..000000000
Binary files a/src/old/cost_vs_iterations_trapped.png and /dev/null differ
diff --git a/src/old/deep_autoencoder.py b/src/old/deep_autoencoder.py
deleted file mode 100644
index afafce524..000000000
--- a/src/old/deep_autoencoder.py
+++ /dev/null
@@ -1,118 +0,0 @@
-"""
-deep_autoencoder
-~~~~~~~~~~~~~~~~
-
-A module which implements deep autoencoders.  
-"""
-
-#### Libraries
-# Standard library
-import random
-
-# My libraries
-from backprop2 import Network, sigmoid_vec
-
-# Third-party libraries
-import numpy as np
-
-
-def plot_helper(x):
-    import matplotlib
-    import matplotlib.pyplot as plt
-    x = np.reshape(x, (-1, 28))
-    fig = plt.figure()
-    ax = fig.add_subplot(1, 1, 1)
-    ax.matshow(x, cmap = matplotlib.cm.binary)
-    plt.xticks(np.array([]))
-    plt.yticks(np.array([]))
-    plt.show()
-
-
-class DeepAutoencoder(Network):
-
-    def __init__(self, layers):
-        """
-        The list ``layers`` specifies the sizes of the nested
-        autoencoders.  For example, if ``layers`` is [50, 20, 10] then
-        the deep autoencoder will be a neural network with layers of
-        size [50, 20, 10, 20, 50]."""
-        self.layers = layers
-        Network.__init__(self, layers+layers[-2::-1])
-
-    def train(self, training_data, epochs, mini_batch_size, eta,
-              lmbda):
-        """
-        Train the DeepAutoencoder.  The ``training_data`` is a list of
-        training inputs, ``x``, ``mini_batch_size`` is a single
-        positive integer, and ``epochs``, ``eta``, ``lmbda`` are lists
-        of parameters, with the different list members corresponding
-        to the different stages of training.  For example, ``eta[0]``
-        is the learning rate used for the first nested autoencoder,
-        ``eta[1]`` is the learning rate for the second nested
-        autoencoder, and so on.  ``eta[-1]`` is the learning rate used
-        for the final stage of fine-tuning.
-        """
-        print "\nTraining a %s deep autoencoder" % (
-            "-".join([str(j) for j in self.sizes]),)
-        training_data = double(training_data)
-        cur_training_data = training_data[::]
-        for j in range(len(self.layers)-1):
-            print "\nTraining the %s-%s-%s nested autoencoder" % (
-                self.layers[j], self.layers[j+1], self.layers[j])
-            print "%s epochs, mini-batch size %s, eta = %s, lambda = %s" % (
-                epochs[j], mini_batch_size, eta[j], lmbda[j])
-            self.train_nested_autoencoder(
-                j, cur_training_data, epochs[j], mini_batch_size, eta[j],
-                lmbda[j])
-            cur_training_data = [
-                (sigmoid_vec(np.dot(net.weights[0], x)+net.biases[0]),)*2
-                for (x, _) in cur_training_data]
-        print "\nFine-tuning network weights with backpropagation"
-        print "%s epochs, mini-batch size %s, eta = %s, lambda = %s" % (
-                epochs[-1], mini_batch_size, eta[-1], lmbda[-1])
-        self.SGD(training_data, epochs[-1], mini_batch_size, eta[-1],
-                 lmbda[-1])
-
-    def train_nested_autoencoder(
-        self, j, encoded_training_data, epochs, mini_batch_size, eta, lmbda):
-        """
-        Train the nested autoencoder that starts at layer ``j`` in the
-        deep autoencoder.  Note that ``encoded_training_data`` is a
-        list with entries of the form ``(x, x)``, where the ``x`` are
-        encoded training inputs for layer ``j``."""
-        net = Network([self.layers[j], self.layers[j+1], self.layers[j]])
-        net.biases[0] = self.biases[j]
-        net.biases[1] = self.biases[-j-1]
-        net.weights[0] = self.weights[j]
-        net.weights[1] = self.weights[-j-1]
-        net.SGD(encoded_training_data, epochs, mini_batch_size, eta, lmbda)
-        self.biases[j] = net.biases[0]
-        self.biases[-j-1] = net.biases[1]
-        self.weights[j] = net.weights[0]
-        self.weights[-j-1] = net.weights[1]
-
-    def train_nested_autoencoder_repl(
-        self, j, training_data, epochs, mini_batch_size, eta, lmbda):
-        """
-        This is a convenience method that can be used from the REPL to
-        train the nested autoencoder that starts at level ``j`` in the
-        deep autoencoder.  Note that ``training_data`` is the input
-        data for the first layer of the network, and is a list of
-        entries ``x``."""
-        self.train_nested_autoencoder(
-            j, 
-            double(
-                [self.feedforward(x, start=0, end=j) for x in training_data]),
-            epochs, mini_batch_size, eta, lmbda)
-
-    def feature(self, j, k):
-        """
-        Return the output if neuron number ``k`` in layer ``j`` is
-        activated, and all others are not active.  """
-        a = np.zeros((self.sizes[j], 1))
-        a[k] = 1.0
-        return self.feedforward(a, start=j, end=self.num_layers)
-
-def double(l):
-    return [(x, x) for x in l]
-
diff --git a/src/old/deep_learning.py b/src/old/deep_learning.py
deleted file mode 100644
index 9e6517a07..000000000
--- a/src/old/deep_learning.py
+++ /dev/null
@@ -1,51 +0,0 @@
-"""
-deep_learning
-~~~~~~~~~~~~~
-
-Module to do deep learning.  Most of the functionality needed is
-already in the ``backprop2`` and ``deep_autoencoder`` modules, but
-this adds convenience functions to help in doing things like unrolling
-deep autoencoders, and adding and training a classifier layer."""
-
-# My Libraries
-from backprop2 import Network
-from deep_autoencoder import DeepAutoencoder
-
-def unroll(deep_autoencoder):
-    """
-    Return a Network that contains the compression stage of the
-    ``deep_autoencoder``."""
-    net = Network(deep_autoencoder.layers)
-    net.weights = deep_autoencoder.weights[:len(deep_autoencoder.layers)-1]
-    net.biases = deep_autoencoder.biases[:len(deep_autoencoder.layers)-1]
-    return net
-
-def add_classifier_layer(net, num_outputs):
-    """
-    Return the Network ``net``, but with an extra layer containing
-    ``num_outputs`` neurons appended."""
-    net_classifier = Network(net.sizes+[num_outputs])
-    net_classifier.weights[:-1] = net.weights
-    net_classifier.biases[:-1] = net.biases
-    return net_classifier
-
-def SGD_final_layer(
-    self, training_data, epochs, mini_batch_size, eta, lmbda):
-    """
-    Run SGD on the final layer of the Network ``self``.  Note that
-    ``training_data`` is the input to the whole Network, not the
-    encoded training data input to the final layer. 
-    """
-    encoded_training_data = [
-        (self.feedforward(x, start=0, end=self.num_layers-2), y) 
-        for x, y in training_data]
-    net = Network(self.sizes[-2:])
-    net.biases[0] = self.biases[-1]
-    net.weights[0] = self.weights[-1]
-    net.SGD(encoded_training_data, epochs, mini_batch_size, eta, lmbda)
-    self.biases[-1] = net.biases[0]
-    self.weights[-1] = net.weights[0]
-
-
-# Add the SGD_final_layer method to the Network class
-Network.SGD_final_layer = SGD_final_layer
diff --git a/src/old/gradient_descent_hack.py b/src/old/gradient_descent_hack.py
deleted file mode 100644
index 0a7ac5e7d..000000000
--- a/src/old/gradient_descent_hack.py
+++ /dev/null
@@ -1,86 +0,0 @@
-"""
-gradient_descent_hack
-~~~~~~~~~~~~~~~~~~~~~
-
-This program uses gradient descent to learn weights and biases for a
-three-neuron network to compute the XOR function.  The program is a
-quick-and-dirty hack meant to illustrate the basic ideas of gradient
-descent, not a cleanly-designed and generalizable implementation."""
-
-#### Libraries
-# Third-party libraries
-import matplotlib.pyplot as plt
-import numpy as np
-
-def sigmoid(z):
-    return 1.0/(1.0+np.exp(-z))
-
-def neuron(w, x):
-    """ Return the output from the sigmoid neuron with weights ``w``
-    and inputs ``x``.  Both are numpy arrays, with three and two
-    elements, respectively.  The first input weight is the bias."""
-    return sigmoid(w[0]+np.inner(w[1:], x))
-
-def h(w, x):
-    """ Return the output from the three-neuron network with weights
-    ``w`` and inputs ``x``.  Note that ``w`` is a numpy array with
-    nine elements, consisting of three weights for each neuron (the
-    bias plus two input weights).  ``x`` is a numpy array with just
-    two elements."""
-    neuron1_out = neuron(w[0:3], x) # top left neuron
-    neuron2_out = neuron(w[3:6], x) # bottom left neuron
-    return neuron(w[6:9], np.array([neuron1_out, neuron2_out]))
-
-# inputs and corresponding outputs for the function we're computing (XOR)
-INPUTS = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]] 
-OUTPUTS = [0.0, 1.0, 1.0, 0.0]
-
-def cost(w):
-    """ Return the cost when the neural network has weights ``w``.
-    The cost is computed with respect to the XOR function."""
-    return 0.5 * sum((y-h(w, np.array(x)))**2 for x, y in zip(INPUTS, OUTPUTS))
-
-def partial(f, k, w):
-    """ Return the partial derivative of the function ``f`` with
-    respect to the ``k``th variable, at location ``w``.  Note that
-    ``f`` must take a numpy array as input, and the partial derivative
-    is evaluated with respect to the ``k``th element in that array.
-    Similarly, ``w`` is a numpy array which can be used as input to
-    ``f``."""
-    w_plus, w_minus = w.copy(), w.copy()
-    w_plus[k] += 0.01 # using epsilon = 0.01
-    w_minus[k] += -0.01
-    return (f(w_plus)-f(w_minus))/0.02
-    
-def gradient_descent(cost, eta, n):
-    """ Perform ``n`` iterations of the gradient descent algorithm to
-    minimize the ``cost`` function, with a learning rate ``eta``.
-    Return a tuple whose first entry is an array containing the final
-    weights, and whose second entry is a list of the values the
-    ``cost`` function took at different iterations."""
-    w = np.random.uniform(-1, 1, 9) # initialize weights randomly
-    costs = []
-    for j in xrange(n):
-        c = cost(w)
-        print "Current cost: {0:.3f}".format(c)
-        costs.append(c)
-        gradient = [partial(cost, k, w) for k in xrange(9)]
-        w = np.array([wt-eta*d for wt, d in zip(w, gradient)])
-    return w, costs
-
-def main():
-    """ Perform gradient descent to find weights for a sigmoid neural
-    network to compute XOR.  10,000 iterations are used.  Outputs the
-    final value of the cost function, the final weights, and plots a
-    graph of cost as a function of iteration."""
-    w, costs = gradient_descent(cost, 0.1, 10000)
-    print "\nFinal cost: {0:.3f}".format(cost(w))
-    print "\nFinal weights: %s" % w
-    plt.plot(np.array(costs))
-    plt.xlabel('iteration')
-    plt.ylabel('cost')
-    plt.title('How cost decreases with the number of iterations')
-    plt.show()
-
-if __name__ == "__main__":
-    main()
diff --git a/src/old/mnist_100_30_deep_autoencoder.png b/src/old/mnist_100_30_deep_autoencoder.png
deleted file mode 100644
index c9544f266..000000000
Binary files a/src/old/mnist_100_30_deep_autoencoder.png and /dev/null differ
diff --git a/src/old/mnist_100_unit_autoencoder.png b/src/old/mnist_100_unit_autoencoder.png
deleted file mode 100644
index cb706f901..000000000
Binary files a/src/old/mnist_100_unit_autoencoder.png and /dev/null differ
diff --git a/src/old/mnist_10_unit_autoencoder.png b/src/old/mnist_10_unit_autoencoder.png
deleted file mode 100644
index 8042848e6..000000000
Binary files a/src/old/mnist_10_unit_autoencoder.png and /dev/null differ
diff --git a/src/old/mnist_30_component_pca.png b/src/old/mnist_30_component_pca.png
deleted file mode 100644
index 7a80e1f63..000000000
Binary files a/src/old/mnist_30_component_pca.png and /dev/null differ
diff --git a/src/old/mnist_30_unit_autoencoder.png b/src/old/mnist_30_unit_autoencoder.png
deleted file mode 100644
index ecf1efa98..000000000
Binary files a/src/old/mnist_30_unit_autoencoder.png and /dev/null differ
diff --git a/src/old/mnist_autoencoder.py b/src/old/mnist_autoencoder.py
deleted file mode 100644
index e5138bade..000000000
--- a/src/old/mnist_autoencoder.py
+++ /dev/null
@@ -1,83 +0,0 @@
-"""
-mnist_autoencoder
-~~~~~~~~~~~~~~~~~
-
-Implements an autoencoder for the MNIST data.  The program can do two
-things: (1) plot the autoencoder's output for the first ten images in
-the MNIST test set; and (2) use the autoencoder to build a classifier.
-The program is a quick-and-dirty hack --- we'll do things in a more
-systematic way in the module ``deep_autoencoder``.
-"""
-
-# My Libraries
-from backprop2 import Network
-import mnist_loader 
-
-# Third-party libraries
-import matplotlib
-import matplotlib.pyplot as plt
-import numpy as np
-
-def autoencoder_results(hidden_units):
-    """
-    Train an autoencoder using the MNIST training data and plot the
-    results when the first ten MNIST test images are passed through
-    the autoencoder.
-    """
-    training_data, test_inputs, actual_test_results = \
-        mnist_loader.load_data_nn()
-    net = train_autoencoder(hidden_units, training_data)
-    plot_test_results(net, test_inputs)
-
-def train_autoencoder(hidden_units, training_data):
-    "Return a trained autoencoder."
-    autoencoder_training_data = [(x, x) for x, _ in training_data]
-    net = Network([784, hidden_units, 784])
-    net.SGD(autoencoder_training_data, 6, 10, 0.01, 0.05)
-    return net
-
-def plot_test_results(net, test_inputs):
-    """
-    Plot the results after passing the first ten test MNIST digits through
-    the autoencoder ``net``."""
-    fig = plt.figure()
-    ax = fig.add_subplot(111)
-    images_in = [test_inputs[j].reshape(-1, 28) for j in range(10)]
-    images_out = [net.feedforward(test_inputs[j]).reshape(-1, 28) 
-                  for j in range(10)]
-    image_in = np.concatenate(images_in, axis=1)
-    image_out = np.concatenate(images_out, axis=1)
-    image = np.concatenate([image_in, image_out])
-    ax.matshow(image, cmap = matplotlib.cm.binary)
-    plt.xticks(np.array([]))
-    plt.yticks(np.array([]))
-    plt.show()
-
-def classifier(hidden_units, n_unlabeled_inputs, n_labeled_inputs):
-    """
-    Train a semi-supervised classifier.  We begin with pretraining,
-    creating an autoencoder which uses ``n_unlabeled_inputs`` from the
-    MNIST training data.  This is then converted into a classifier
-    which is fine-tuned using the ``n_labeled_inputs``.
-
-    For comparison a classifier is also created which does not make
-    use of the unlabeled data.
-    """
-    training_data, test_inputs, actual_test_results = \
-        mnist_loader.load_data_nn()
-    print "\nUsing pretraining and %s items of unlabeled data" %\
-        n_unlabeled_inputs
-    net_ae = train_autoencoder(hidden_units, training_data[:n_unlabeled_inputs])
-    net_c = Network([784, hidden_units, 10])
-    net_c.biases = net_ae.biases[:1]+[np.random.randn(10, 1)/np.sqrt(10)]
-    net_c.weights = net_ae.weights[:1]+\
-        [np.random.randn(10, hidden_units)/np.sqrt(10)]
-    net_c.SGD(training_data[-n_labeled_inputs:], 300, 10, 0.01, 0.05)
-    print "Result on test data: %s / %s" % (
-        net_c.evaluate(test_inputs, actual_test_results), len(test_inputs))
-    print "Training a network with %s items of training data" % n_labeled_inputs
-    net = Network([784, hidden_units, 10])
-    net.SGD(training_data[-n_labeled_inputs:], 300, 10, 0.01, 0.05)
-    print "Result on test data: %s / %s" % (
-        net.evaluate(test_inputs, actual_test_results), len(test_inputs))
-    return net_c
diff --git a/src/old/mnist_pca.py b/src/old/mnist_pca.py
deleted file mode 100644
index b06de6bc4..000000000
--- a/src/old/mnist_pca.py
+++ /dev/null
@@ -1,40 +0,0 @@
-"""
-mnist_pca
-~~~~~~~~~
-
-Use PCA to reconstruct some of the MNIST test digits.
-"""
-
-# My libraries
-import mnist_loader
-
-# Third-party libraries
-import matplotlib
-import matplotlib.pyplot as plt
-import numpy as np
-from sklearn.decomposition import RandomizedPCA
-
-
-# Training
-training_data, test_inputs, actual_test_results = mnist_loader.load_data_nn()
-pca = RandomizedPCA(n_components=30)
-nn_images = [x for (x, y) in training_data]
-pca_images = np.concatenate(nn_images, axis=1).transpose()
-pca_r = pca.fit(pca_images)
-
-# Try PCA on first ten test images
-test_images = np.array(test_inputs[:10]).reshape((10,784))
-test_outputs = pca_r.inverse_transform(pca_r.transform(test_images))
-
-# Plot the first ten test images and the corresponding outputs
-fig = plt.figure()
-ax = fig.add_subplot(111)
-images_in = [test_inputs[j].reshape(-1, 28) for j in range(10)]
-images_out = [test_outputs[j].reshape(-1, 28) for j in range(10)]
-image_in = np.concatenate(images_in, axis=1)
-image_out = np.concatenate(images_out, axis=1)
-image = np.concatenate([image_in, image_out])
-ax.matshow(image, cmap = matplotlib.cm.binary)
-plt.xticks(np.array([]))
-plt.yticks(np.array([]))
-plt.show()
diff --git a/src/old/perceptron_learning.py b/src/old/perceptron_learning.py
deleted file mode 100644
index a2c04f033..000000000
--- a/src/old/perceptron_learning.py
+++ /dev/null
@@ -1,67 +0,0 @@
-"""
-perceptron_learning
-~~~~~~~~~~~~~~~~~~~
-
-Demonstrates how a perceptron can learn the NAND gate, using the
-perceptron learning algorithm."""
-
-#### Libraries
-# Third-party library
-import numpy as np
-
-class Perceptron(object):
-    """ A Perceptron instance can take a function and attempt to
-    ``learn`` a bias and set of weights that compute that function,
-    using the perceptron learning algorithm."""
-
-    def __init__(self, num_inputs=2):
-        """ Initialize the perceptron with the bias and all weights
-        set to 0.0. ``num_inputs`` is the number of input bits to the
-        perceptron."""
-        self.num_inputs = num_inputs
-        self.bias = 0.0
-        self.weights = np.zeros(num_inputs)
-        # self.inputs is a convenience attribute.  It's a list containing
-        # all possible binary inputs to the perceptron.  E.g., for three
-        # inputs it is: [np.array([0, 0, 0]), np.array([0, 0, 1]), ...]
-        self.inputs = [np.array([int(y)
-                        for y in bin(x).lstrip("0b").zfill(num_inputs)])
-                       for x in xrange(2**num_inputs)]
-
-    def output(self, x):
-        """ Return the output (0 or 1) from the perceptron, with input
-        ``x``."""
-        return 1 if np.inner(self.weights, x)+self.bias > 0 else 0
-
-    def learn(self, f, eta=0.1):
-        """ Find a bias and a set of weights for a perceptron that
-        computes the function ``f``. ``eta`` is the learning rate, and
-        should be a small positive number.  Does not terminate when
-        the function cannot be computed using a perceptron."""
-        # initialize the bias and weights with random values
-        self.bias = np.random.normal()
-        self.weights = np.random.randn(self.num_inputs)
-        number_of_errors = -1
-        while number_of_errors != 0:
-            number_of_errors = 0
-            print "Beginning iteration"
-            print "Bias: {:.3f}".format(self.bias)
-            print "Weights:", ", ".join(
-                "{:.3f}".format(wt) for wt in self.weights)
-            for x in self.inputs:
-                error = f(x)-self.output(x)
-                if error:
-                    number_of_errors += 1
-                    self.bias = self.bias+eta*error
-                    self.weights = self.weights+eta*error*x
-            print "Number of errors:", number_of_errors, "\n"
-
-def f(x):
-    """ Target function for the perceptron learning algorithm.  I've
-    chosen the NAND gate, but any function is okay, with the caveat
-    that the algorithm won't terminate if ``f`` cannot be computed by
-    a perceptron."""
-    return int(not (x[0] and x[1]))
-
-if __name__ == "__main__":
-    Perceptron(2).learn(f, 0.1)