diff --git a/README.md b/README.md index 307ac416f..88e47e944 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,9 @@ This repository contains code samples for my book on ["Neural Networks and Deep Learning"](http://neuralnetworksanddeeplearning.com). -The code is written for Python 2.6 or 2.7. Michal Daniel Dobrzanski -has a repository for Python 3 -[here](https://github.com/MichalDanielDobrzanski/DeepLearningPython35). I -will not be updating the current repository for Python 3 -compatibility. +The code is written for Python 2.6 or 2.7. There is a version for +Python 3.8-3.10 [here](https://github.com/unexploredtest/neural-networks-and-deep-learning). +I will not be updating the current repository for Python 3 compatibility. The program `src/network3.py` uses version 0.6 or 0.7 of the Theano library. It needs modification for compatibility with later versions @@ -21,7 +19,7 @@ free to fork and modify the code. MIT License -Copyright (c) 2012-2015 Michael Nielsen +Copyright (c) 2012-2022 Michael Nielsen Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the diff --git a/fig/overfitting.py b/fig/overfitting.py index 71688fde8..a37e474c0 100644 --- a/fig/overfitting.py +++ b/fig/overfitting.py @@ -35,9 +35,9 @@ def main(filename, num_epochs, """ run_network(filename, num_epochs, training_set_size, lmbda) make_plots(filename, num_epochs, - test_accuracy_xmin, training_cost_xmin, - test_accuracy_xmin, + test_accuracy_xmin, + test_cost_xmin, training_accuracy_xmin, training_set_size) diff --git a/requirements.txt b/requirements.txt index 54aef7e95..88effd999 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -numpy -scikit-learn -scipy -Theano +numpy==1.13.3 +scikit-learn==0.19.0 +scipy==0.19.1 +Theano==0.7.0 diff --git a/src/old/blog/__init__.py b/src/old/blog/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/old/blog/common_knowledge.py b/src/old/blog/common_knowledge.py deleted file mode 100644 index 788935d39..000000000 --- a/src/old/blog/common_knowledge.py +++ /dev/null @@ -1,90 +0,0 @@ -""" -common_knowledge -~~~~~~~~~~~~~~~~ - -Try to determine whether or not it's possible to relate the -descriptions given by two different autoencoders. - -""" - -#### Libraries -# My libraries -from backprop2 import Network, sigmoid_vec -import mnist_loader - -# Third-party libraries -import matplotlib -import matplotlib.pyplot as plt -import numpy as np - - -#### Parameters -# Size of the training sets. May range from 1000 to 12,500. Lower -# will be faster, higher will give more accuracy. -SIZE = 5000 -# Number of hidden units in the autoencoder -HIDDEN = 30 - -print "\nGenerating training data" -training_data, _, _ = mnist_loader.load_data_nn() -td_1 = [(x, x) for x, _ in training_data[0:SIZE]] -td_2 = [(x, x) for x, _ in training_data[12500:12500+SIZE]] -td_3 = [x for x, _ in training_data[25000:25000+SIZE]] -test = [x for x, _ in training_data[37500:37500+SIZE]] - -print "\nFinding first autoencoder" -ae_1 = Network([784, HIDDEN, 784]) -ae_1.SGD(td_1, 4, 10, 0.01, 0.05) - -print "\nFinding second autoencoder" -ae_2 = Network([784, HIDDEN, 784]) -ae_2.SGD(td_1, 4, 10, 0.01, 0.05) - -print "\nGenerating encoded training data" -encoded_td_1 = [sigmoid_vec(np.dot(ae_1.weights[0], x)+ae_1.biases[0]) - for x in td_3] -encoded_td_2 = [sigmoid_vec(np.dot(ae_2.weights[0], x)+ae_2.biases[0]) - for x in td_3] -encoded_training_data = zip(encoded_td_1, encoded_td_2) - -print "\nFinding mapping between theories" -net = Network([HIDDEN, HIDDEN]) -net.SGD(encoded_training_data, 6, 10, 0.01, 0.05) - -print """\nBaseline for comparison: decompress with the first autoencoder""" -print """and compress with the second autoencoder""" -encoded_test_1 = [sigmoid_vec(np.dot(ae_1.weights[0], x)+ae_1.biases[0]) - for x in test] -encoded_test_2 = [sigmoid_vec(np.dot(ae_2.weights[0], x)+ae_2.biases[0]) - for x in test] -test_data = zip(encoded_test_1, encoded_test_2) -net_baseline = Network([HIDDEN, 784, HIDDEN]) -net_baseline.biases[0] = ae_1.biases[1] -net_baseline.weights[0] = ae_1.weights[1] -net_baseline.biases[1] = ae_2.biases[0] -net_baseline.weights[1] = ae_2.weights[0] -error_baseline = sum(np.linalg.norm(net_baseline.feedforward(x)-y, 1) - for (x, y) in test_data) -print "Baseline average l1 error per training image: %s" % (error_baseline / SIZE,) - -print "\nComparing theories with a simple interconversion" -print "Mean desired output activation: %s" % ( - sum(y.mean() for _, y in test_data) / SIZE,) -error = sum(np.linalg.norm(net.feedforward(x)-y, 1) for (x, y) in test_data) -print "Average l1 error per training image: %s" % (error / SIZE,) - -print "\nComputing fiducial image inputs" -fiducial_images_1 = [ - ae_1.weights[0][j,:].reshape(28,28)/np.linalg.norm(net.weights[0][j,:]) - for j in range(HIDDEN)] -fiducial_images_2 = [ - ae_2.weights[0][j,:].reshape(28,28)/np.linalg.norm(net.weights[0][j,:]) - for j in range(HIDDEN)] -image = np.concatenate([np.concatenate(fiducial_images_1, axis=1), - np.concatenate(fiducial_images_2, axis=1)]) -fig = plt.figure() -ax = fig.add_subplot(111) -ax.matshow(image, cmap = matplotlib.cm.binary) -plt.xticks(np.array([])) -plt.yticks(np.array([])) -plt.show() diff --git a/src/old/cost_vs_iterations.png b/src/old/cost_vs_iterations.png deleted file mode 100644 index 74a21934c..000000000 Binary files a/src/old/cost_vs_iterations.png and /dev/null differ diff --git a/src/old/cost_vs_iterations_trapped.png b/src/old/cost_vs_iterations_trapped.png deleted file mode 100644 index 4b24b4820..000000000 Binary files a/src/old/cost_vs_iterations_trapped.png and /dev/null differ diff --git a/src/old/deep_autoencoder.py b/src/old/deep_autoencoder.py deleted file mode 100644 index afafce524..000000000 --- a/src/old/deep_autoencoder.py +++ /dev/null @@ -1,118 +0,0 @@ -""" -deep_autoencoder -~~~~~~~~~~~~~~~~ - -A module which implements deep autoencoders. -""" - -#### Libraries -# Standard library -import random - -# My libraries -from backprop2 import Network, sigmoid_vec - -# Third-party libraries -import numpy as np - - -def plot_helper(x): - import matplotlib - import matplotlib.pyplot as plt - x = np.reshape(x, (-1, 28)) - fig = plt.figure() - ax = fig.add_subplot(1, 1, 1) - ax.matshow(x, cmap = matplotlib.cm.binary) - plt.xticks(np.array([])) - plt.yticks(np.array([])) - plt.show() - - -class DeepAutoencoder(Network): - - def __init__(self, layers): - """ - The list ``layers`` specifies the sizes of the nested - autoencoders. For example, if ``layers`` is [50, 20, 10] then - the deep autoencoder will be a neural network with layers of - size [50, 20, 10, 20, 50].""" - self.layers = layers - Network.__init__(self, layers+layers[-2::-1]) - - def train(self, training_data, epochs, mini_batch_size, eta, - lmbda): - """ - Train the DeepAutoencoder. The ``training_data`` is a list of - training inputs, ``x``, ``mini_batch_size`` is a single - positive integer, and ``epochs``, ``eta``, ``lmbda`` are lists - of parameters, with the different list members corresponding - to the different stages of training. For example, ``eta[0]`` - is the learning rate used for the first nested autoencoder, - ``eta[1]`` is the learning rate for the second nested - autoencoder, and so on. ``eta[-1]`` is the learning rate used - for the final stage of fine-tuning. - """ - print "\nTraining a %s deep autoencoder" % ( - "-".join([str(j) for j in self.sizes]),) - training_data = double(training_data) - cur_training_data = training_data[::] - for j in range(len(self.layers)-1): - print "\nTraining the %s-%s-%s nested autoencoder" % ( - self.layers[j], self.layers[j+1], self.layers[j]) - print "%s epochs, mini-batch size %s, eta = %s, lambda = %s" % ( - epochs[j], mini_batch_size, eta[j], lmbda[j]) - self.train_nested_autoencoder( - j, cur_training_data, epochs[j], mini_batch_size, eta[j], - lmbda[j]) - cur_training_data = [ - (sigmoid_vec(np.dot(net.weights[0], x)+net.biases[0]),)*2 - for (x, _) in cur_training_data] - print "\nFine-tuning network weights with backpropagation" - print "%s epochs, mini-batch size %s, eta = %s, lambda = %s" % ( - epochs[-1], mini_batch_size, eta[-1], lmbda[-1]) - self.SGD(training_data, epochs[-1], mini_batch_size, eta[-1], - lmbda[-1]) - - def train_nested_autoencoder( - self, j, encoded_training_data, epochs, mini_batch_size, eta, lmbda): - """ - Train the nested autoencoder that starts at layer ``j`` in the - deep autoencoder. Note that ``encoded_training_data`` is a - list with entries of the form ``(x, x)``, where the ``x`` are - encoded training inputs for layer ``j``.""" - net = Network([self.layers[j], self.layers[j+1], self.layers[j]]) - net.biases[0] = self.biases[j] - net.biases[1] = self.biases[-j-1] - net.weights[0] = self.weights[j] - net.weights[1] = self.weights[-j-1] - net.SGD(encoded_training_data, epochs, mini_batch_size, eta, lmbda) - self.biases[j] = net.biases[0] - self.biases[-j-1] = net.biases[1] - self.weights[j] = net.weights[0] - self.weights[-j-1] = net.weights[1] - - def train_nested_autoencoder_repl( - self, j, training_data, epochs, mini_batch_size, eta, lmbda): - """ - This is a convenience method that can be used from the REPL to - train the nested autoencoder that starts at level ``j`` in the - deep autoencoder. Note that ``training_data`` is the input - data for the first layer of the network, and is a list of - entries ``x``.""" - self.train_nested_autoencoder( - j, - double( - [self.feedforward(x, start=0, end=j) for x in training_data]), - epochs, mini_batch_size, eta, lmbda) - - def feature(self, j, k): - """ - Return the output if neuron number ``k`` in layer ``j`` is - activated, and all others are not active. """ - a = np.zeros((self.sizes[j], 1)) - a[k] = 1.0 - return self.feedforward(a, start=j, end=self.num_layers) - -def double(l): - return [(x, x) for x in l] - diff --git a/src/old/deep_learning.py b/src/old/deep_learning.py deleted file mode 100644 index 9e6517a07..000000000 --- a/src/old/deep_learning.py +++ /dev/null @@ -1,51 +0,0 @@ -""" -deep_learning -~~~~~~~~~~~~~ - -Module to do deep learning. Most of the functionality needed is -already in the ``backprop2`` and ``deep_autoencoder`` modules, but -this adds convenience functions to help in doing things like unrolling -deep autoencoders, and adding and training a classifier layer.""" - -# My Libraries -from backprop2 import Network -from deep_autoencoder import DeepAutoencoder - -def unroll(deep_autoencoder): - """ - Return a Network that contains the compression stage of the - ``deep_autoencoder``.""" - net = Network(deep_autoencoder.layers) - net.weights = deep_autoencoder.weights[:len(deep_autoencoder.layers)-1] - net.biases = deep_autoencoder.biases[:len(deep_autoencoder.layers)-1] - return net - -def add_classifier_layer(net, num_outputs): - """ - Return the Network ``net``, but with an extra layer containing - ``num_outputs`` neurons appended.""" - net_classifier = Network(net.sizes+[num_outputs]) - net_classifier.weights[:-1] = net.weights - net_classifier.biases[:-1] = net.biases - return net_classifier - -def SGD_final_layer( - self, training_data, epochs, mini_batch_size, eta, lmbda): - """ - Run SGD on the final layer of the Network ``self``. Note that - ``training_data`` is the input to the whole Network, not the - encoded training data input to the final layer. - """ - encoded_training_data = [ - (self.feedforward(x, start=0, end=self.num_layers-2), y) - for x, y in training_data] - net = Network(self.sizes[-2:]) - net.biases[0] = self.biases[-1] - net.weights[0] = self.weights[-1] - net.SGD(encoded_training_data, epochs, mini_batch_size, eta, lmbda) - self.biases[-1] = net.biases[0] - self.weights[-1] = net.weights[0] - - -# Add the SGD_final_layer method to the Network class -Network.SGD_final_layer = SGD_final_layer diff --git a/src/old/gradient_descent_hack.py b/src/old/gradient_descent_hack.py deleted file mode 100644 index 0a7ac5e7d..000000000 --- a/src/old/gradient_descent_hack.py +++ /dev/null @@ -1,86 +0,0 @@ -""" -gradient_descent_hack -~~~~~~~~~~~~~~~~~~~~~ - -This program uses gradient descent to learn weights and biases for a -three-neuron network to compute the XOR function. The program is a -quick-and-dirty hack meant to illustrate the basic ideas of gradient -descent, not a cleanly-designed and generalizable implementation.""" - -#### Libraries -# Third-party libraries -import matplotlib.pyplot as plt -import numpy as np - -def sigmoid(z): - return 1.0/(1.0+np.exp(-z)) - -def neuron(w, x): - """ Return the output from the sigmoid neuron with weights ``w`` - and inputs ``x``. Both are numpy arrays, with three and two - elements, respectively. The first input weight is the bias.""" - return sigmoid(w[0]+np.inner(w[1:], x)) - -def h(w, x): - """ Return the output from the three-neuron network with weights - ``w`` and inputs ``x``. Note that ``w`` is a numpy array with - nine elements, consisting of three weights for each neuron (the - bias plus two input weights). ``x`` is a numpy array with just - two elements.""" - neuron1_out = neuron(w[0:3], x) # top left neuron - neuron2_out = neuron(w[3:6], x) # bottom left neuron - return neuron(w[6:9], np.array([neuron1_out, neuron2_out])) - -# inputs and corresponding outputs for the function we're computing (XOR) -INPUTS = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]] -OUTPUTS = [0.0, 1.0, 1.0, 0.0] - -def cost(w): - """ Return the cost when the neural network has weights ``w``. - The cost is computed with respect to the XOR function.""" - return 0.5 * sum((y-h(w, np.array(x)))**2 for x, y in zip(INPUTS, OUTPUTS)) - -def partial(f, k, w): - """ Return the partial derivative of the function ``f`` with - respect to the ``k``th variable, at location ``w``. Note that - ``f`` must take a numpy array as input, and the partial derivative - is evaluated with respect to the ``k``th element in that array. - Similarly, ``w`` is a numpy array which can be used as input to - ``f``.""" - w_plus, w_minus = w.copy(), w.copy() - w_plus[k] += 0.01 # using epsilon = 0.01 - w_minus[k] += -0.01 - return (f(w_plus)-f(w_minus))/0.02 - -def gradient_descent(cost, eta, n): - """ Perform ``n`` iterations of the gradient descent algorithm to - minimize the ``cost`` function, with a learning rate ``eta``. - Return a tuple whose first entry is an array containing the final - weights, and whose second entry is a list of the values the - ``cost`` function took at different iterations.""" - w = np.random.uniform(-1, 1, 9) # initialize weights randomly - costs = [] - for j in xrange(n): - c = cost(w) - print "Current cost: {0:.3f}".format(c) - costs.append(c) - gradient = [partial(cost, k, w) for k in xrange(9)] - w = np.array([wt-eta*d for wt, d in zip(w, gradient)]) - return w, costs - -def main(): - """ Perform gradient descent to find weights for a sigmoid neural - network to compute XOR. 10,000 iterations are used. Outputs the - final value of the cost function, the final weights, and plots a - graph of cost as a function of iteration.""" - w, costs = gradient_descent(cost, 0.1, 10000) - print "\nFinal cost: {0:.3f}".format(cost(w)) - print "\nFinal weights: %s" % w - plt.plot(np.array(costs)) - plt.xlabel('iteration') - plt.ylabel('cost') - plt.title('How cost decreases with the number of iterations') - plt.show() - -if __name__ == "__main__": - main() diff --git a/src/old/mnist_100_30_deep_autoencoder.png b/src/old/mnist_100_30_deep_autoencoder.png deleted file mode 100644 index c9544f266..000000000 Binary files a/src/old/mnist_100_30_deep_autoencoder.png and /dev/null differ diff --git a/src/old/mnist_100_unit_autoencoder.png b/src/old/mnist_100_unit_autoencoder.png deleted file mode 100644 index cb706f901..000000000 Binary files a/src/old/mnist_100_unit_autoencoder.png and /dev/null differ diff --git a/src/old/mnist_10_unit_autoencoder.png b/src/old/mnist_10_unit_autoencoder.png deleted file mode 100644 index 8042848e6..000000000 Binary files a/src/old/mnist_10_unit_autoencoder.png and /dev/null differ diff --git a/src/old/mnist_30_component_pca.png b/src/old/mnist_30_component_pca.png deleted file mode 100644 index 7a80e1f63..000000000 Binary files a/src/old/mnist_30_component_pca.png and /dev/null differ diff --git a/src/old/mnist_30_unit_autoencoder.png b/src/old/mnist_30_unit_autoencoder.png deleted file mode 100644 index ecf1efa98..000000000 Binary files a/src/old/mnist_30_unit_autoencoder.png and /dev/null differ diff --git a/src/old/mnist_autoencoder.py b/src/old/mnist_autoencoder.py deleted file mode 100644 index e5138bade..000000000 --- a/src/old/mnist_autoencoder.py +++ /dev/null @@ -1,83 +0,0 @@ -""" -mnist_autoencoder -~~~~~~~~~~~~~~~~~ - -Implements an autoencoder for the MNIST data. The program can do two -things: (1) plot the autoencoder's output for the first ten images in -the MNIST test set; and (2) use the autoencoder to build a classifier. -The program is a quick-and-dirty hack --- we'll do things in a more -systematic way in the module ``deep_autoencoder``. -""" - -# My Libraries -from backprop2 import Network -import mnist_loader - -# Third-party libraries -import matplotlib -import matplotlib.pyplot as plt -import numpy as np - -def autoencoder_results(hidden_units): - """ - Train an autoencoder using the MNIST training data and plot the - results when the first ten MNIST test images are passed through - the autoencoder. - """ - training_data, test_inputs, actual_test_results = \ - mnist_loader.load_data_nn() - net = train_autoencoder(hidden_units, training_data) - plot_test_results(net, test_inputs) - -def train_autoencoder(hidden_units, training_data): - "Return a trained autoencoder." - autoencoder_training_data = [(x, x) for x, _ in training_data] - net = Network([784, hidden_units, 784]) - net.SGD(autoencoder_training_data, 6, 10, 0.01, 0.05) - return net - -def plot_test_results(net, test_inputs): - """ - Plot the results after passing the first ten test MNIST digits through - the autoencoder ``net``.""" - fig = plt.figure() - ax = fig.add_subplot(111) - images_in = [test_inputs[j].reshape(-1, 28) for j in range(10)] - images_out = [net.feedforward(test_inputs[j]).reshape(-1, 28) - for j in range(10)] - image_in = np.concatenate(images_in, axis=1) - image_out = np.concatenate(images_out, axis=1) - image = np.concatenate([image_in, image_out]) - ax.matshow(image, cmap = matplotlib.cm.binary) - plt.xticks(np.array([])) - plt.yticks(np.array([])) - plt.show() - -def classifier(hidden_units, n_unlabeled_inputs, n_labeled_inputs): - """ - Train a semi-supervised classifier. We begin with pretraining, - creating an autoencoder which uses ``n_unlabeled_inputs`` from the - MNIST training data. This is then converted into a classifier - which is fine-tuned using the ``n_labeled_inputs``. - - For comparison a classifier is also created which does not make - use of the unlabeled data. - """ - training_data, test_inputs, actual_test_results = \ - mnist_loader.load_data_nn() - print "\nUsing pretraining and %s items of unlabeled data" %\ - n_unlabeled_inputs - net_ae = train_autoencoder(hidden_units, training_data[:n_unlabeled_inputs]) - net_c = Network([784, hidden_units, 10]) - net_c.biases = net_ae.biases[:1]+[np.random.randn(10, 1)/np.sqrt(10)] - net_c.weights = net_ae.weights[:1]+\ - [np.random.randn(10, hidden_units)/np.sqrt(10)] - net_c.SGD(training_data[-n_labeled_inputs:], 300, 10, 0.01, 0.05) - print "Result on test data: %s / %s" % ( - net_c.evaluate(test_inputs, actual_test_results), len(test_inputs)) - print "Training a network with %s items of training data" % n_labeled_inputs - net = Network([784, hidden_units, 10]) - net.SGD(training_data[-n_labeled_inputs:], 300, 10, 0.01, 0.05) - print "Result on test data: %s / %s" % ( - net.evaluate(test_inputs, actual_test_results), len(test_inputs)) - return net_c diff --git a/src/old/mnist_pca.py b/src/old/mnist_pca.py deleted file mode 100644 index b06de6bc4..000000000 --- a/src/old/mnist_pca.py +++ /dev/null @@ -1,40 +0,0 @@ -""" -mnist_pca -~~~~~~~~~ - -Use PCA to reconstruct some of the MNIST test digits. -""" - -# My libraries -import mnist_loader - -# Third-party libraries -import matplotlib -import matplotlib.pyplot as plt -import numpy as np -from sklearn.decomposition import RandomizedPCA - - -# Training -training_data, test_inputs, actual_test_results = mnist_loader.load_data_nn() -pca = RandomizedPCA(n_components=30) -nn_images = [x for (x, y) in training_data] -pca_images = np.concatenate(nn_images, axis=1).transpose() -pca_r = pca.fit(pca_images) - -# Try PCA on first ten test images -test_images = np.array(test_inputs[:10]).reshape((10,784)) -test_outputs = pca_r.inverse_transform(pca_r.transform(test_images)) - -# Plot the first ten test images and the corresponding outputs -fig = plt.figure() -ax = fig.add_subplot(111) -images_in = [test_inputs[j].reshape(-1, 28) for j in range(10)] -images_out = [test_outputs[j].reshape(-1, 28) for j in range(10)] -image_in = np.concatenate(images_in, axis=1) -image_out = np.concatenate(images_out, axis=1) -image = np.concatenate([image_in, image_out]) -ax.matshow(image, cmap = matplotlib.cm.binary) -plt.xticks(np.array([])) -plt.yticks(np.array([])) -plt.show() diff --git a/src/old/perceptron_learning.py b/src/old/perceptron_learning.py deleted file mode 100644 index a2c04f033..000000000 --- a/src/old/perceptron_learning.py +++ /dev/null @@ -1,67 +0,0 @@ -""" -perceptron_learning -~~~~~~~~~~~~~~~~~~~ - -Demonstrates how a perceptron can learn the NAND gate, using the -perceptron learning algorithm.""" - -#### Libraries -# Third-party library -import numpy as np - -class Perceptron(object): - """ A Perceptron instance can take a function and attempt to - ``learn`` a bias and set of weights that compute that function, - using the perceptron learning algorithm.""" - - def __init__(self, num_inputs=2): - """ Initialize the perceptron with the bias and all weights - set to 0.0. ``num_inputs`` is the number of input bits to the - perceptron.""" - self.num_inputs = num_inputs - self.bias = 0.0 - self.weights = np.zeros(num_inputs) - # self.inputs is a convenience attribute. It's a list containing - # all possible binary inputs to the perceptron. E.g., for three - # inputs it is: [np.array([0, 0, 0]), np.array([0, 0, 1]), ...] - self.inputs = [np.array([int(y) - for y in bin(x).lstrip("0b").zfill(num_inputs)]) - for x in xrange(2**num_inputs)] - - def output(self, x): - """ Return the output (0 or 1) from the perceptron, with input - ``x``.""" - return 1 if np.inner(self.weights, x)+self.bias > 0 else 0 - - def learn(self, f, eta=0.1): - """ Find a bias and a set of weights for a perceptron that - computes the function ``f``. ``eta`` is the learning rate, and - should be a small positive number. Does not terminate when - the function cannot be computed using a perceptron.""" - # initialize the bias and weights with random values - self.bias = np.random.normal() - self.weights = np.random.randn(self.num_inputs) - number_of_errors = -1 - while number_of_errors != 0: - number_of_errors = 0 - print "Beginning iteration" - print "Bias: {:.3f}".format(self.bias) - print "Weights:", ", ".join( - "{:.3f}".format(wt) for wt in self.weights) - for x in self.inputs: - error = f(x)-self.output(x) - if error: - number_of_errors += 1 - self.bias = self.bias+eta*error - self.weights = self.weights+eta*error*x - print "Number of errors:", number_of_errors, "\n" - -def f(x): - """ Target function for the perceptron learning algorithm. I've - chosen the NAND gate, but any function is okay, with the caveat - that the algorithm won't terminate if ``f`` cannot be computed by - a perceptron.""" - return int(not (x[0] and x[1])) - -if __name__ == "__main__": - Perceptron(2).learn(f, 0.1)