New Examples (aymericdamien#160)
* Added basic models examples (kmeans, random forest, ...)
* Added API examples (layers, estimator, ...)
* Added other examples (Multi-GPU, build a dataset, ...)
* Notebook refactoring with new header and more details
aymericdamien authored Aug 29, 2017
1 parent 4e829a6 commit 90bb4de
Showing 56 changed files with 6,451 additions and 1,524 deletions.
144 changes: 62 additions & 82 deletions README.md

Large diffs are not rendered by default.

86 changes: 86 additions & 0 deletions examples/2_BasicModels/kmeans.py
@@ -0,0 +1,86 @@
""" K-Means.
Implement K-Means algorithm with TensorFlow, and apply it to classify
handwritten digit images. This example is using the MNIST database of
handwritten digits as training samples (http://yann.lecun.com/exdb/mnist/).
Note: This example requires TensorFlow v1.1.0 or over.
Author: Aymeric Damien
Project: https://github.com/aymericdamien/TensorFlow-Examples/
"""

from __future__ import print_function

import numpy as np
import tensorflow as tf
from tensorflow.contrib.factorization import KMeans

# Ignore all GPUs, tf k-means does not benefit from it.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
full_data_x = mnist.train.images

# Parameters
num_steps = 50 # Total steps to train
batch_size = 1024 # The number of samples per batch
k = 25 # The number of clusters
num_classes = 10 # The 10 digits
num_features = 784 # Each image is 28x28 pixels

# Input images
X = tf.placeholder(tf.float32, shape=[None, num_features])
# Labels (for assigning a label to a centroid and testing)
Y = tf.placeholder(tf.float32, shape=[None, num_classes])

# K-Means Parameters
kmeans = KMeans(inputs=X, num_clusters=k, distance_metric='cosine',
                use_mini_batch=True)

# Build KMeans graph
(all_scores, cluster_idx, scores, cluster_centers_initialized, init_op,
 train_op) = kmeans.training_graph()
cluster_idx = cluster_idx[0] # fix for cluster_idx being a tuple
avg_distance = tf.reduce_mean(scores)

# Initialize the variables (i.e. assign their default value)
init_vars = tf.global_variables_initializer()

# Start TensorFlow session
sess = tf.Session()

# Run the initializer
sess.run(init_vars, feed_dict={X: full_data_x})
sess.run(init_op, feed_dict={X: full_data_x})

# Training
for i in range(1, num_steps + 1):
    _, d, idx = sess.run([train_op, avg_distance, cluster_idx],
                         feed_dict={X: full_data_x})
    if i % 10 == 0 or i == 1:
        print("Step %i, Avg Distance: %f" % (i, d))

# Assign a label to each centroid
# Count the total number of labels per centroid, by accumulating the label of
# each training sample at its closest centroid (given by 'idx')
counts = np.zeros(shape=(k, num_classes))
for i in range(len(idx)):
    counts[idx[i]] += mnist.train.labels[i]
# Assign the most frequent label to the centroid
labels_map = [np.argmax(c) for c in counts]
labels_map = tf.convert_to_tensor(labels_map)

# Evaluation ops
# Lookup: centroid_id -> label
cluster_label = tf.nn.embedding_lookup(labels_map, cluster_idx)
# Compute accuracy
correct_prediction = tf.equal(cluster_label, tf.cast(tf.argmax(Y, 1), tf.int32))
accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Test Model
test_x, test_y = mnist.test.images, mnist.test.labels
print("Test Accuracy:", sess.run(accuracy_op, feed_dict={X: test_x, Y: test_y}))
6 changes: 4 additions & 2 deletions examples/2_BasicModels/linear_regression.py
@@ -41,11 +41,13 @@
 # Note, minimize() knows to modify W and b because Variable objects are trainable=True by default
 optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
 
-# Initializing the variables
+# Initialize the variables (i.e. assign their default value)
 init = tf.global_variables_initializer()
 
-# Launch the graph
+# Start training
 with tf.Session() as sess:
+
+    # Run the initializer
     sess.run(init)
 
     # Fit all training data
6 changes: 4 additions & 2 deletions examples/2_BasicModels/logistic_regression.py
@@ -37,11 +37,13 @@
 # Gradient Descent
 optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
 
-# Initializing the variables
+# Initialize the variables (i.e. assign their default value)
 init = tf.global_variables_initializer()
 
-# Launch the graph
+# Start training
 with tf.Session() as sess:
+
+    # Run the initializer
     sess.run(init)
 
     # Training cycle
6 changes: 4 additions & 2 deletions examples/2_BasicModels/nearest_neighbor.py
@@ -32,11 +32,13 @@
 
 accuracy = 0.
 
-# Initializing the variables
+# Initialize the variables (i.e. assign their default value)
 init = tf.global_variables_initializer()
 
-# Launch the graph
+# Start training
 with tf.Session() as sess:
+
+    # Run the initializer
     sess.run(init)
 
     # loop over test data
75 changes: 75 additions & 0 deletions examples/2_BasicModels/random_forest.py
@@ -0,0 +1,75 @@
""" Random Forest.
Implement Random Forest algorithm with TensorFlow, and apply it to classify
handwritten digit images. This example is using the MNIST database of
handwritten digits as training samples (http://yann.lecun.com/exdb/mnist/).
Author: Aymeric Damien
Project: https://github.com/aymericdamien/TensorFlow-Examples/
"""

from __future__ import print_function

import tensorflow as tf
from tensorflow.contrib.tensor_forest.python import tensor_forest

# Ignore all GPUs, tf random forest does not benefit from it.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=False)

# Parameters
num_steps = 500 # Total steps to train
batch_size = 1024 # The number of samples per batch
num_classes = 10 # The 10 digits
num_features = 784 # Each image is 28x28 pixels
num_trees = 10
max_nodes = 1000

# Input and Target data
X = tf.placeholder(tf.float32, shape=[None, num_features])
# For random forest, labels must be integers (the class id)
Y = tf.placeholder(tf.int32, shape=[None])

# Random Forest Parameters
hparams = tensor_forest.ForestHParams(num_classes=num_classes,
                                      num_features=num_features,
                                      num_trees=num_trees,
                                      max_nodes=max_nodes).fill()

# Build the Random Forest
forest_graph = tensor_forest.RandomForestGraphs(hparams)
# Get training graph and loss
train_op = forest_graph.training_graph(X, Y)
loss_op = forest_graph.training_loss(X, Y)

# Measure the accuracy
infer_op = forest_graph.inference_graph(X)
correct_prediction = tf.equal(tf.argmax(infer_op, 1), tf.cast(Y, tf.int64))
accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Initialize the variables (i.e. assign their default value)
init_vars = tf.global_variables_initializer()

# Start TensorFlow session
sess = tf.Session()

# Run the initializer
sess.run(init_vars)

# Training
for i in range(1, num_steps + 1):
    # Prepare Data
    # Get the next batch of MNIST data (images and their integer class labels)
    batch_x, batch_y = mnist.train.next_batch(batch_size)
    _, l = sess.run([train_op, loss_op], feed_dict={X: batch_x, Y: batch_y})
    if i % 50 == 0 or i == 1:
        acc = sess.run(accuracy_op, feed_dict={X: batch_x, Y: batch_y})
        print('Step %i, Loss: %f, Acc: %f' % (i, l, acc))

# Test Model
test_x, test_y = mnist.test.images, mnist.test.labels
print("Test Accuracy:", sess.run(accuracy_op, feed_dict={X: test_x, Y: test_y}))
126 changes: 76 additions & 50 deletions examples/3_NeuralNetworks/autoencoder.py
@@ -1,13 +1,18 @@
 # -*- coding: utf-8 -*-
 
 """ Auto Encoder Example.
-Using an auto encoder on MNIST handwritten digits.
+
+Build a 2 layers auto-encoder with TensorFlow to compress images to a
+lower latent space and then reconstruct them.
+
 References:
     Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. "Gradient-based
     learning applied to document recognition." Proceedings of the IEEE,
     86(11):2278-2324, November 1998.
+
 Links:
     [MNIST Dataset] http://yann.lecun.com/exdb/mnist/
+
 Author: Aymeric Damien
 Project: https://github.com/aymericdamien/TensorFlow-Examples/
 """
 from __future__ import division, print_function, absolute_import
 
@@ -17,37 +22,37 @@
 
 # Import MNIST data
 from tensorflow.examples.tutorials.mnist import input_data
-mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
+mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
 
-# Parameters
+# Training Parameters
 learning_rate = 0.01
-training_epochs = 20
+num_steps = 30000
 batch_size = 256
-display_step = 1
 
+display_step = 1000
 examples_to_show = 10
 
 # Network Parameters
-n_hidden_1 = 256 # 1st layer num features
-n_hidden_2 = 128 # 2nd layer num features
-n_input = 784 # MNIST data input (img shape: 28*28)
+num_hidden_1 = 256 # 1st layer num features
+num_hidden_2 = 128 # 2nd layer num features (the latent dim)
+num_input = 784 # MNIST data input (img shape: 28*28)
 
 # tf Graph input (only pictures)
-X = tf.placeholder("float", [None, n_input])
+X = tf.placeholder("float", [None, num_input])
 
 weights = {
-    'encoder_h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
-    'encoder_h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
-    'decoder_h1': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_1])),
-    'decoder_h2': tf.Variable(tf.random_normal([n_hidden_1, n_input])),
+    'encoder_h1': tf.Variable(tf.random_normal([num_input, num_hidden_1])),
+    'encoder_h2': tf.Variable(tf.random_normal([num_hidden_1, num_hidden_2])),
+    'decoder_h1': tf.Variable(tf.random_normal([num_hidden_2, num_hidden_1])),
+    'decoder_h2': tf.Variable(tf.random_normal([num_hidden_1, num_input])),
 }
 biases = {
-    'encoder_b1': tf.Variable(tf.random_normal([n_hidden_1])),
-    'encoder_b2': tf.Variable(tf.random_normal([n_hidden_2])),
-    'decoder_b1': tf.Variable(tf.random_normal([n_hidden_1])),
-    'decoder_b2': tf.Variable(tf.random_normal([n_input])),
+    'encoder_b1': tf.Variable(tf.random_normal([num_hidden_1])),
+    'encoder_b2': tf.Variable(tf.random_normal([num_hidden_2])),
+    'decoder_b1': tf.Variable(tf.random_normal([num_hidden_1])),
+    'decoder_b2': tf.Variable(tf.random_normal([num_input])),
 }
 
 
 # Building the encoder
 def encoder(x):
     # Encoder Hidden layer with sigmoid activation #1
@@ -79,38 +84,59 @@ def decoder(x):
 y_true = X
 
 # Define loss and optimizer, minimize the squared error
-cost = tf.reduce_mean(tf.pow(y_true - y_pred, 2))
-optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(cost)
+loss = tf.reduce_mean(tf.pow(y_true - y_pred, 2))
+optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(loss)
 
-# Initializing the variables
+# Initialize the variables (i.e. assign their default value)
 init = tf.global_variables_initializer()
 
-# Launch the graph
+# Start Training
+# Start a new TF session
 with tf.Session() as sess:
 
+    # Run the initializer
     sess.run(init)
-    total_batch = int(mnist.train.num_examples/batch_size)
-    # Training cycle
-    for epoch in range(training_epochs):
-        # Loop over all batches
-        for i in range(total_batch):
-            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
-            # Run optimization op (backprop) and cost op (to get loss value)
-            _, c = sess.run([optimizer, cost], feed_dict={X: batch_xs})
-        # Display logs per epoch step
-        if epoch % display_step == 0:
-            print("Epoch:", '%04d' % (epoch+1),
-                  "cost=", "{:.9f}".format(c))
-
-    print("Optimization Finished!")
-
-    # Applying encode and decode over test set
-    encode_decode = sess.run(
-        y_pred, feed_dict={X: mnist.test.images[:examples_to_show]})
-    # Compare original images with their reconstructions
-    f, a = plt.subplots(2, 10, figsize=(10, 2))
-    for i in range(examples_to_show):
-        a[0][i].imshow(np.reshape(mnist.test.images[i], (28, 28)))
-        a[1][i].imshow(np.reshape(encode_decode[i], (28, 28)))
-    f.show()
-    plt.draw()
-    plt.waitforbuttonpress()
+
+    # Training
+    for i in range(1, num_steps+1):
+        # Prepare Data
+        # Get the next batch of MNIST data (only images are needed, not labels)
+        batch_x, _ = mnist.train.next_batch(batch_size)
+
+        # Run optimization op (backprop) and cost op (to get loss value)
+        _, l = sess.run([optimizer, loss], feed_dict={X: batch_x})
+        # Display logs per step
+        if i % display_step == 0 or i == 1:
+            print('Step %i: Minibatch Loss: %f' % (i, l))
+
+    # Testing
+    # Encode and decode images from test set and visualize their reconstruction.
+    n = 4
+    canvas_orig = np.empty((28 * n, 28 * n))
+    canvas_recon = np.empty((28 * n, 28 * n))
+    for i in range(n):
+        # MNIST test set
+        batch_x, _ = mnist.test.next_batch(n)
+        # Encode and decode the digit image
+        g = sess.run(decoder_op, feed_dict={X: batch_x})
+
+        # Display original images
+        for j in range(n):
+            # Draw the original digits
+            canvas_orig[i * 28:(i + 1) * 28, j * 28:(j + 1) * 28] = \
+                batch_x[j].reshape([28, 28])
+        # Display reconstructed images
+        for j in range(n):
+            # Draw the reconstructed digits
+            canvas_recon[i * 28:(i + 1) * 28, j * 28:(j + 1) * 28] = \
+                g[j].reshape([28, 28])
+
+    print("Original Images")
+    plt.figure(figsize=(n, n))
+    plt.imshow(canvas_orig, origin="upper", cmap="gray")
+    plt.show()
+
+    print("Reconstructed Images")
+    plt.figure(figsize=(n, n))
+    plt.imshow(canvas_recon, origin="upper", cmap="gray")
+    plt.show()
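
The encoder and decoder bodies are collapsed in the diff above, even though the visible code uses decoder_op and y_pred. For context, here is a sketch of what the two functions and the model wiring plausibly look like, assuming the 2-layer sigmoid design described in the file header and the weight/bias names defined above (reconstructed for illustration, not copied from the commit):

# Sketch of the collapsed encoder/decoder, assumed from the weight names
# and the "sigmoid activation" comments visible in the diff.
def encoder(x):
    # Encoder Hidden layer with sigmoid activation #1
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['encoder_h1']),
                                   biases['encoder_b1']))
    # Encoder Hidden layer with sigmoid activation #2 (the latent code)
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['encoder_h2']),
                                   biases['encoder_b2']))
    return layer_2


def decoder(x):
    # Decoder Hidden layer with sigmoid activation #1
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['decoder_h1']),
                                   biases['decoder_b1']))
    # Decoder Hidden layer with sigmoid activation #2 (the reconstruction)
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['decoder_h2']),
                                   biases['decoder_b2']))
    return layer_2


# Assumed model wiring: compress, then reconstruct.
encoder_op = encoder(X)
decoder_op = decoder(encoder_op)
y_pred = decoder_op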