New Examples (aymericdamien#160)
* Added basic models examples (kmeans, random forest, ...)
* Added API examples (layers, estimator, ...)
* Added other examples (Multi-GPU, build a dataset, ...)
* Notebook refactoring with new header and more details
aymericdamien authored Aug 29, 2017
1 parent 4e829a6 commit 90bb4de
Showing 56 changed files with 6,451 additions and 1,524 deletions.
144 changes: 62 additions & 82 deletions README.md

Large diffs are not rendered by default.

86 changes: 86 additions & 0 deletions examples/2_BasicModels/kmeans.py
@@ -0,0 +1,86 @@
""" K-Means.
Implement K-Means algorithm with TensorFlow, and apply it to classify
handwritten digit images. This example is using the MNIST database of
handwritten digits as training samples (http://yann.lecun.com/exdb/mnist/).
Note: This example requires TensorFlow v1.1.0 or over.
Author: Aymeric Damien
Project: https://github.com/aymericdamien/TensorFlow-Examples/
"""

from __future__ import print_function

import numpy as np
import tensorflow as tf
from tensorflow.contrib.factorization import KMeans

# Ignore all GPUs, tf k-means does not benefit from it.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
full_data_x = mnist.train.images

# Parameters
num_steps = 50 # Total steps to train
batch_size = 1024 # The number of samples per batch
k = 25 # The number of clusters
num_classes = 10 # The 10 digits
num_features = 784 # Each image is 28x28 pixels

# Input images
X = tf.placeholder(tf.float32, shape=[None, num_features])
# Labels (for assigning a label to a centroid and testing)
Y = tf.placeholder(tf.float32, shape=[None, num_classes])

# K-Means Parameters
kmeans = KMeans(inputs=X, num_clusters=k, distance_metric='cosine',
                use_mini_batch=True)

# Build KMeans graph
(all_scores, cluster_idx, scores, cluster_centers_initialized, init_op,
 train_op) = kmeans.training_graph()
cluster_idx = cluster_idx[0] # fix for cluster_idx being a tuple
avg_distance = tf.reduce_mean(scores)

# Initialize the variables (i.e. assign their default value)
init_vars = tf.global_variables_initializer()

# Start TensorFlow session
sess = tf.Session()

# Run the initializer
sess.run(init_vars, feed_dict={X: full_data_x})
sess.run(init_op, feed_dict={X: full_data_x})

# Training
for i in range(1, num_steps + 1):
    _, d, idx = sess.run([train_op, avg_distance, cluster_idx],
                         feed_dict={X: full_data_x})
    if i % 10 == 0 or i == 1:
        print("Step %i, Avg Distance: %f" % (i, d))

# Assign a label to each centroid
# Count the total number of labels per centroid, by accumulating the label of
# each training sample at its closest centroid (given by 'idx')
counts = np.zeros(shape=(k, num_classes))
for i in range(len(idx)):
    counts[idx[i]] += mnist.train.labels[i]
# Assign the most frequent label to the centroid
labels_map = [np.argmax(c) for c in counts]
labels_map = tf.convert_to_tensor(labels_map)

# Evaluation ops
# Lookup: centroid_id -> label
cluster_label = tf.nn.embedding_lookup(labels_map, cluster_idx)
# Compute accuracy
correct_prediction = tf.equal(cluster_label, tf.cast(tf.argmax(Y, 1), tf.int32))
accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Test Model
test_x, test_y = mnist.test.images, mnist.test.labels
print("Test Accuracy:", sess.run(accuracy_op, feed_dict={X: test_x, Y: test_y}))
6 changes: 4 additions & 2 deletions examples/2_BasicModels/linear_regression.py
@@ -41,11 +41,13 @@
 # Note, minimize() knows to modify W and b because Variable objects are trainable=True by default
 optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
 
-# Initializing the variables
+# Initialize the variables (i.e. assign their default value)
 init = tf.global_variables_initializer()
 
-# Launch the graph
+# Start training
 with tf.Session() as sess:
+
+    # Run the initializer
     sess.run(init)
 
     # Fit all training data
6 changes: 4 additions & 2 deletions examples/2_BasicModels/logistic_regression.py
@@ -37,11 +37,13 @@
 # Gradient Descent
 optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
 
-# Initializing the variables
+# Initialize the variables (i.e. assign their default value)
 init = tf.global_variables_initializer()
 
-# Launch the graph
+# Start training
 with tf.Session() as sess:
+
+    # Run the initializer
     sess.run(init)
 
     # Training cycle
6 changes: 4 additions & 2 deletions examples/2_BasicModels/nearest_neighbor.py
@@ -32,11 +32,13 @@
 
 accuracy = 0.
 
-# Initializing the variables
+# Initialize the variables (i.e. assign their default value)
 init = tf.global_variables_initializer()
 
-# Launch the graph
+# Start training
 with tf.Session() as sess:
+
+    # Run the initializer
     sess.run(init)
 
     # loop over test data
75 changes: 75 additions & 0 deletions examples/2_BasicModels/random_forest.py
@@ -0,0 +1,75 @@
""" Random Forest.
Implement Random Forest algorithm with TensorFlow, and apply it to classify
handwritten digit images. This example is using the MNIST database of
handwritten digits as training samples (http://yann.lecun.com/exdb/mnist/).
Author: Aymeric Damien
Project: https://github.com/aymericdamien/TensorFlow-Examples/
"""

from __future__ import print_function

import tensorflow as tf
from tensorflow.contrib.tensor_forest.python import tensor_forest

# Ignore all GPUs, tf random forest does not benefit from it.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=False)

# Parameters
num_steps = 500 # Total steps to train
batch_size = 1024 # The number of samples per batch
num_classes = 10 # The 10 digits
num_features = 784 # Each image is 28x28 pixels
num_trees = 10
max_nodes = 1000

# Input and Target data
X = tf.placeholder(tf.float32, shape=[None, num_features])
# For random forest, labels must be integers (the class id)
Y = tf.placeholder(tf.int32, shape=[None])

# Random Forest Parameters
hparams = tensor_forest.ForestHParams(num_classes=num_classes,
                                      num_features=num_features,
                                      num_trees=num_trees,
                                      max_nodes=max_nodes).fill()

# Build the Random Forest
forest_graph = tensor_forest.RandomForestGraphs(hparams)
# Get training graph and loss
train_op = forest_graph.training_graph(X, Y)
loss_op = forest_graph.training_loss(X, Y)

# Measure the accuracy
infer_op = forest_graph.inference_graph(X)
correct_prediction = tf.equal(tf.argmax(infer_op, 1), tf.cast(Y, tf.int64))
accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Initialize the variables (i.e. assign their default value)
init_vars = tf.global_variables_initializer()

# Start TensorFlow session
sess = tf.Session()

# Run the initializer
sess.run(init_vars)

# Training
for i in range(1, num_steps + 1):
    # Prepare Data
    # Get the next batch of MNIST data (images and their integer class labels)
    batch_x, batch_y = mnist.train.next_batch(batch_size)
    _, l = sess.run([train_op, loss_op], feed_dict={X: batch_x, Y: batch_y})
    if i % 50 == 0 or i == 1:
        acc = sess.run(accuracy_op, feed_dict={X: batch_x, Y: batch_y})
        print('Step %i, Loss: %f, Acc: %f' % (i, l, acc))

# Test Model
test_x, test_y = mnist.test.images, mnist.test.labels
print("Test Accuracy:", sess.run(accuracy_op, feed_dict={X: test_x, Y: test_y}))
126 changes: 76 additions & 50 deletions examples/3_NeuralNetworks/autoencoder.py
@@ -1,13 +1,18 @@
 # -*- coding: utf-8 -*-
 
 """ Auto Encoder Example.
-Using an auto encoder on MNIST handwritten digits.
+
+Build a 2 layers auto-encoder with TensorFlow to compress images to a
+lower latent space and then reconstruct them.
+
 References:
     Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. "Gradient-based
     learning applied to document recognition." Proceedings of the IEEE,
     86(11):2278-2324, November 1998.
+
 Links:
     [MNIST Dataset] http://yann.lecun.com/exdb/mnist/
+
 Author: Aymeric Damien
 Project: https://github.com/aymericdamien/TensorFlow-Examples/
 """
 from __future__ import division, print_function, absolute_import
 
@@ -17,37 +22,37 @@
 
 # Import MNIST data
 from tensorflow.examples.tutorials.mnist import input_data
-mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
+mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
 
-# Parameters
+# Training Parameters
 learning_rate = 0.01
-training_epochs = 20
+num_steps = 30000
 batch_size = 256
-display_step = 1
 
+display_step = 1000
 examples_to_show = 10
 
 # Network Parameters
-n_hidden_1 = 256 # 1st layer num features
-n_hidden_2 = 128 # 2nd layer num features
-n_input = 784 # MNIST data input (img shape: 28*28)
+num_hidden_1 = 256 # 1st layer num features
+num_hidden_2 = 128 # 2nd layer num features (the latent dim)
+num_input = 784 # MNIST data input (img shape: 28*28)
 
 # tf Graph input (only pictures)
-X = tf.placeholder("float", [None, n_input])
+X = tf.placeholder("float", [None, num_input])
 
 weights = {
-    'encoder_h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
-    'encoder_h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
-    'decoder_h1': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_1])),
-    'decoder_h2': tf.Variable(tf.random_normal([n_hidden_1, n_input])),
+    'encoder_h1': tf.Variable(tf.random_normal([num_input, num_hidden_1])),
+    'encoder_h2': tf.Variable(tf.random_normal([num_hidden_1, num_hidden_2])),
+    'decoder_h1': tf.Variable(tf.random_normal([num_hidden_2, num_hidden_1])),
+    'decoder_h2': tf.Variable(tf.random_normal([num_hidden_1, num_input])),
 }
 biases = {
-    'encoder_b1': tf.Variable(tf.random_normal([n_hidden_1])),
-    'encoder_b2': tf.Variable(tf.random_normal([n_hidden_2])),
-    'decoder_b1': tf.Variable(tf.random_normal([n_hidden_1])),
-    'decoder_b2': tf.Variable(tf.random_normal([n_input])),
+    'encoder_b1': tf.Variable(tf.random_normal([num_hidden_1])),
+    'encoder_b2': tf.Variable(tf.random_normal([num_hidden_2])),
+    'decoder_b1': tf.Variable(tf.random_normal([num_hidden_1])),
+    'decoder_b2': tf.Variable(tf.random_normal([num_input])),
 }
 
 
 # Building the encoder
 def encoder(x):
     # Encoder Hidden layer with sigmoid activation #1
@@ -79,38 +84,59 @@ def decoder(x):
 y_true = X
 
 # Define loss and optimizer, minimize the squared error
-cost = tf.reduce_mean(tf.pow(y_true - y_pred, 2))
-optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(cost)
+loss = tf.reduce_mean(tf.pow(y_true - y_pred, 2))
+optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(loss)
 
-# Initializing the variables
+# Initialize the variables (i.e. assign their default value)
 init = tf.global_variables_initializer()
 
-# Launch the graph
+# Start Training
+# Start a new TF session
 with tf.Session() as sess:
 
+    # Run the initializer
     sess.run(init)
-    total_batch = int(mnist.train.num_examples/batch_size)
-    # Training cycle
-    for epoch in range(training_epochs):
-        # Loop over all batches
-        for i in range(total_batch):
-            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
-            # Run optimization op (backprop) and cost op (to get loss value)
-            _, c = sess.run([optimizer, cost], feed_dict={X: batch_xs})
-        # Display logs per epoch step
-        if epoch % display_step == 0:
-            print("Epoch:", '%04d' % (epoch+1),
-                  "cost=", "{:.9f}".format(c))
-
-    print("Optimization Finished!")
-
-    # Applying encode and decode over test set
-    encode_decode = sess.run(
-        y_pred, feed_dict={X: mnist.test.images[:examples_to_show]})
-    # Compare original images with their reconstructions
-    f, a = plt.subplots(2, 10, figsize=(10, 2))
-    for i in range(examples_to_show):
-        a[0][i].imshow(np.reshape(mnist.test.images[i], (28, 28)))
-        a[1][i].imshow(np.reshape(encode_decode[i], (28, 28)))
-    f.show()
-    plt.draw()
-    plt.waitforbuttonpress()
+
+    # Training
+    for i in range(1, num_steps+1):
+        # Prepare Data
+        # Get the next batch of MNIST data (only images are needed, not labels)
+        batch_x, _ = mnist.train.next_batch(batch_size)
+
+        # Run optimization op (backprop) and cost op (to get loss value)
+        _, l = sess.run([optimizer, loss], feed_dict={X: batch_x})
+        # Display logs per step
+        if i % display_step == 0 or i == 1:
+            print('Step %i: Minibatch Loss: %f' % (i, l))
+
+    # Testing
+    # Encode and decode images from test set and visualize their reconstruction.
+    n = 4
+    canvas_orig = np.empty((28 * n, 28 * n))
+    canvas_recon = np.empty((28 * n, 28 * n))
+    for i in range(n):
+        # MNIST test set
+        batch_x, _ = mnist.test.next_batch(n)
+        # Encode and decode the digit image
+        g = sess.run(decoder_op, feed_dict={X: batch_x})
+
+        # Display original images
+        for j in range(n):
+            # Draw the original digits
+            canvas_orig[i * 28:(i + 1) * 28, j * 28:(j + 1) * 28] = \
+                batch_x[j].reshape([28, 28])
+        # Display reconstructed images
+        for j in range(n):
+            # Draw the reconstructed digits
+            canvas_recon[i * 28:(i + 1) * 28, j * 28:(j + 1) * 28] = \
+                g[j].reshape([28, 28])
+
+    print("Original Images")
+    plt.figure(figsize=(n, n))
+    plt.imshow(canvas_orig, origin="upper", cmap="gray")
+    plt.show()
+
+    print("Reconstructed Images")
+    plt.figure(figsize=(n, n))
+    plt.imshow(canvas_recon, origin="upper", cmap="gray")
+    plt.show()
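
The encoder and decoder bodies are collapsed in the diff above, even though the visible code uses decoder_op and y_pred. For context, here is a sketch of what the two functions and the model wiring plausibly look like, assuming the 2-layer sigmoid design described in the file header and the weight/bias names defined above (reconstructed for illustration, not copied from the commit):

# Sketch of the collapsed encoder/decoder, assumed from the weight names
# and the "sigmoid activation" comments visible in the diff.
def encoder(x):
    # Encoder Hidden layer with sigmoid activation #1
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['encoder_h1']),
                                   biases['encoder_b1']))
    # Encoder Hidden layer with sigmoid activation #2 (the latent code)
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['encoder_h2']),
                                   biases['encoder_b2']))
    return layer_2


def decoder(x):
    # Decoder Hidden layer with sigmoid activation #1
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['decoder_h1']),
                                   biases['decoder_b1']))
    # Decoder Hidden layer with sigmoid activation #2 (the reconstruction)
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['decoder_h2']),
                                   biases['decoder_b2']))
    return layer_2


# Assumed model wiring: compress, then reconstruct.
encoder_op = encoder(X)
decoder_op = decoder(encoder_op)
y_pred = decoder_op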