Commit

tf.keras.layers and saved
Gouet committed May 23, 2019
1 parent 987372f commit 18be18a
Showing 2 changed files with 57 additions and 42 deletions.
57 changes: 28 additions & 29 deletions ddpg.py
@@ -82,12 +82,12 @@ def __init__(self, sess, state_dim, action_dim, action_bound, learning_rate, tau
         self.batch_size = batch_size
 
         # Actor Network
-        self.inputs, self.out, self.scaled_out = self.create_actor_network()
+        self.inputs, self.out = self.create_actor_network()
 
         self.network_params = tf.trainable_variables()
 
         # Target Network
-        self.target_inputs, self.target_out, self.target_scaled_out = self.create_actor_network()
+        self.target_inputs, self.target_out = self.create_actor_network()
 
         self.target_network_params = tf.trainable_variables()[
             len(self.network_params):]
@@ -104,7 +104,7 @@ def __init__(self, sess, state_dim, action_dim, action_bound, learning_rate, tau
 
         # Combine the gradients here
         self.unnormalized_actor_gradients = tf.gradients(
-            self.scaled_out, self.network_params, -self.action_gradient)
+            self.out, self.network_params, -self.action_gradient)
         self.actor_gradients = list(map(lambda x: tf.div(x, self.batch_size), self.unnormalized_actor_gradients))
 
         # Optimization Op
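
The tensor swapped in this hunk feeds the deterministic policy gradient: tf.gradients(out, params, -action_gradient) backpropagates the critic's dQ/da through the actor, and dividing by batch_size averages the per-sample gradients. A toy sketch of that construction, with a stand-in policy graph whose names and shapes are assumptions rather than this repo's code:

import tensorflow as tf

# Toy illustration of the actor-gradient trick used above:
# tf.gradients(out, params, grad_ys=-dq_da) pushes the critic's action
# gradient through a stand-in policy, then the result is batch-averaged.
batch_size = 64
state = tf.placeholder(tf.float32, [None, 3])
dq_da = tf.placeholder(tf.float32, [None, 1])   # supplied by the critic
w = tf.Variable(tf.random_normal([3, 1]))
out = tf.matmul(state, w)                        # stand-in policy output

unnormalized = tf.gradients(out, [w], -dq_da)
actor_grads = [g / batch_size for g in unnormalized]
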
@@ -115,20 +115,18 @@ def __init__(self, sess, state_dim, action_dim, action_bound, learning_rate, tau
                 self.network_params) + len(self.target_network_params)
 
     def create_actor_network(self):
-        inputs = tflearn.input_data(shape=[None, self.s_dim])
-        net = tflearn.fully_connected(inputs, 400)
-        net = tflearn.layers.normalization.batch_normalization(net)
-        net = tflearn.activations.relu(net)
-        net = tflearn.fully_connected(net, 300)
-        net = tflearn.layers.normalization.batch_normalization(net)
-        net = tflearn.activations.relu(net)
+        inputs = tf.keras.layers.Input(shape=(self.s_dim,))
+        net = tf.keras.layers.Dense(400)(inputs)
+        net = tf.keras.layers.BatchNormalization()(net)
+        net = tf.keras.layers.Activation(tf.nn.relu)(net)
+        net = tf.keras.layers.Dense(300)(net)
+        net = tf.keras.layers.BatchNormalization()(net)
+        net = tf.keras.layers.Activation(tf.nn.relu)(net)
+
         # Final layer weights are init to Uniform[-3e-3, 3e-3]
         w_init = tflearn.initializations.uniform(minval=-0.003, maxval=0.003)
-        out = tflearn.fully_connected(
-            net, self.a_dim, activation='tanh', weights_init=w_init)
-        # Scale output to -action_bound to action_bound
-        scaled_out = tf.multiply(out, self.action_bound)
-        return inputs, out, scaled_out
+        out = tf.keras.layers.Dense(self.a_dim, activation='tanh', kernel_initializer=w_init)(net)
+        return inputs, out
 
     def train(self, inputs, a_gradient):
         self.sess.run(self.optimize, feed_dict={
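
With the old scaled_out tensor gone, the converted actor no longer multiplies its tanh output by action_bound inside the graph, so predict() now returns actions in [-1, 1] and the caller has to rescale them (the `a = a * 2` line added to train.py below). A minimal sketch of that call-site scaling; the bound of 2.0 and the raw value are assumptions for illustration, 2.0 being Pendulum-v0's torque limit:

import numpy as np

# Call-site rescaling of the unscaled tanh policy output.
action_bound = 2.0                         # assumed: Pendulum-v0 torque range is [-2, 2]
raw_action = np.array([[0.37]])            # stands in for actor.predict(state), in [-1, 1]
scaled_action = raw_action * action_bound  # value actually passed to env.step()
print(scaled_action)                       # [[0.74]]
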
@@ -137,12 +135,12 @@ def train(self, inputs, a_gradient):
         })
 
     def predict(self, inputs):
-        return self.sess.run(self.scaled_out, feed_dict={
+        return self.sess.run(self.out, feed_dict={
             self.inputs: inputs
         })
 
     def predict_target(self, inputs):
-        return self.sess.run(self.target_scaled_out, feed_dict={
+        return self.sess.run(self.target_out, feed_dict={
             self.target_inputs: inputs
         })
 
@@ -194,24 +192,25 @@ def __init__(self, sess, state_dim, action_dim, learning_rate, tau, gamma, num_a
         self.action_grads = tf.gradients(self.out, self.action)
 
     def create_critic_network(self):
-        inputs = tflearn.input_data(shape=[None, self.s_dim])
-        action = tflearn.input_data(shape=[None, self.a_dim])
-        net = tflearn.fully_connected(inputs, 400)
-        net = tflearn.layers.normalization.batch_normalization(net)
-        net = tflearn.activations.relu(net)
+        #inputs = tflearn.input_data(shape=[None, self.s_dim])
+        #action = tflearn.input_data(shape=[None, self.a_dim])
+        inputs = tf.keras.layers.Input(shape=(self.s_dim,))
+        action = tf.keras.layers.Input(shape=(self.a_dim,))
+        net = tf.keras.layers.Dense(400)(inputs)
+        net = tf.keras.layers.BatchNormalization()(net)
+        net = tf.keras.layers.Activation(tf.nn.relu)(net)
 
         # Add the action tensor in the 2nd hidden layer
         # Use two temp layers to get the corresponding weights and biases
-        t1 = tflearn.fully_connected(net, 300)
-        t2 = tflearn.fully_connected(action, 300)
-
-        net = tflearn.activation(
-            tf.matmul(net, t1.W) + tf.matmul(action, t2.W) + t2.b, activation='relu')
-
+        t1 = tf.keras.layers.Dense(300, use_bias=False)(net)
+        t2 = tf.keras.layers.Dense(300)(action)
+
+        net = tf.keras.layers.Activation(tf.nn.relu)(tf.keras.layers.Add()([t1, t2]))
+
         # linear layer connected to 1 output representing Q(s,a)
         # Weights are init to Uniform[-3e-3, 3e-3]
         w_init = tflearn.initializations.uniform(minval=-0.003, maxval=0.003)
-        out = tflearn.fully_connected(net, 1, weights_init=w_init)
+        out = tf.keras.layers.Dense(1, kernel_initializer=w_init)(net)
         return inputs, action, out
 
     def train(self, inputs, action, predicted_q_value):
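
The critic keeps the DDPG pattern of injecting the action at the second hidden layer. The tflearn version spelled out tf.matmul(net, t1.W) + tf.matmul(action, t2.W) + t2.b; the tf.keras version gets the same linear form from a bias-free Dense on the state path, a regular Dense on the action path, and an Add. A self-contained sketch of just that pattern, with dimensions (3-dim state, 1-dim action) assumed rather than taken from the diff:

import tensorflow as tf

# Action injection at the second hidden layer, mirroring the new critic.
# Dense(300, use_bias=False) on the state path gives W1 * h1,
# Dense(300) on the action path gives W2 * action + b, and Add() sums them.
s_dim, a_dim = 3, 1   # assumed dimensions
state_in = tf.keras.layers.Input(shape=(s_dim,))
action_in = tf.keras.layers.Input(shape=(a_dim,))

h1 = tf.keras.layers.Dense(400, activation=tf.nn.relu)(state_in)
t1 = tf.keras.layers.Dense(300, use_bias=False)(h1)
t2 = tf.keras.layers.Dense(300)(action_in)
h2 = tf.keras.layers.Activation(tf.nn.relu)(tf.keras.layers.Add()([t1, t2]))
q_value = tf.keras.layers.Dense(1)(h2)    # Q(s, a)
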
42 changes: 29 additions & 13 deletions train.py
@@ -3,6 +3,7 @@
 import tensorflow as tf
 import numpy as np
 import ddpg
+from tensorflow.python.saved_model import tag_constants
 
 env = gym.make('Pendulum-v0')
 
@@ -34,7 +35,7 @@ def train(actor, critic, action, reward, state, state2, done):
     critic.update_target_network()
 
     buffer.add(np.reshape(state, (3,)),
-               np.reshape(action, (2,)),
+               np.reshape(action, (1,)),
                reward,
                done,
                np.reshape(state2, (3,)))
@@ -44,7 +45,8 @@ def train(actor, critic, action, reward, state, state2, done):
     s_batch, a_batch, r_batch, t_batch, s2_batch = buffer.sample_batch(batch_size)
 
     # Calculate targets
-    target_q = critic.predict_target(s2_batch, actor.predict_target(s2_batch))
+    a = actor.predict_target(s2_batch)
+    target_q = critic.predict_target(s2_batch, a)
 
     yi = []
     #print(r_batch)
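
The target computed here is the usual DDPG Bellman backup: y_i = r_i for terminal transitions and y_i = r_i + gamma * Q'(s_{i+1}, mu'(s_{i+1})) otherwise. A small numeric sketch of how yi is typically assembled from these pieces; the reward, terminal, and target-Q values below are invented for illustration:

import numpy as np

# DDPG critic targets: y = r              if the episode ended
#                      y = r + gamma * Q'  otherwise
gamma = 0.99
r_batch = np.array([-1.2, -0.8])          # example rewards (made up)
t_batch = np.array([False, True])         # terminal flags
target_q = np.array([[-10.0], [-9.5]])    # stands in for critic.predict_target(s2, a)

yi = [r + (0.0 if done else gamma * q[0])
      for r, done, q in zip(r_batch, t_batch, target_q)]
print(yi)   # approximately [-11.1, -0.8]
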
@@ -67,14 +69,19 @@ def train(actor, critic, action, reward, state, state2, done):
 
 
 with tf.Session() as sess:
-    actor = ddpg.Actor(sess, 3, 2, np.array([-2, 2]), 0.0001, 0.001, 64)
+    actor = ddpg.Actor(sess, 3, 1, 2, 0.0001, 0.001, 64)
 
-    critic = ddpg.Critic(sess, 3, 2, 0.001, 0.001, 0.99, actor.get_num_trainable_vars())
+    critic = ddpg.Critic(sess, 3, 1, 0.001, 0.001, 0.99, actor.get_num_trainable_vars())
 
     summary_ops, summary_vars = build_summaries()
 
     sess.run(tf.global_variables_initializer())
 
+    saver = tf.train.Saver()
+    try:
+        saver.restore(sess, "save/model.ckpt")
+    except Exception as e:
+        print('ERROR LOAD')
+
     writer = tf.summary.FileWriter('./logs', sess.graph)
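
The restore added here lets a run resume from an earlier checkpoint; the matching saver.save() appears at the end of each episode further down. A stand-alone sketch of the same tf.train.Saver round trip, where the variable is a stand-in and the save/ directory is assumed to exist already:

import tensorflow as tf

# tf.train.Saver round trip in the same spirit as this commit.
w = tf.Variable(tf.zeros([4]), name="w")   # stand-in variable
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    try:
        saver.restore(sess, "save/model.ckpt")   # resume a previous run
    except Exception:
        print('ERROR LOAD')                      # first run: nothing to restore yet
    # ... training would go here ...
    saver.save(sess, "save/model.ckpt")          # assumes the save/ directory exists
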

@@ -89,9 +96,12 @@ def train(actor, critic, action, reward, state, state2, done):
         while not done:
             env.render()
 
-            a = actor.predict(obs.reshape((1, 3))) + ou()
+            noise = ou()
+            a = actor.predict(obs.reshape((1, 3)))
+            a = a * 2
+            a += noise
 
-            obs2, reward, done, info = env.step(a[0])
+            obs2, reward, done, info = env.step(a)
 
             total_reward += reward
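
The ou() helper itself is not part of this diff; in DDPG it is usually an Ornstein-Uhlenbeck process that produces temporally correlated exploration noise, added here after the action has been rescaled. A typical implementation, with the class name and parameter values assumed rather than taken from this repo:

import numpy as np

# Typical Ornstein-Uhlenbeck exploration noise for DDPG (parameters assumed).
class OUNoise:
    def __init__(self, mu=0.0, theta=0.15, sigma=0.2, dt=1e-2):
        self.mu, self.theta, self.sigma, self.dt = mu, theta, sigma, dt
        self.x = mu

    def __call__(self):
        # x <- x + theta * (mu - x) * dt + sigma * sqrt(dt) * N(0, 1)
        self.x += self.theta * (self.mu - self.x) * self.dt \
                  + self.sigma * np.sqrt(self.dt) * np.random.randn()
        return self.x

ou = OUNoise()
noise = ou()   # temporally correlated noise added to the policy's action
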

@@ -101,13 +111,19 @@ def train(actor, critic, action, reward, state, state2, done):
             #print(reward)
             #time.sleep(0.5)
         print('average_max_q: ', ep_ave_max_q_value / float(j), 'reward: ', total_reward, 'episode:', episode)
-        summary_str = sess.run(summary_ops, feed_dict={
-            summary_vars[0]: total_reward,
-            summary_vars[1]: ep_ave_max_q_value / float(j)
-        })
-
-        writer.add_summary(summary_str, episode)
-        writer.flush()
+        saver = tf.train.Saver()
+        saver.save(sess, 'save/model.ckpt')
+
+        try:
+            summary_str = sess.run(summary_ops, feed_dict={
+                summary_vars[0]: tf.constant(total_reward),
+                summary_vars[1]: tf.constant(ep_ave_max_q_value / float(j))
+            })
+
+            #writer.add_summary(summary_str, episode)
+            #writer.flush()
+        except Exception as e:
+            print(e.__repr__)
 
 
 env.close()
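
For reference, the usual TF1 pattern for the scalar summaries produced by build_summaries() feeds plain Python floats through placeholders and writes the serialized result with the FileWriter. The placeholder and tag names below are assumptions, not this repo's build_summaries():

import tensorflow as tf

# Standard TF1 scalar-summary round trip (names assumed for illustration).
reward_ph = tf.placeholder(tf.float32, name="episode_reward")
qmax_ph = tf.placeholder(tf.float32, name="episode_ave_max_q")
tf.summary.scalar("Reward", reward_ph)
tf.summary.scalar("Qmax", qmax_ph)
summary_ops = tf.summary.merge_all()

with tf.Session() as sess:
    writer = tf.summary.FileWriter("./logs", sess.graph)
    summary_str = sess.run(summary_ops,
                           feed_dict={reward_ph: -123.4, qmax_ph: 0.56})
    writer.add_summary(summary_str, 0)   # 0 is the global step / episode index
    writer.flush()
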