Skip to content

Commit

Permalink
Add TRAIN_MODE flag, launch scripts, and result images
Browse files Browse the repository at this point in the history
  • Loading branch information
Gouet committed Oct 14, 2019
1 parent 1ba8d2b commit 721cdc9
Show file tree
Hide file tree
Showing 11 changed files with 23 additions and 7 deletions.
Binary file removed av_q_max.png
Binary file not shown.
Binary file added images/average_q_max.PNG
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/rewards.PNG
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
5 changes: 5 additions & 0 deletions launch tensorboard.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
REM Launch TensorBoard for this project inside the GYM_ENV_RL conda env.
REM Uses %USERPROFILE% instead of a hard-coded "C:\Users\Victor" path so the
REM script works for any user with Anaconda3 in its default install location.
call %USERPROFILE%\Anaconda3\Scripts\activate.bat
call conda activate GYM_ENV_RL

REM Serve the ./logs event files at http://localhost:8000
tensorboard --logdir=./logs --host localhost --port 8000
pause
5 changes: 5 additions & 0 deletions launch.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
REM Launch the DDPG training script inside the GYM_ENV_RL conda env.
REM Uses %USERPROFILE% instead of a hard-coded "C:\Users\Victor" path so the
REM script works for any user with Anaconda3 in its default install location.
call %USERPROFILE%\Anaconda3\Scripts\activate.bat
call conda activate GYM_ENV_RL

python train.py
pause
Binary file removed reward.png
Binary file not shown.
Binary file added saved/actor.h5
Binary file not shown.
Binary file added saved/critic.h5
Binary file not shown.
Binary file added saved_works/actor.h5
Binary file not shown.
Binary file added saved_works/critic.h5
Binary file not shown.
20 changes: 13 additions & 7 deletions train.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import ddpg
import os

TRAIN_MODE = False

try:
os.mkdir('./saved')
except OSError:
Expand Down Expand Up @@ -86,28 +88,32 @@ def train(action, reward, state, state2, done):
ep_ave_max_q_value = 0
total_reward = 0
while not done:
env.render()
if not TRAIN_MODE:
env.render()
obs = obs.reshape((1, 3))

noise = ou()
action = actor.model.predict(obs)

action = action + noise
if TRAIN_MODE:
action = action + noise

obs2, reward, done, info = env.step(action)
total_reward += reward

train(action, reward, obs, obs2.reshape((1, 3)), done)
if TRAIN_MODE:
train(action, reward, obs, obs2.reshape((1, 3)), done)
obs = obs2
j += 1

with writer.as_default(), tf.contrib.summary.always_record_summaries():
tf.contrib.summary.scalar("average_max_q", ep_ave_max_q_value / float(j))
tf.contrib.summary.scalar("reward", total_reward)

critic.save()
actor.save()
if TRAIN_MODE:
critic.save()
actor.save()

print('average_max_q: ', ep_ave_max_q_value / float(j), 'reward: ', total_reward, 'episode:', episode)
print('average_max_q: ', ep_ave_max_q_value / float(j), 'reward: ', total_reward, 'episode:', episode)

env.close()

0 comments on commit 721cdc9

Please sign in to comment.