Skip to content

Commit

Permalink
Add TRAIN_MODE flag, launch scripts, and result images
Browse files Browse the repository at this point in the history
  • Loading branch information
Gouet committed Oct 14, 2019
1 parent 1ba8d2b commit 721cdc9
Show file tree
Hide file tree
Showing 11 changed files with 23 additions and 7 deletions.
Binary file removed av_q_max.png
Binary file not shown.
Binary file added images/average_q_max.PNG
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/rewards.PNG
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
5 changes: 5 additions & 0 deletions launch tensorboard.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
REM Launch TensorBoard for this project inside the GYM_ENV_RL conda env.
REM Uses %USERPROFILE% instead of a hard-coded "C:\Users\Victor" path so the
REM script works for any user with Anaconda3 in its default install location.
call %USERPROFILE%\Anaconda3\Scripts\activate.bat
call conda activate GYM_ENV_RL

REM Serve the ./logs event files at http://localhost:8000
tensorboard --logdir=./logs --host localhost --port 8000
pause
5 changes: 5 additions & 0 deletions launch.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
REM Launch the DDPG training script inside the GYM_ENV_RL conda env.
REM Uses %USERPROFILE% instead of a hard-coded "C:\Users\Victor" path so the
REM script works for any user with Anaconda3 in its default install location.
call %USERPROFILE%\Anaconda3\Scripts\activate.bat
call conda activate GYM_ENV_RL

python train.py
pause
Binary file removed reward.png
Binary file not shown.
Binary file added saved/actor.h5
Binary file not shown.
Binary file added saved/critic.h5
Binary file not shown.
Binary file added saved_works/actor.h5
Binary file not shown.
Binary file added saved_works/critic.h5
Binary file not shown.
20 changes: 13 additions & 7 deletions train.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import ddpg
import os

TRAIN_MODE = False

try:
os.mkdir('./saved')
except OSError:
Expand Down Expand Up @@ -86,28 +88,32 @@ def train(action, reward, state, state2, done):
ep_ave_max_q_value = 0
total_reward = 0
while not done:
env.render()
if not TRAIN_MODE:
env.render()
obs = obs.reshape((1, 3))

noise = ou()
action = actor.model.predict(obs)

action = action + noise
if TRAIN_MODE:
action = action + noise

obs2, reward, done, info = env.step(action)
total_reward += reward

train(action, reward, obs, obs2.reshape((1, 3)), done)
if TRAIN_MODE:
train(action, reward, obs, obs2.reshape((1, 3)), done)
obs = obs2
j += 1

with writer.as_default(), tf.contrib.summary.always_record_summaries():
tf.contrib.summary.scalar("average_max_q", ep_ave_max_q_value / float(j))
tf.contrib.summary.scalar("reward", total_reward)

critic.save()
actor.save()
if TRAIN_MODE:
critic.save()
actor.save()

print('average_max_q: ', ep_ave_max_q_value / float(j), 'reward: ', total_reward, 'episode:', episode)
print('average_max_q: ', ep_ave_max_q_value / float(j), 'reward: ', total_reward, 'episode:', episode)

env.close()

0 comments on commit 721cdc9

Please sign in to comment.