Commit

Revert "Features to save and reuse the trained models are now integra…
Browse files Browse the repository at this point in the history
…ted"
  • Loading branch information
pat-coady authored Mar 31, 2018
1 parent e9af493 commit 728ad75
Showing 4 changed files with 6 additions and 80 deletions.
7 changes: 0 additions & 7 deletions README.md
@@ -56,13 +56,6 @@ Here are the key points:

During training, videos are periodically saved automatically to the /tmp folder. These can be enjoyable, and also instructive.

- ### Reuse the saved trained models

- Example usage after running the updated 'train.py' that trains and saves the learned model:
- ```
- python reload.py HalfCheetah-v1
- ```

### References

1. [Trust Region Policy Optimization](https://arxiv.org/pdf/1502.05477.pdf) (Schulman et al., 2016)
18 changes: 4 additions & 14 deletions src/policy.py
@@ -4,12 +4,12 @@
Written by Patrick Coady (pat-coady.github.io)
"""
import numpy as np
- import tensorflow as tf, os
+ import tensorflow as tf


class Policy(object):
""" NN-based policy approximation """
- def __init__(self, env_name, obs_dim, act_dim, kl_targ, hid1_mult, policy_logvar):
+ def __init__(self, obs_dim, act_dim, kl_targ, hid1_mult, policy_logvar):
"""
Args:
obs_dim: num observation dimensions (int)
@@ -18,7 +18,6 @@ def __init__(self, env_name, obs_dim, act_dim, kl_targ, hid1_mult, policy_logvar
hid1_mult: size of first hidden layer, multiplier of obs_dim
policy_logvar: natural log of initial policy variance
"""
- self.env_name = env_name
self.beta = 1.0 # dynamically adjusted D_KL loss multiplier
self.eta = 50 # multiplier for D_KL-kl_targ hinge-squared loss
self.kl_targ = kl_targ
@@ -42,12 +41,8 @@ def _build_graph(self):
self._kl_entropy()
self._sample()
self._loss_train_op()
- self._saver_object()
self.init = tf.global_variables_initializer()

- def _saver_object(self):
- self.saver = tf.train.Saver(max_to_keep=10, keep_checkpoint_every_n_hours=2)

def _placeholders(self):
""" Input placeholders"""
# observations, actions and advantages:
@@ -137,9 +132,9 @@ def _kl_entropy(self):

def _sample(self):
""" Sample from distribution, given observation """
- self.sampled_act = tf.add(self.means,
+ self.sampled_act = (self.means +
tf.exp(self.log_vars / 2.0) *
- tf.random_normal(shape=(self.act_dim,)), name='output_action')
+ tf.random_normal(shape=(self.act_dim,)))

def _loss_train_op(self):
"""
@@ -213,9 +208,4 @@ def update(self, observes, actions, advantages, logger):

def close_sess(self):
""" Close TensorFlow session """
- model_directory = './saved_models/' + self.env_name + '/'
- if not os.path.exists(model_directory):
- os.makedirs(model_directory)
- with self.g.as_default():
- self.saver.save(self.sess, model_directory + 'final')
self.sess.close()
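
Note: the lines removed from policy.py above formed a TensorFlow 1.x checkpointing path: a tf.train.Saver built alongside the graph in _saver_object(), and a final checkpoint written in close_sess() before the session is closed. A minimal sketch of that save path, assuming the TF 1.x API (the standalone helper is illustrative, not part of the repository):

```python
# Sketch of the checkpoint-saving path removed above (TensorFlow 1.x API).
# Directory layout and Saver options mirror the deleted code; the standalone
# helper function is illustrative, not part of the repository.
import os
import tensorflow as tf

def save_final_checkpoint(sess, saver, env_name):
    """Write a final checkpoint to ./saved_models/<env_name>/final."""
    model_directory = './saved_models/' + env_name + '/'
    if not os.path.exists(model_directory):
        os.makedirs(model_directory)
    saver.save(sess, model_directory + 'final')

# In the deleted code the Saver was created inside the policy graph:
#   self.saver = tf.train.Saver(max_to_keep=10, keep_checkpoint_every_n_hours=2)
# and close_sess() called it on the policy's own session just before sess.close().
```

The related change to _sample() also drops the name='output_action' argument; naming that op is what would let a separate script fetch it by name after restoring the graph with tf.train.import_meta_graph().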
46 changes: 0 additions & 46 deletions src/reload.py

This file was deleted.
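
The deleted src/reload.py is not shown in the diff. Judging from the save-side code reverted elsewhere in this commit (a 'final' checkpoint plus a scale_and_offset.pkl per environment), a reload script along the following lines would work. This is a hypothetical sketch, not the original file; the tensor names and the exact observation preprocessing are assumptions.

```python
# Hypothetical reload sketch: NOT the deleted reload.py. Assumes TensorFlow 1.x,
# a checkpoint saved as ./saved_models/<env_name>/final, and the pickled scaler
# dict written by the (now reverted) train.py. Tensor names are assumptions.
import sys
import _pickle as pickle
import numpy as np
import gym
import tensorflow as tf

def main(env_name):
    model_dir = './saved_models/' + env_name + '/'
    with open(model_dir + 'scale_and_offset.pkl', 'rb') as f:
        data = pickle.load(f)
    scale, offset = data['SCALE'], data['OFFSET']

    env = gym.make(env_name)
    with tf.Session() as sess:
        # Rebuild the graph from the .meta file, then load the trained weights.
        saver = tf.train.import_meta_graph(model_dir + 'final.meta')
        saver.restore(sess, model_dir + 'final')
        obs_ph = sess.graph.get_tensor_by_name('obs:0')            # assumed placeholder name
        act_op = sess.graph.get_tensor_by_name('output_action:0')  # the named op reverted above

        obs, done = env.reset(), False
        while not done:
            scaled = (obs - offset) * scale  # same normalization as during training
            action = sess.run(act_op, feed_dict={obs_ph: scaled.reshape(1, -1)})
            obs, reward, done, _ = env.step(np.squeeze(action, axis=0))
            env.render()

if __name__ == '__main__':
    main(sys.argv[1])
```

Usage would then mirror the README example removed above: python reload.py HalfCheetah-v1.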

15 changes: 2 additions & 13 deletions src/train.py
@@ -36,7 +36,6 @@
import os
import argparse
import signal
- import _pickle as pickle


class GracefulKiller:
@@ -280,10 +279,10 @@ def main(env_name, num_episodes, gamma, lam, kl_targ, batch_size, hid1_mult, pol
now = datetime.utcnow().strftime("%b-%d_%H:%M:%S") # create unique directories
logger = Logger(logname=env_name, now=now)
aigym_path = os.path.join('/tmp', env_name, now)
- #env = wrappers.Monitor(env, aigym_path, force=True)
+ env = wrappers.Monitor(env, aigym_path, force=True)
scaler = Scaler(obs_dim)
val_func = NNValueFunction(obs_dim, hid1_mult)
- policy = Policy(env_name, obs_dim, act_dim, kl_targ, hid1_mult, policy_logvar)
+ policy = Policy(obs_dim, act_dim, kl_targ, hid1_mult, policy_logvar)
# run a few episodes of untrained policy to initialize scaler:
run_policy(env, policy, scaler, logger, episodes=5)
episode = 0
@@ -304,16 +303,6 @@ def main(env_name, num_episodes, gamma, lam, kl_targ, batch_size, hid1_mult, pol
if input('Terminate training (y/[n])? ') == 'y':
break
killer.kill_now = False

- scale, offset = scaler.get()
- data = {'SCALE': scale, 'OFFSET': offset}
- directory_to_store_data = './saved_models/' + env_name + '/'
- if not os.path.exists(directory_to_store_data):
- os.makedirs(directory_to_store_data)
- file_name = directory_to_store_data + 'scale_and_offset.pkl'
- with open(file_name, 'wb') as f:
- pickle.dump(data, f)

logger.close()
policy.close_sess()
val_func.close_sess()
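
The block removed from main() above persisted the Scaler statistics so a reload script could reproduce the training-time observation normalization. A compact round-trip under the same file layout might look like this; the load helper is an assumption about how the deleted reload.py consumed the file.

```python
# Round-trip for the scaler statistics whose saving is removed above.
# File layout follows the deleted code; the load side is an assumption.
import os
import _pickle as pickle

def save_scaler_stats(scaler, env_name):
    scale, offset = scaler.get()
    directory = './saved_models/' + env_name + '/'
    os.makedirs(directory, exist_ok=True)  # same effect as the exists()/makedirs() pair above
    with open(directory + 'scale_and_offset.pkl', 'wb') as f:
        pickle.dump({'SCALE': scale, 'OFFSET': offset}, f)

def load_scaler_stats(env_name):
    with open('./saved_models/' + env_name + '/scale_and_offset.pkl', 'rb') as f:
        data = pickle.load(f)
    return data['SCALE'], data['OFFSET']
```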
