
Commit

Fixing issues that were noted by the udacity reviewer
Vincent Trudel-Lapierre committed Aug 4, 2020
1 parent c08b674 commit 0a058f0
Showing 4 changed files with 69 additions and 62 deletions.
24 changes: 2 additions & 22 deletions README.md
@@ -35,28 +35,8 @@ The environment is considered solved, when the average (over 100 episodes) of th

(_For AWS_) If you'd like to train the agent on AWS (and have not [enabled a virtual screen](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Training-on-Amazon-Web-Service.md)), then please use [this link](https://s3-us-west-1.amazonaws.com/udacity-drlnd/P3/Tennis/Tennis_Linux_NoVis.zip) to obtain the "headless" version of the environment. You will **not** be able to watch the agent without enabling a virtual screen, but you will be able to train the agent. (_To watch the agent, you should follow the instructions to [enable a virtual screen](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Training-on-Amazon-Web-Service.md), and then download the environment for the **Linux** operating system above._)

2. Place the file in the DRLND GitHub repository, in the `p3_collab-compet/` folder, and unzip (or decompress) the file.
2. Place the environment file at the root of this repository (https://github.com/vincenttl/deep-reinforcement-learning-p3).

### Instructions

Follow the instructions in `Tennis.ipynb` to get started with training your own agent!

### (Optional) Challenge: Crawler Environment

After you have successfully completed the project, you might like to solve the more difficult **Soccer** environment.

![Soccer][image2]

In this environment, the goal is to train a team of agents to play soccer.

You can read more about this environment in the ML-Agents GitHub [here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Learning-Environment-Examples.md#soccer-twos). To solve this harder task, you'll need to download a new Unity environment. (**Note**: Udacity students should not submit a project with this new environment.)

You need only select the environment that matches your operating system:
- Linux: [click here](https://s3-us-west-1.amazonaws.com/udacity-drlnd/P3/Soccer/Soccer_Linux.zip)
- Mac OSX: [click here](https://s3-us-west-1.amazonaws.com/udacity-drlnd/P3/Soccer/Soccer.app.zip)
- Windows (32-bit): [click here](https://s3-us-west-1.amazonaws.com/udacity-drlnd/P3/Soccer/Soccer_Windows_x86.zip)
- Windows (64-bit): [click here](https://s3-us-west-1.amazonaws.com/udacity-drlnd/P3/Soccer/Soccer_Windows_x86_64.zip)

Then, place the file in the `p3_collab-compet/` folder in the DRLND GitHub repository, and unzip (or decompress) the file. Next, open `Soccer.ipynb` and follow the instructions to learn how to use the Python API to control the agent.

(_For AWS_) If you'd like to train the agents on AWS (and have not [enabled a virtual screen](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Training-on-Amazon-Web-Service.md)), then please use [this link](https://s3-us-west-1.amazonaws.com/udacity-drlnd/P3/Soccer/Soccer_Linux_NoVis.zip) to obtain the "headless" version of the environment. You will **not** be able to watch the agents without enabling a virtual screen, but you will be able to train the agents. (_To watch the agents, you should follow the instructions to [enable a virtual screen](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Training-on-Amazon-Web-Service.md), and then download the environment for the **Linux** operating system above._)
Follow the instructions in `Tennis.ipynb` to see how the agent was trained and to check an example run with the trained agent.
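
Loading the environment at the start of `Tennis.ipynb` typically looks like the sketch below. This is illustrative only, assuming the `unityagents` wrapper used in the course; the `file_name` must point at the build you placed at the root of the repository, so the exact value shown here is an assumption.

```python
from unityagents import UnityEnvironment
import numpy as np

# file_name is an assumption -- point it at the build downloaded for your OS
env = UnityEnvironment(file_name="Tennis.app")

# the Tennis build exposes a single brain that controls both rackets
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

env_info = env.reset(train_mode=True)[brain_name]
num_agents = len(env_info.agents)                    # 2 agents (one per racket)
action_size = brain.vector_action_space_size         # 2 continuous actions per agent
state_size = env_info.vector_observations.shape[1]   # 24 state variables per agent
```

From there, the notebook's training and evaluation cells drive `env.step(actions)[brain_name]` in the usual loop.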
36 changes: 32 additions & 4 deletions Tennis.ipynb
@@ -177,7 +177,7 @@
"Score (max over agents) from episode 2: 0.0\n",
"Score (max over agents) from episode 3: 0.0\n",
"Score (max over agents) from episode 4: 0.0\n",
"Score (max over agents) from episode 5: 0.10000000149011612\n"
"Score (max over agents) from episode 5: 0.0\n"
]
}
],
@@ -241,7 +241,33 @@
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"\n",
"from ddpg_agent import Agent # Agent taken from https://github.com/udacity/deep-reinforcement-learning/blob/master/ddpg-pendulum/ddpg_agent.py\n"
"from ddpg_agent import Agent # Agent taken from https://github.com/udacity/deep-reinforcement-learning/blob/master/ddpg-pendulum/ddpg_agent.py"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"ename": "SyntaxError",
"evalue": "invalid syntax (<ipython-input-9-2c674bb1446c>, line 3)",
"output_type": "error",
"traceback": [
"\u001b[0;36m File \u001b[0;32m\"<ipython-input-9-2c674bb1446c>\"\u001b[0;36m, line \u001b[0;32m3\u001b[0m\n\u001b[0;31m 'batch_size' : 512 # minibatch size\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
]
}
],
"source": [
"hyperparams = {\n",
" 'buffer_size' : int(1e6), # replay buffer size\n",
" 'batch_size' : 512, # minibatch size\n",
" 'gamma' : 0.99, # discount factor\n",
" 'tau' : 2e-1, # for soft update of target parameters\n",
" 'lr_actor' : 1e-4, # learning rate of the actor \n",
" 'lr_critic' : 3e-4, # learning rate of the critic\n",
" 'weight_decay' : 0.0000 # L2 weight decay\n",
"}"
]
},
{
@@ -250,7 +276,7 @@
"metadata": {},
"outputs": [],
"source": [
"agent = Agent(state_size=state_size, action_size=action_size, random_seed=3)"
"agent = Agent(state_size=state_size, action_size=action_size, hyperparams=hyperparams)"
]
},
{
@@ -2124,7 +2150,9 @@
},
"outputs": [],
"source": [
"agents = [Agent(state_size=state_size, action_size=action_size, random_seed=3) for n in range(num_agents)]\n",
"\n",
"\n",
"agents = [Agent(state_size=state_size, action_size=action_size, hyperparams=hyperparams) for n in range(num_agents)]\n",
"for n in range(num_agents):\n",
" agents[n].actor_local.load_state_dict(torch.load('checkpoint_actor.pth'))\n",
" agents[n].critic_local.load_state_dict(torch.load('checkpoint_critic.pth'))"
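
The cell above builds one `Agent` per racket and loads the same actor/critic checkpoints into both. An evaluation episode then typically runs along these lines; this is a sketch, assuming `env`, `brain_name`, `num_agents`, and `hyperparams` are already defined earlier in the notebook.

```python
import numpy as np

env_info = env.reset(train_mode=False)[brain_name]   # train_mode=False to watch at normal speed
states = env_info.vector_observations                # shape: (num_agents, state_size)
scores = np.zeros(num_agents)

while True:
    # each agent acts on its own observation; exploration noise is disabled for evaluation
    actions = np.vstack([agents[i].act(states[i], add_noise=False) for i in range(num_agents)])
    env_info = env.step(actions)[brain_name]
    states = env_info.vector_observations
    scores += env_info.rewards
    if np.any(env_info.local_done):                  # episode ends when either racket is done
        break

print('Score (max over agents):', np.max(scores))
```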
60 changes: 30 additions & 30 deletions ddpg_agent.py
@@ -9,57 +9,58 @@
import torch.nn.functional as F
import torch.optim as optim

BUFFER_SIZE = int(1e6) # replay buffer size
BATCH_SIZE = 512 # minibatch size
GAMMA = 0.99 # discount factor
TAU = 2e-1 # for soft update of target parameters
LR_ACTOR = 1e-4 # learning rate of the actor
LR_CRITIC = 3e-4 # learning rate of the critic
WEIGHT_DECAY = 0.0000 # L2 weight decay


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class Agent():
"""Interacts with and learns from the environment."""

def __init__(self, state_size, action_size, random_seed):
def __init__(self, state_size, action_size, hyperparams):
"""Initialize an Agent object.
Params
======
state_size (int): dimension of each state
action_size (int): dimension of each action
random_seed (int): random seed
hyperparams (dict): dictionary of hyperparameters
"""
self.state_size = state_size
self.action_size = action_size
self.seed = random.seed(random_seed)

self.buffer_size = hyperparams['buffer_size'] # replay buffer size
self.batch_size = hyperparams['batch_size'] # minibatch size
self.gamma = hyperparams['gamma'] # discount factor
self.tau = hyperparams['tau'] # for soft update of target parameters
self.lr_actor = hyperparams['lr_actor'] # learning rate of the actor
self.lr_critic = hyperparams['lr_critic'] # learning rate of the critic
self.weight_decay = hyperparams['weight_decay'] # L2 weight decay

# Actor Network (w/ Target Network)
self.actor_local = Actor(state_size, action_size, random_seed).to(device)
self.actor_target = Actor(state_size, action_size, random_seed).to(device)
self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=LR_ACTOR)
self.actor_local = Actor(state_size, action_size).to(device)
self.actor_target = Actor(state_size, action_size).to(device)
self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=self.lr_actor)

# Critic Network (w/ Target Network)
self.critic_local = Critic(state_size, action_size, random_seed).to(device)
self.critic_target = Critic(state_size, action_size, random_seed).to(device)
self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=LR_CRITIC, weight_decay=WEIGHT_DECAY)
self.critic_local = Critic(state_size, action_size).to(device)
self.critic_target = Critic(state_size, action_size).to(device)
self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=self.lr_critic, weight_decay=self.weight_decay)

# Noise process
self.noise = OUNoise(action_size, random_seed)
self.noise = OUNoise(action_size)

# Replay memory
self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, random_seed)
self.memory = ReplayBuffer(action_size, self.buffer_size, self.batch_size)

def step(self, state, action, reward, next_state, done):
"""Save experience in replay memory, and use random sample from buffer to learn."""
# Save experience / reward
self.memory.add(state, action, reward, next_state, done)

# Learn, if enough samples are available in memory
if len(self.memory) > BATCH_SIZE:
if len(self.memory) > self.batch_size:
experiences = self.memory.sample()
self.learn(experiences, GAMMA)
self.learn(experiences, self.gamma)

def act(self, state, add_noise=True):
"""Returns actions for given state as per current policy."""
@@ -73,6 +74,7 @@ def act(self, state, add_noise=True):
return np.clip(action, -1, 1)

def reset(self):
"""Resets the noise generation process"""
self.noise.reset()

def learn(self, experiences, gamma):
@@ -113,10 +115,10 @@ def learn(self, experiences, gamma):
self.actor_optimizer.step()

# ----------------------- update target networks ----------------------- #
self.soft_update(self.critic_local, self.critic_target, TAU)
self.soft_update(self.actor_local, self.actor_target, TAU)
self.soft_update(self.critic_local, self.critic_target)
self.soft_update(self.actor_local, self.actor_target)

def soft_update(self, local_model, target_model, tau):
def soft_update(self, local_model, target_model):
"""Soft update model parameters.
θ_target = τ*θ_local + (1 - τ)*θ_target
@@ -127,17 +129,16 @@ def soft_update(self, local_model, target_model, tau):
tau (float): interpolation parameter (now taken from self.tau)
"""
for target_param, local_param in zip(target_model.parameters(), local_model.parameters()):
target_param.data.copy_(tau*local_param.data + (1.0-tau)*target_param.data)
target_param.data.copy_(self.tau*local_param.data + (1.0-self.tau)*target_param.data)

class OUNoise:
"""Ornstein-Uhlenbeck process."""

def __init__(self, size, seed, mu=0., theta=0.15, sigma=0.2):
def __init__(self, size, mu=0., theta=0.15, sigma=0.2):
"""Initialize parameters and noise process."""
self.mu = mu * np.ones(size)
self.theta = theta
self.sigma = sigma
self.seed = random.seed(seed)
self.reset()

def reset(self):
@@ -147,14 +148,14 @@ def reset(self):
def sample(self):
"""Update internal state and return it as a noise sample."""
x = self.state
dx = self.theta * (self.mu - x) + self.sigma * np.array([random.random() for i in range(len(x))])
dx = self.theta * (self.mu - x) + self.sigma * np.random.randn(len(x))
self.state = x + dx
return self.state

class ReplayBuffer:
"""Fixed-size buffer to store experience tuples."""

def __init__(self, action_size, buffer_size, batch_size, seed):
def __init__(self, action_size, buffer_size, batch_size):
"""Initialize a ReplayBuffer object.
Params
======
@@ -165,13 +166,12 @@ def __init__(self, action_size, buffer_size, batch_size, seed):
self.memory = deque(maxlen=buffer_size) # internal memory (deque)
self.batch_size = batch_size
self.experience = namedtuple("Experience", field_names=["state", "action", "reward", "next_state", "done"])
self.seed = random.seed(seed)

def add(self, state, action, reward, next_state, done):
"""Add a new experience to memory."""
e = self.experience(state, action, reward, next_state, done)
self.memory.append(e)

def sample(self):
"""Randomly sample a batch of experiences from memory."""
experiences = random.sample(self.memory, k=self.batch_size)
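
With the module-level constants replaced by a `hyperparams` dict, the refactored `Agent` is driven by the standard DDPG loop. The sketch below is illustrative only, not the notebook's exact code: it assumes `env`, `brain_name`, and `hyperparams` from the notebook, uses a single shared agent for both rackets for brevity, and the episode budget is an assumption.

```python
import numpy as np
from ddpg_agent import Agent

agent = Agent(state_size=24, action_size=2, hyperparams=hyperparams)

for episode in range(1, 2001):                           # episode budget is an assumption
    env_info = env.reset(train_mode=True)[brain_name]
    states = env_info.vector_observations
    agent.reset()                                        # re-initialise the OU noise process
    scores = np.zeros(len(env_info.agents))
    while True:
        actions = np.vstack([agent.act(s) for s in states])   # exploration noise on during training
        env_info = env.step(actions)[brain_name]
        next_states = env_info.vector_observations
        # transitions from both rackets feed the shared replay buffer; learning starts
        # once the buffer holds more than hyperparams['batch_size'] samples
        for s, a, r, ns, d in zip(states, actions, env_info.rewards, next_states, env_info.local_done):
            agent.step(s, a, r, ns, d)
        states = next_states
        scores += env_info.rewards
        if np.any(env_info.local_done):
            break
```

Each learning step ends with the soft update, blending the local networks into the targets with the configured `tau`: θ_target ← τ·θ_local + (1 − τ)·θ_target.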
11 changes: 5 additions & 6 deletions model.py
@@ -5,30 +5,30 @@
import torch.nn.functional as F

def hidden_init(layer):
"""Used to calculate the limits for weight initialization"""
fan_in = layer.weight.data.size()[0]
lim = 1. / np.sqrt(fan_in)
return (-lim, lim)

class Actor(nn.Module):
"""Actor (Policy) Model."""

def __init__(self, state_size, action_size, seed, fc_units=256):
def __init__(self, state_size, action_size, fc_units=256):
"""Initialize parameters and build model.
Params
======
state_size (int): Dimension of each state
action_size (int): Dimension of each action
seed (int): Random seed
fc_units (int): Number of nodes in the hidden layer
"""
super(Actor, self).__init__()
self.seed = torch.manual_seed(seed)
self.fc1 = nn.Linear(state_size, fc_units)
self.fc2 = nn.Linear(fc_units, action_size)
self.reset_parameters()

def reset_parameters(self):
"""Sets initial weights randomly WITH a uniform distribution"""
self.fc1.weight.data.uniform_(*hidden_init(self.fc1))
self.fc2.weight.data.uniform_(-3e-3, 3e-3)

@@ -41,25 +41,24 @@ def forward(self, state):
class Critic(nn.Module):
"""Critic (Value) Model."""

def __init__(self, state_size, action_size, seed, fcs1_units=256, fc2_units=256, fc3_units=128):
def __init__(self, state_size, action_size, fcs1_units=256, fc2_units=256, fc3_units=128):
"""Initialize parameters and build model.
Params
======
state_size (int): Dimension of each state
action_size (int): Dimension of each action
seed (int): Random seed
fcs1_units (int): Number of nodes in the first hidden layer
fc2_units (int): Number of nodes in the second hidden layer
"""
super(Critic, self).__init__()
self.seed = torch.manual_seed(seed)
self.fcs1 = nn.Linear(state_size, fcs1_units)
self.fc2 = nn.Linear(fcs1_units+action_size, fc2_units)
self.fc3 = nn.Linear(fc2_units, fc3_units)
self.fc4 = nn.Linear(fc3_units, 1)
self.reset_parameters()

def reset_parameters(self):
"""Sets initial weights randomly WITH a uniform distribution"""
self.fcs1.weight.data.uniform_(*hidden_init(self.fcs1))
self.fc2.weight.data.uniform_(*hidden_init(self.fc2))
self.fc3.weight.data.uniform_(*hidden_init(self.fc3))
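
A quick sketch of what `hidden_init` computes for the layer sizes used here, assuming `model.py` is importable as `model`. Note that PyTorch stores `nn.Linear` weights as `(out_features, in_features)`, so `weight.data.size()[0]` is the layer's output width.

```python
import numpy as np
import torch.nn as nn
from model import hidden_init, Actor, Critic

layer = nn.Linear(24, 256)            # state_size=24 (Tennis), 256 hidden units
low, high = hidden_init(layer)        # weight shape is (256, 24), so size()[0] = 256
print(low, high)                      # (-1/sqrt(256), 1/sqrt(256)) = (-0.0625, 0.0625)

# with the seed argument dropped, the models are now built without one
actor = Actor(state_size=24, action_size=2)
critic = Critic(state_size=24, action_size=2)
```

Since the per-module seed arguments were removed, any seeding for reproducibility would now have to be done globally (e.g. `torch.manual_seed(...)`) before constructing the networks.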
