Skip to content

Commit

Permalink
Merge branch 'trebuchet' of github.com:MikeInnes/differentiable-rl into trebuchet
Browse files Browse the repository at this point in the history
  • Loading branch information
tejank10 committed Mar 3, 2019
2 parents 87794b3 + 6708baa commit 237c09f
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 6 deletions.
2 changes: 1 addition & 1 deletion games/pendulum/DDPG.jl
Original file line number Diff line number Diff line change
Expand Up @@ -186,11 +186,11 @@ function episode!(env, train=true)
a = action(s, train)
s′, r, done, _ = step!(env, a)
total_reward += data(r)[1]
s = s′
if train
remember(s, a, r, s′, done)
replay()
end
s = s′
end
total_reward
end
Expand Down
7 changes: 2 additions & 5 deletions games/pendulum/DiffRL.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ env = PendulumEnv()
# Task dimensions and training hyperparameters (all plain, non-`const` globals).
# State dimension: the length of whatever `reset!(env)` returns.
STATE_SIZE = length(reset!(env)) # returns state from obs space
# Hard-coded; the trailing commented-out expression is the env-derived alternative.
ACTION_SIZE = 1#length(env.actions)
# Hard-coded; the trailing commented-out expression is the env-derived alternative.
ACTION_BOUND = 2#env.action_space.hi
# Float32 scalar that `loss` compares rewards against.
MAX_REWARD = 0f0 # Max reward in a timestep
# Presumably the number of training episodes — confirm at the use site.
MAX_EP = 15_000
# Presumably the step cap per episode — confirm at the use site.
MAX_EP_LENGTH = 1000
# Presumably the number of timesteps grouped per update — confirm at the use site.
SEQ_LEN = 4
Expand All @@ -28,11 +29,7 @@ model = Chain(Dense(STATE_SIZE, 24, relu),

# Optimiser instance built from `η`, which is defined outside this excerpt
# (presumably the learning rate — confirm where `η` is assigned).
opt = ADAM(η)

"""
    loss(r)

Return `Flux.mse` of `r` against an all-zero `Float32` target.

The target has `size(r, 1)` elements and is passed through `gpu` before the
comparison.

NOTE(review): a one-line `loss(r)` with the same signature is defined right after
this function; if both are loaded, the later definition replaces this method —
confirm which one is meant to stay.
"""
function loss(r)
    target = gpu(zeros(Float32, size(r, 1)))
    return Flux.mse(r, target)
end
"""
    loss(r)

Return `Flux.mse(r, MAX_REWARD)`: the squared-error loss of `r` measured against
the global scalar `MAX_REWARD`.

NOTE(review): the target is a scalar rather than an array shaped like `r`; how
`Flux.mse` normalises in that case may depend on the Flux version — confirm
against the release this project pins.
"""
function loss(r)
    return Flux.mse(r, MAX_REWARD)
end

# ----------------------------- Helper Functions -------------------------------

Expand Down

0 comments on commit 237c09f

Please sign in to comment.