Skip to content

Commit

Permalink
accurate Q-values
Browse files: browse the repository at this point in the history
  • Loading branch information
tychovdo committed Mar 27, 2017
1 parent 300b907 commit fc3a3f7
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions pacmanDQN_Agents.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -65,7 +65,7 @@ def __init__(self, args):
self.qnet = DQN(self.params)

# Q and cost
- self.Q_global = 0
+ self.Q_global = []
self.cost_disp = 0

# Stats
Expand Down Expand Up @@ -94,7 +94,7 @@ def getMove(self, state):
self.qnet.terminals: np.zeros(1),
self.qnet.rewards: np.zeros(1)})[0]

- self.Q_global = max(self.Q_global, np.amax(self.Q_pred))
+ self.Q_global.append(max(self.Q_pred))
a_winner = np.argwhere(self.Q_pred == np.amax(self.Q_pred))

if len(a_winner) > 1:
Expand Down Expand Up @@ -198,7 +198,7 @@ def final(self, state):
# Print stats
sys.stdout.write("# %4d | steps: %5d | steps_t: %5d | t: %4f | r: %12f | e: %10f " %
(self.numeps,self.local_cnt, self.cnt, time.time()-self.s, self.ep_rew, self.params['eps']))
- sys.stdout.write("| Q: %10f | won: %r \n" % (self.Q_global, self.won))
+ sys.stdout.write("| Q: %10f | won: %r \n" % ((max(self.Q_global, default=float('nan')), self.won)))
sys.stdout.flush()

def train(self):
Expand Down Expand Up @@ -364,7 +364,7 @@ def registerInitialState(self, state): # inspects the starting state
# Reset vars
self.terminal = None
self.won = True
- self.Q_global = 0
+ self.Q_global = []
self.delay = 0

# Next
Expand Down

0 comments on commit fc3a3f7

Please sign in to comment.