-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathcallbacks.py
56 lines (42 loc) · 1.61 KB
/
callbacks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from rl.callbacks import Callback
from collections import OrderedDict
class Logger2048(Callback):
    """
    Specific logger for 2048 env. Saves env's score, highest value, etc.

    At the end of every episode one ``OrderedDict`` of statistics read from
    ``self.env`` is appended to ``self.episodes``; when ``verbose`` is true
    the same numbers are also printed to stdout.

    ``self.env`` is not assigned anywhere in this class. It has to be
    attached before ``on_episode_end`` runs (presumably by the training
    framework through the ``Callback`` base class -- confirm there). The
    attributes read from it are ``score``, ``highest_value``,
    ``n_steps_valid``, ``n_steps_invalid`` and ``random_seed``.
    """

    # Action labels; not referenced by any method of this class.
    POSSIBLE_ACTIONS = ["up", "down", "left", "right"]

    def __init__(self, verbose=True):
        """
        Create an empty logger.

        Args:
            verbose: When true, print a one-line summary after each episode.
        """
        # Let the base class set up whatever state it expects to exist.
        super(Logger2048, self).__init__()
        self.episodes = []
        self.verbose = verbose
        # Defined up front so the attribute exists before the first episode.
        self.current_episode = None

    def on_episode_begin(self, episode, logs=None):
        """Remember which episode is currently running."""
        self.current_episode = episode

    def on_episode_end(self, episode, logs=None):
        """
        Record (and optionally print) the statistics of the finished episode.

        Args:
            episode: Episode identifier passed by the caller; not used here.
            logs: Optional mapping. Its ``"episode_reward"`` entry is stored
                in the record; ``None`` is stored when the mapping or the
                key is missing.
        """
        # Avoid a shared mutable default; treat a missing mapping as empty.
        logs = {} if logs is None else logs

        env = self.env
        n_steps = env.n_steps_valid + env.n_steps_invalid

        if self.verbose:
            print("SCORE:", env.score,
                  "\tHIGHEST VALUE:", env.highest_value,
                  "\tVALID STEPS:", env.n_steps_valid,
                  "\tINVALID STEPS:", env.n_steps_invalid,
                  "\tTOTAL STEPS:", n_steps)

        # OrderedDict keeps the field order stable for whoever consumes
        # ``self.episodes`` later.
        self.episodes.append(OrderedDict([
            ("episode_score", env.score),
            ("highest_value", env.highest_value),
            ("nb_env_steps", n_steps),
            ("nb_env_steps_valid", env.n_steps_valid),
            ("nb_env_steps_invalid", env.n_steps_invalid),
            # .get() so a missing reward does not abort the run with KeyError.
            ("episode_reward", logs.get("episode_reward")),
            ("random_seed", env.random_seed),
        ]))

    # The remaining hooks are deliberate no-ops: this logger only works at
    # episode granularity.

    def on_step_begin(self, step, logs=None):
        """No-op."""
        pass

    def on_step_end(self, step, logs=None):
        """No-op."""
        pass

    def on_action_begin(self, action, logs=None):
        """No-op."""
        pass

    def on_action_end(self, action, logs=None):
        """No-op."""
        pass

    def on_epoch_begin(self, epoch, logs=None):
        """No-op."""
        pass

    def on_epoch_end(self, epoch, logs=None):
        """No-op."""
        pass