simple_env.py
import gym
from gym import spaces, logger
from gym.utils import seeding
import numpy as np


class SimpleTestEnv(gym.Env):
    """
    Description:
        The state never changes or progresses; the goal is simply to
        minimize the reward.

    Observation:
        Type: Box(2)
        Num    Observation    Min    Max
        0      x_1            -10    10
        1      x_2            -10    10

    Actions:
        Type: Box(1)
        Num    Action                 Min    Max
        0      An arbitrary action    -10    10

    Reward:
        Reward is 1 for action 0 and |action| / 10 for all other actions.

    Starting State:
        State is x = [0, 0]

    Episode Termination:
        20 consecutive steps are taken without selecting action 0.
    """

    def __init__(self):
        self.steps_without_correct_action = 0
        self.action_space = spaces.Box(-10, 10, shape=(1,))
        self.observation_space = spaces.Box(-np.inf, np.inf, shape=(2,))  # , dtype=np.float32
        self.seed()
        self.state = None
        self.steps_beyond_done = None

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    @staticmethod
    def reward(state, action):
        reward = 1.0
        if action != 0.0:
            reward = np.abs(action) / 10.0
        return reward

    def step(self, action):
        # Reject actions outside the declared [-10, 10] action space.
        if action < -10.0 or action > 10.0:
            raise ValueError("Action {} is outside the bounds [-10, 10]".format(action))

        reward = self.reward(self.state, action)

        if action != 0.0:
            self.steps_without_correct_action += 1
        else:
            # Selecting action 0 resets the consecutive-miss counter.
            self.steps_without_correct_action = 0

        done = self.steps_without_correct_action >= 20
        if done:
            if self.steps_beyond_done is None:
                # done was returned for the first time on this step.
                self.steps_beyond_done = 0
            else:
                if self.steps_beyond_done == 0:
                    logger.warn(
                        "You are calling 'step()' even though this "
                        "environment has already returned done = True. You "
                        "should always call 'reset()' once you receive 'done = "
                        "True' -- any further steps are undefined behavior."
                    )
                self.steps_beyond_done += 1

        return np.array(self.state), reward, done, {}

    def reset(self):
        self.state = [0, 0]
        self.steps_beyond_done = None
        self.steps_without_correct_action = 0
        return np.array(self.state)
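

# A minimal usage sketch: it drives the environment directly (no gym.make
# registration) and uses the old 4-tuple (obs, reward, done, info) step API
# implemented above. Sampled continuous actions are essentially never exactly
# 0, so the episode terminates after 20 steps.
if __name__ == "__main__":
    env = SimpleTestEnv()
    obs = env.reset()                        # state is always [0, 0]
    total_reward = 0.0
    for t in range(25):
        action = env.action_space.sample()   # random action in [-10, 10]
        obs, reward, done, _ = env.step(action)
        total_reward += reward
        if done:                             # 20 consecutive non-zero actions
            break
    print("steps taken:", t + 1, "total reward:", float(total_reward))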