-
Notifications
You must be signed in to change notification settings - Fork 0
/
Environment.hs
67 lines (62 loc) · 2.6 KB
/
Environment.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
{-# LANGUAGE ExistentialQuantification #-}
module Environment where
import Util
import Data.BitVector
-- | Interface shared by every environment implementation.
--
-- Instances must define 'makeNewEnvironment', 'performAction',
-- 'getObservation', 'getReward', 'maxAction', 'maxObservation' and
-- 'maxReward'. Every other method has a default.
class (Show e) => Environment e where
  -- | Build an environment from the supplied options.
  makeNewEnvironment :: Options -> e

  -- | Takes an action and an environment and returns an environment
  -- (presumably the state after the action; semantics are up to the instance).
  performAction :: Action -> e -> e

  -- | Whether the environment has finished. Defaults to 'False' for
  -- every environment.
  isFinished :: e -> Bool
  isFinished = const False

  -- | The observation percept read from the environment.
  getObservation :: e -> Percept

  -- | The reward percept read from the environment.
  getReward :: e -> Percept

  -- | Largest valid action. No default: each instance must define it.
  maxAction :: e -> Action

  -- | Largest valid observation. No default: each instance must define it.
  maxObservation :: e -> Percept

  -- | Largest valid reward. No default: each instance must define it.
  maxReward :: e -> Percept

  -- | Smallest valid action. Defaults to 0.
  minAction :: e -> Action
  minAction = const 0

  -- | Smallest valid observation. Defaults to 0.
  minObservation :: e -> Percept
  minObservation = const 0

  -- | Smallest valid reward. Defaults to 0.
  minReward :: e -> Percept
  minReward = const 0

  -- | The number of bits required to represent an action.
  -- Defaults to 'integerWidth' applied to 'maxAction'.
  actionBits :: e -> Int
  actionBits = integerWidth . maxAction

  -- | The number of bits required to represent an observation.
  -- Defaults to 'integerWidth' applied to 'maxObservation'.
  observationBits :: e -> Int
  observationBits = integerWidth . maxObservation

  -- | Defaults to 'integerWidth' applied to 'maxReward'.
  rewardBits :: e -> Int
  rewardBits = integerWidth . maxReward

  -- | Defaults to the sum of 'observationBits' and 'rewardBits'.
  perceptBits :: e -> Int
  perceptBits env = observationBits env + rewardBits env

  -- | True when the action lies within ['minAction', 'maxAction'], inclusive.
  isValidAction :: e -> Action -> Bool
  isValidAction env candidate = lowest <= candidate && candidate <= highest
    where
      lowest = minAction env
      highest = maxAction env

  -- | True when the percept lies within
  -- ['minObservation', 'maxObservation'], inclusive.
  isValidObservation :: e -> Percept -> Bool
  isValidObservation env candidate = lowest <= candidate && candidate <= highest
    where
      lowest = minObservation env
      highest = maxObservation env

  -- | True when the percept lies within ['minReward', 'maxReward'], inclusive.
  isValidReward :: e -> Percept -> Bool
  isValidReward env candidate = lowest <= candidate && candidate <= highest
    where
      lowest = minReward env
      highest = maxReward env
-- | Existential wrapper that hides the concrete type of any value whose
-- type has an 'Environment' instance.
data EnvironmentP = forall env. (Environment env) => EnvironmentP env
-- | Shows the wrapped environment directly; the 'EnvironmentP'
-- constructor itself does not appear in the output.
instance Show EnvironmentP where
  show wrapped = case wrapped of
    EnvironmentP inner -> show inner
-- | Each method unwraps the 'EnvironmentP' and delegates to the wrapped
-- environment's own 'Environment' instance. 'makeNewEnvironment' is the
-- exception: it always calls 'error'.
instance Environment EnvironmentP where
  -- Always fails, whatever options are passed.
  makeNewEnvironment _ = error "EnvironmentP is an abstract type"

  -- The result is re-wrapped so the caller still gets an 'EnvironmentP'.
  performAction act (EnvironmentP env) = EnvironmentP $ performAction act env
  isFinished (EnvironmentP env) = isFinished env
  getObservation (EnvironmentP env) = getObservation env
  getReward (EnvironmentP env) = getReward env

  -- Bounds of the wrapped environment.
  minAction (EnvironmentP env) = minAction env
  maxAction (EnvironmentP env) = maxAction env
  minObservation (EnvironmentP env) = minObservation env
  maxObservation (EnvironmentP env) = maxObservation env
  minReward (EnvironmentP env) = minReward env
  maxReward (EnvironmentP env) = maxReward env

  -- Bit widths are delegated explicitly rather than left to the class defaults.
  actionBits (EnvironmentP env) = actionBits env
  observationBits (EnvironmentP env) = observationBits env
  rewardBits (EnvironmentP env) = rewardBits env
  perceptBits (EnvironmentP env) = perceptBits env

  -- Validity checks, partially applied to the wrapped environment.
  isValidAction (EnvironmentP env) = isValidAction env
  isValidObservation (EnvironmentP env) = isValidObservation env
  isValidReward (EnvironmentP env) = isValidReward env