Skip to content

Commit

Permalink
Moved the duplicated get_available_actions function
Browse files Browse the repository at this point in the history
  • Loading branch information
WhatIThinkAbout committed Dec 15, 2020
1 parent 14099a1 commit 139a7e2
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 207 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "69cab65b8edb459c9d4c5ff4712e48e7",
"model_id": "74787ac6742c45d098a395775ecc524f",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -213,7 +213,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a6f6084fd38b4a8f8b713352ea1a7f0a",
"model_id": "3a2effcaf32f465186c0720b2fcfc905",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -267,7 +267,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "99192eaf2830438cab1af172d2fbdfd7",
"model_id": "d26086274bf24f1ab047700dbf4e92c2",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -457,7 +457,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "48d75e53b598491d968f972d03e41cdc",
"model_id": "9cf52d7c5be343f9809f0b3787e89bbd",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -510,7 +510,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "47d4fa35372c4fe89ad6b6989d444ec2",
"model_id": "6a31f86bf68148bea389d94da7cec3e2",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -548,7 +548,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ca7c1509ddd34e5bb592114a9c04ee9d",
"model_id": "c94ca81598764e5cbe8a9ba7cb523a25",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -605,7 +605,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "03a1d2df912e469a9aa6768df4d5cbb1",
"model_id": "98f2da83584548efaef0b1d7a7c0d3c1",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -650,7 +650,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4144e64510ae4f7d8073391b6d5fe370",
"model_id": "5588eb7c16774f2fa4f3c830f64e6e4a",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -753,7 +753,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "322c54266f844341a895c0ad67149165",
"model_id": "de786cc3dab641228914290eb9e70d74",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -838,7 +838,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "35866e7fa20e48c99decb65371f1b664",
"model_id": "eac1c844487a4f2b86d35abd4a8b1f26",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -887,7 +887,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "07bfada1015c4e8c8aeaf02906e655a5",
"model_id": "381e1577c1a241008e5e581861c21099",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -931,7 +931,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "cd82cd9350dd4dbda355d5635f339350",
"model_id": "99e7d07f1fcc44d0ac3c4fafcabc1795",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -1045,7 +1045,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "3d59a4ebe04e4b2da404abd421196ad2",
"model_id": "0f6e40243f57402d8e6d482590893a38",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -1158,7 +1158,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8e7d2230752744a3a19d5de26e7d2984",
"model_id": "438bfe06f726492dbd4cace16f581f46",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -1220,7 +1220,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "48790374bb3b4c46a355eb730483e073",
"model_id": "15e311ee438f4d1cae6da50540c12e57",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -1284,7 +1284,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0c4a204d4a98447c8c25174973919091",
"model_id": "10faf382628a4918853fd15c64754967",
"version_major": 2,
"version_minor": 0
},
Expand Down
46 changes: 45 additions & 1 deletion Reinforcement_Learning/lib/grid_level.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class GridLevel():
end_color = 'green' # color of the exit square

maze = None # instance of maze if defined
debug_maze = True # write the maze to a svg file
debug_maze = False # write the maze to a svg file

save_images = False # enable writing canvas as an image

Expand Down Expand Up @@ -121,6 +121,50 @@ def save_to_file(self, file_name):

def get_canvas_dimensions(self):
    """Return the drawing canvas size as a [width, height] list."""
    dimensions = [self.total_width, self.total_height]
    return dimensions


def get_available_actions(self, x, y, policy=None):
    ''' Return the available actions for the grid cell at (x, y).

        The result is a dict mapping each compass direction ('N','E','S','W')
        to a bool (True = action available). Directions that would move off
        the grid (or into the filled center) are removed entirely, so an
        unreachable interior cell yields an empty dict.

        policy: optional 2D array of Direction bit-flags indexed [y, x];
        when supplied, any available action the policy does not permit is
        set to False (but kept in the dict).
    '''

    # test if the level contains a maze
    if self.maze is not None:
        # a wall in a direction means that direction is not a possible action
        cell = self.maze.cell_at(x, y)
        actions = {k: not v for k, v in cell.walls.items()}
    else:
        # initially start with all four actions being possible
        actions = {'N': True, 'E': True, 'S': True, 'W': True}

        # if the center area is not part of the level then remove any
        # actions that would move there
        if self.fill_center:
            if (1 <= x <= self.width - 2) and (1 <= y <= self.height - 2):
                # the cell itself lies inside the filled center - no actions
                actions = {}
            else:
                if (1 <= x <= self.width - 2) and (y == 0): del actions['S']
                elif (1 <= x <= self.width - 2) and (y == self.height - 1): del actions['N']
                elif (1 <= y <= self.height - 2) and (x == 0): del actions['E']
                elif (1 <= y <= self.height - 2) and (x == self.width - 1): del actions['W']

        # remove actions that would move off the edges of the grid
        # (pop with a default so a direction already removed by the
        #  fill_center logic - possible on degenerate 1-wide/1-tall
        #  grids - does not raise a KeyError)
        if x == 0: actions.pop('W', None)
        if x == self.width - 1: actions.pop('E', None)
        if y == 0: actions.pop('N', None)
        if y == self.height - 1: actions.pop('S', None)

    # test if a policy has been defined
    if policy is not None:
        # disable any available action that the policy does not permit
        dir_value = policy[y, x]
        flags = {'N': Direction.North, 'S': Direction.South,
                 'E': Direction.East, 'W': Direction.West}
        for direction, available in actions.items():
            if available and not (dir_value & flags[direction]):
                actions[direction] = False

    return actions


'''
Draw Functions
Expand Down
33 changes: 4 additions & 29 deletions Reinforcement_Learning/lib/policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def calculate_greedy_directions(self,values):
return directions

def calculate_cell_directions(self,x,y,values):
actions = self.get_available_actions(x,y)
actions = self.level.get_available_actions(x,y)

directions = 0
dir_value = 0
Expand All @@ -70,33 +70,6 @@ def calculate_cell_directions(self,x,y,values):
elif value == best_value: directions += dir_value

return int(directions)


def get_available_actions(self,x,y):
# test if the level contains a maze
if self.maze is not None:
cell = self.maze.cell_at( x, y )

# if a wall is present then that direction is not possible as an action
actions = {k: not v for k, v in cell.walls.items()}
else:
actions = {'N':True,'E':True,'S':True,'W':True}
if self.level.fill_center == True:
if ((x >= 1 and x <= self.level.width-2) and (y >= 1 and y <= self.level.height-2)):
actions = {}
else:
if ((x >= 1 and x <= self.level.width-2) and (y == 0)): del actions['S']
elif ((x >= 1 and x <= self.level.width-2) and (y == self.level.height-1)): del actions['N']
elif ((y >= 1 and y <= self.level.height-2) and (x == 0)): del actions['E']
elif ((y >= 1 and y <= self.level.height-2) and (x == self.level.width-1)): del actions['W']

if x == 0: del actions['W']
if x == self.level.width-1: del actions['E']
if y == 0: del actions['N']
if y == self.level.height-1: del actions['S']

return actions


def get_allowed_actions(self,x,y):

Expand All @@ -106,7 +79,9 @@ def get_allowed_actions(self,x,y):
end = self.level.get_end()
if x != end[0] or y != end[1]:

actions = self.get_available_actions(x,y)
# get the actions available for the level
actions = self.level.get_available_actions(x,y)

for direction,v in actions.items():
# test the action is allowed
if v == True:
Expand Down
41 changes: 2 additions & 39 deletions Reinforcement_Learning/lib/policy_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ class PolicyEvaluation():

def __init__(self,level,discount_factor = 1):
self.level = level
self.maze = level.maze
self.start_values = np.zeros((level.height,level.width))
self.end_values = np.zeros((level.height,level.width))
self.discount_factor = discount_factor
Expand Down Expand Up @@ -71,7 +70,7 @@ def get_state_value(self,pos):
return self.start_values[y,x]

def calculate_cell_value(self,x,y):
actions = self.get_available_actions(x,y)
actions = self.level.get_available_actions(x,y,self.policy)

# check that some actions are possible in this state
if not actions: return 0
Expand All @@ -97,43 +96,7 @@ def calculate_cell_value(self,x,y):
if not num_actions: return 0

# for equal probability of taking an action its just the mean of all actions
return value/num_actions

def get_available_actions(self,x,y):
# test if the level contains a maze
if self.maze is not None:
cell = self.maze.cell_at( x, y )

# if a wall is present then that direction is not possible as an action
actions = {k: not v for k, v in cell.walls.items()}
else:
actions = {'N':True,'E':True,'S':True,'W':True}
if self.level.fill_center == True:
if ((x >= 1 and x <= self.level.width-2) and (y >= 1 and y <= self.level.height-2)):
actions = {}
else:
if ((x >= 1 and x <= self.level.width-2) and (y == 0)): del actions['S']
elif ((x >= 1 and x <= self.level.width-2) and (y == self.level.height-1)): del actions['N']
elif ((y >= 1 and y <= self.level.height-2) and (x == 0)): del actions['E']
elif ((y >= 1 and y <= self.level.height-2) and (x == self.level.width-1)): del actions['W']

if x == 0: del actions['W']
if x == self.level.width-1: del actions['E']
if y == 0: del actions['N']
if y == self.level.height-1: del actions['S']

# test if a policy has been defined
if self.policy is not None:
# set any allowed actions to false if they're not in the policy
dir_value = self.policy[y,x]
for direction,v in actions.items():
if v == True:
if (direction == 'N') and not (dir_value & Direction.North): actions['N'] = False
if (direction == 'S') and not (dir_value & Direction.South): actions['S'] = False
if (direction == 'E') and not (dir_value & Direction.East): actions['E'] = False
if (direction == 'W') and not (dir_value & Direction.West): actions['W'] = False

return actions
return value/num_actions

def set_policy(self,policy):
''' set the policy to be evaluated '''
Expand Down
48 changes: 3 additions & 45 deletions Reinforcement_Learning/lib/value_iteration.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
from grid_level import GridLevel


''' implement the Value Iteration algorithm '''
Expand All @@ -7,8 +8,7 @@ class ValueIteration():
policy = None

def __init__(self,level,discount_factor=0.9):
self.level = level
self.maze = level.maze
self.level = level
self.values = np.zeros((level.height,level.width))
self.discount_factor = discount_factor

Expand All @@ -18,54 +18,12 @@ def get_state_value(self,pos):
y = pos[1]
if (x < 0 or x >= self.level.width) or (y < 0 or y >= self.level.height): return 0
return self.values[y,x]

def get_available_actions(self,x,y):
''' return the list of available actions for the specified position in the grid '''

# test if the level contains a maze
if self.maze is not None:
cell = self.maze.cell_at( x, y )

# if a wall is present then that direction is not possible as an action
actions = {k: not v for k, v in cell.walls.items()}
else:
# initially start with all actions being possible
actions = {'N':True,'E':True,'S':True,'W':True}

# if the center area is not part of the level then remove any actions that would move there
if self.level.fill_center == True:
if ((x >= 1 and x <= self.level.width-2) and (y >= 1 and y <= self.level.height-2)):
actions = {}
else:
if ((x >= 1 and x <= self.level.width-2) and (y == 0)): del actions['S']
elif ((x >= 1 and x <= self.level.width-2) and (y == self.level.height-1)): del actions['N']
elif ((y >= 1 and y <= self.level.height-2) and (x == 0)): del actions['E']
elif ((y >= 1 and y <= self.level.height-2) and (x == self.level.width-1)): del actions['W']

# remove actions that would move off the edges of the grid
if x == 0: del actions['W']
if x == self.level.width-1: del actions['E']
if y == 0: del actions['N']
if y == self.level.height-1: del actions['S']

# test if a policy has been defined
if self.policy is not None:
# set any allowed actions to false if they're not in the policy
dir_value = self.policy[y,x]
for direction,v in actions.items():
if v == True:
if (direction == 'N') and not (dir_value & Direction.North): actions['N'] = False
if (direction == 'S') and not (dir_value & Direction.South): actions['S'] = False
if (direction == 'E') and not (dir_value & Direction.East): actions['E'] = False
if (direction == 'W') and not (dir_value & Direction.West): actions['W'] = False

return actions

def calculate_max_action_value(self,x,y):
''' calculate the values of all actions in the specified cell and return the largest of these '''

# get the list of available actions for this cell
actions = self.get_available_actions(x,y)
actions = self.level.get_available_actions(x,y,self.policy)

# check that some actions are possible in this state
if not actions: return 0
Expand Down
Loading

0 comments on commit 139a7e2

Please sign in to comment.