diff --git a/Reinforcement_Learning/An Introduction To Reinforcement Learning.ipynb b/Reinforcement_Learning/An Introduction To Reinforcement Learning.ipynb index 9cb9bc0..2f4354f 100644 --- a/Reinforcement_Learning/An Introduction To Reinforcement Learning.ipynb +++ b/Reinforcement_Learning/An Introduction To Reinforcement Learning.ipynb @@ -147,7 +147,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "69cab65b8edb459c9d4c5ff4712e48e7", + "model_id": "74787ac6742c45d098a395775ecc524f", "version_major": 2, "version_minor": 0 }, @@ -213,7 +213,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a6f6084fd38b4a8f8b713352ea1a7f0a", + "model_id": "3a2effcaf32f465186c0720b2fcfc905", "version_major": 2, "version_minor": 0 }, @@ -267,7 +267,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "99192eaf2830438cab1af172d2fbdfd7", + "model_id": "d26086274bf24f1ab047700dbf4e92c2", "version_major": 2, "version_minor": 0 }, @@ -457,7 +457,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "48d75e53b598491d968f972d03e41cdc", + "model_id": "9cf52d7c5be343f9809f0b3787e89bbd", "version_major": 2, "version_minor": 0 }, @@ -510,7 +510,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "47d4fa35372c4fe89ad6b6989d444ec2", + "model_id": "6a31f86bf68148bea389d94da7cec3e2", "version_major": 2, "version_minor": 0 }, @@ -548,7 +548,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "ca7c1509ddd34e5bb592114a9c04ee9d", + "model_id": "c94ca81598764e5cbe8a9ba7cb523a25", "version_major": 2, "version_minor": 0 }, @@ -605,7 +605,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "03a1d2df912e469a9aa6768df4d5cbb1", + "model_id": "98f2da83584548efaef0b1d7a7c0d3c1", "version_major": 2, "version_minor": 0 }, @@ -650,7 +650,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "4144e64510ae4f7d8073391b6d5fe370", + "model_id": "5588eb7c16774f2fa4f3c830f64e6e4a", "version_major": 2, "version_minor": 0 }, @@ -753,7 +753,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "322c54266f844341a895c0ad67149165", + "model_id": "de786cc3dab641228914290eb9e70d74", "version_major": 2, "version_minor": 0 }, @@ -838,7 +838,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "35866e7fa20e48c99decb65371f1b664", + "model_id": "eac1c844487a4f2b86d35abd4a8b1f26", "version_major": 2, "version_minor": 0 }, @@ -887,7 +887,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "07bfada1015c4e8c8aeaf02906e655a5", + "model_id": "381e1577c1a241008e5e581861c21099", "version_major": 2, "version_minor": 0 }, @@ -931,7 +931,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "cd82cd9350dd4dbda355d5635f339350", + "model_id": "99e7d07f1fcc44d0ac3c4fafcabc1795", "version_major": 2, "version_minor": 0 }, @@ -1045,7 +1045,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "3d59a4ebe04e4b2da404abd421196ad2", + "model_id": "0f6e40243f57402d8e6d482590893a38", "version_major": 2, "version_minor": 0 }, @@ -1158,7 +1158,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "8e7d2230752744a3a19d5de26e7d2984", + "model_id": "438bfe06f726492dbd4cace16f581f46", "version_major": 2, "version_minor": 0 }, @@ -1220,7 +1220,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "48790374bb3b4c46a355eb730483e073", 
+ "model_id": "15e311ee438f4d1cae6da50540c12e57", "version_major": 2, "version_minor": 0 }, @@ -1284,7 +1284,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "0c4a204d4a98447c8c25174973919091", + "model_id": "10faf382628a4918853fd15c64754967", "version_major": 2, "version_minor": 0 }, diff --git a/Reinforcement_Learning/lib/grid_level.py b/Reinforcement_Learning/lib/grid_level.py index 319210c..8a6ba87 100644 --- a/Reinforcement_Learning/lib/grid_level.py +++ b/Reinforcement_Learning/lib/grid_level.py @@ -43,7 +43,7 @@ class GridLevel(): end_color = 'green' # color of the exit square maze = None # instance of maze if defined - debug_maze = True # write the maze to a svg file + debug_maze = False # write the maze to a svg file save_images = False # enable writing canvas as an image @@ -121,6 +121,50 @@ def save_to_file(self, file_name): def get_canvas_dimensions(self): return [self.total_width,self.total_height] + + + def get_available_actions(self,x,y,policy=None): + ''' return the list of available actions for the specified position in the grid ''' + + # test if the level contains a maze + if self.maze is not None: + cell = self.maze.cell_at( x, y ) + + # if a wall is present then that direction is not possible as an action + actions = {k: not v for k, v in cell.walls.items()} + else: + # initially start with all actions being possible + actions = {'N':True,'E':True,'S':True,'W':True} + + # if the center area is not part of the level then remove any actions that would move there + if self.fill_center == True: + if ((x >= 1 and x <= self.width-2) and (y >= 1 and y <= self.height-2)): + actions = {} + else: + if ((x >= 1 and x <= self.width-2) and (y == 0)): del actions['S'] + elif ((x >= 1 and x <= self.width-2) and (y == self.height-1)): del actions['N'] + elif ((y >= 1 and y <= self.height-2) and (x == 0)): del actions['E'] + elif ((y >= 1 and y <= self.height-2) and (x == self.width-1)): del actions['W'] + + # remove actions that would move off the edges of the grid + if x == 0: del actions['W'] + if x == self.width-1: del actions['E'] + if y == 0: del actions['N'] + if y == self.height-1: del actions['S'] + + # test if a policy has been defined + if policy is not None: + # set any allowed actions to false if they're not in the policy + dir_value = policy[y,x] + for direction,v in actions.items(): + if v == True: + if (direction == 'N') and not (dir_value & Direction.North): actions['N'] = False + if (direction == 'S') and not (dir_value & Direction.South): actions['S'] = False + if (direction == 'E') and not (dir_value & Direction.East): actions['E'] = False + if (direction == 'W') and not (dir_value & Direction.West): actions['W'] = False + + return actions + ''' Draw Functions diff --git a/Reinforcement_Learning/lib/policy.py b/Reinforcement_Learning/lib/policy.py index f33285b..20ecda4 100644 --- a/Reinforcement_Learning/lib/policy.py +++ b/Reinforcement_Learning/lib/policy.py @@ -49,7 +49,7 @@ def calculate_greedy_directions(self,values): return directions def calculate_cell_directions(self,x,y,values): - actions = self.get_available_actions(x,y) + actions = self.level.get_available_actions(x,y) directions = 0 dir_value = 0 @@ -70,33 +70,6 @@ def calculate_cell_directions(self,x,y,values): elif value == best_value: directions += dir_value return int(directions) - - - def get_available_actions(self,x,y): - # test if the level contains a maze - if self.maze is not None: - cell = self.maze.cell_at( x, y ) - - # if a wall is present then that direction is 
diff --git a/Reinforcement_Learning/lib/policy.py b/Reinforcement_Learning/lib/policy.py
index f33285b..20ecda4 100644
--- a/Reinforcement_Learning/lib/policy.py
+++ b/Reinforcement_Learning/lib/policy.py
@@ -49,7 +49,7 @@ def calculate_greedy_directions(self,values):
         return directions
 
     def calculate_cell_directions(self,x,y,values):
-        actions = self.get_available_actions(x,y)
+        actions = self.level.get_available_actions(x,y)
 
         directions = 0
         dir_value = 0
@@ -70,33 +70,6 @@ def calculate_cell_directions(self,x,y,values):
             elif value == best_value: directions += dir_value
 
         return int(directions)
-
-
-    def get_available_actions(self,x,y):
-        # test if the level contains a maze
-        if self.maze is not None:
-            cell = self.maze.cell_at( x, y )
-
-            # if a wall is present then that direction is not possible as an action
-            actions = {k: not v for k, v in cell.walls.items()}
-        else:
-            actions = {'N':True,'E':True,'S':True,'W':True}
-            if self.level.fill_center == True:
-                if ((x >= 1 and x <= self.level.width-2) and (y >= 1 and y <= self.level.height-2)):
-                    actions = {}
-                else:
-                    if ((x >= 1 and x <= self.level.width-2) and (y == 0)): del actions['S']
-                    elif ((x >= 1 and x <= self.level.width-2) and (y == self.level.height-1)): del actions['N']
-                    elif ((y >= 1 and y <= self.level.height-2) and (x == 0)): del actions['E']
-                    elif ((y >= 1 and y <= self.level.height-2) and (x == self.level.width-1)): del actions['W']
-
-            if x == 0: del actions['W']
-            if x == self.level.width-1: del actions['E']
-            if y == 0: del actions['N']
-            if y == self.level.height-1: del actions['S']
-
-        return actions
-
 
     def get_allowed_actions(self,x,y):
@@ -106,7 +79,9 @@ def get_allowed_actions(self,x,y):
         end = self.level.get_end()
 
         if x != end[0] or y != end[1]:
-            actions = self.get_available_actions(x,y)
+            # get the actions available for the level
+            actions = self.level.get_available_actions(x,y)
+
             for direction,v in actions.items():
                 # test the action is allowed
                 if v == True:
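
In calculate_cell_directions the greedy choices for a cell are accumulated into a single integer (directions += dir_value), and the dir_value & Direction.North style tests unpack that integer again. A small sketch of the encoding, assuming Direction is an IntFlag-style set of bit values (its actual definition lives elsewhere in lib/ and is not part of this diff):

    from enum import IntFlag

    class Direction(IntFlag):   # assumed bit layout, for illustration only
        North = 1
        East = 2
        South = 4
        West = 8

    # a cell where North and East are equally good greedy moves
    cell_policy = int(Direction.North | Direction.East)

    print(bool(cell_policy & Direction.North))   # True  -> 'N' stays available
    print(bool(cell_policy & Direction.South))   # False -> 'S' is masked off
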
diff --git a/Reinforcement_Learning/lib/policy_evaluation.py b/Reinforcement_Learning/lib/policy_evaluation.py
index 6e57ce9..6f9e5b0 100644
--- a/Reinforcement_Learning/lib/policy_evaluation.py
+++ b/Reinforcement_Learning/lib/policy_evaluation.py
@@ -11,7 +11,6 @@ class PolicyEvaluation():
 
     def __init__(self,level,discount_factor = 1):
         self.level = level
-        self.maze = level.maze
         self.start_values = np.zeros((level.height,level.width))
         self.end_values = np.zeros((level.height,level.width))
         self.discount_factor = discount_factor
@@ -71,7 +70,7 @@ def get_state_value(self,pos):
         return self.start_values[y,x]
 
     def calculate_cell_value(self,x,y):
-        actions = self.get_available_actions(x,y)
+        actions = self.level.get_available_actions(x,y,self.policy)
 
         # check that some actions are possible in this state
         if not actions: return 0
@@ -97,43 +96,7 @@ def calculate_cell_value(self,x,y):
         if not num_actions: return 0
 
         # for equal probability of taking an action its just the mean of all actions
-        return value/num_actions
-
-    def get_available_actions(self,x,y):
-        # test if the level contains a maze
-        if self.maze is not None:
-            cell = self.maze.cell_at( x, y )
-
-            # if a wall is present then that direction is not possible as an action
-            actions = {k: not v for k, v in cell.walls.items()}
-        else:
-            actions = {'N':True,'E':True,'S':True,'W':True}
-            if self.level.fill_center == True:
-                if ((x >= 1 and x <= self.level.width-2) and (y >= 1 and y <= self.level.height-2)):
-                    actions = {}
-                else:
-                    if ((x >= 1 and x <= self.level.width-2) and (y == 0)): del actions['S']
-                    elif ((x >= 1 and x <= self.level.width-2) and (y == self.level.height-1)): del actions['N']
-                    elif ((y >= 1 and y <= self.level.height-2) and (x == 0)): del actions['E']
-                    elif ((y >= 1 and y <= self.level.height-2) and (x == self.level.width-1)): del actions['W']
-
-            if x == 0: del actions['W']
-            if x == self.level.width-1: del actions['E']
-            if y == 0: del actions['N']
-            if y == self.level.height-1: del actions['S']
-
-        # test if a policy has been defined
-        if self.policy is not None:
-            # set any allowed actions to false if they're not in the policy
-            dir_value = self.policy[y,x]
-            for direction,v in actions.items():
-                if v == True:
-                    if (direction == 'N') and not (dir_value & Direction.North): actions['N'] = False
-                    if (direction == 'S') and not (dir_value & Direction.South): actions['S'] = False
-                    if (direction == 'E') and not (dir_value & Direction.East): actions['E'] = False
-                    if (direction == 'W') and not (dir_value & Direction.West): actions['W'] = False
-
-        return actions
+        return value/num_actions
 
     def set_policy(self,policy):
         ''' set the policy to be evaluated '''
diff --git a/Reinforcement_Learning/lib/value_iteration.py b/Reinforcement_Learning/lib/value_iteration.py
index cc19742..74e4765 100644
--- a/Reinforcement_Learning/lib/value_iteration.py
+++ b/Reinforcement_Learning/lib/value_iteration.py
@@ -1,4 +1,5 @@
 import numpy as np
+from grid_level import GridLevel
 '''
     implement the Value Iteration algorithm
 '''
@@ -7,8 +8,7 @@ class ValueIteration():
     policy = None
 
     def __init__(self,level,discount_factor=0.9):
-        self.level = level
-        self.maze = level.maze
+        self.level = level
         self.values = np.zeros((level.height,level.width))
         self.discount_factor = discount_factor
 
@@ -18,54 +18,12 @@ def get_state_value(self,pos):
         y = pos[1]
         if (x < 0 or x >= self.level.width) or (y < 0 or y >= self.level.height): return 0
         return self.values[y,x]
-
-    def get_available_actions(self,x,y):
-        ''' return the list of available actions for the specified position in the grid '''
-
-        # test if the level contains a maze
-        if self.maze is not None:
-            cell = self.maze.cell_at( x, y )
-
-            # if a wall is present then that direction is not possible as an action
-            actions = {k: not v for k, v in cell.walls.items()}
-        else:
-            # initially start with all actions being possible
-            actions = {'N':True,'E':True,'S':True,'W':True}
-
-            # if the center area is not part of the level then remove any actions that would move there
-            if self.level.fill_center == True:
-                if ((x >= 1 and x <= self.level.width-2) and (y >= 1 and y <= self.level.height-2)):
-                    actions = {}
-                else:
-                    if ((x >= 1 and x <= self.level.width-2) and (y == 0)): del actions['S']
-                    elif ((x >= 1 and x <= self.level.width-2) and (y == self.level.height-1)): del actions['N']
-                    elif ((y >= 1 and y <= self.level.height-2) and (x == 0)): del actions['E']
-                    elif ((y >= 1 and y <= self.level.height-2) and (x == self.level.width-1)): del actions['W']
-
-            # remove actions that would move off the edges of the grid
-            if x == 0: del actions['W']
-            if x == self.level.width-1: del actions['E']
-            if y == 0: del actions['N']
-            if y == self.level.height-1: del actions['S']
-
-        # test if a policy has been defined
-        if self.policy is not None:
-            # set any allowed actions to false if they're not in the policy
-            dir_value = self.policy[y,x]
-            for direction,v in actions.items():
-                if v == True:
-                    if (direction == 'N') and not (dir_value & Direction.North): actions['N'] = False
-                    if (direction == 'S') and not (dir_value & Direction.South): actions['S'] = False
-                    if (direction == 'E') and not (dir_value & Direction.East): actions['E'] = False
-                    if (direction == 'W') and not (dir_value & Direction.West): actions['W'] = False
-
-        return actions
 
     def calculate_max_action_value(self,x,y):
         ''' calculate the values of all actions in the specified cell and return the largest of these '''
 
         # get the list of available actions for this cell
-        actions = self.get_available_actions(x,y)
+        actions = self.level.get_available_actions(x,y,self.policy)
 
         # check that some actions are possible in this state
         if not actions: return 0
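
With the shared helper in place, calculate_max_action_value only has to take the maximum over whatever the level reports as available, which is the value-iteration backup V(s) = max_a [ r + gamma * V(s') ]. A rough sketch of that loop, assuming a per-step reward of -1 and the usual N/S/E/W move offsets (both are assumptions; the real reward handling lives inside the solver classes):

    def max_action_value(level, values, x, y, discount_factor=0.9):
        # 'values' is a NumPy array indexed [y, x], matching self.values in the classes above
        actions = level.get_available_actions(x, y)
        if not actions:
            return 0

        offsets = {'N': (0, -1), 'S': (0, 1), 'E': (1, 0), 'W': (-1, 0)}
        best = None
        for direction, allowed in actions.items():
            if not allowed:
                continue
            dx, dy = offsets[direction]
            value = -1 + discount_factor * values[y + dy, x + dx]   # assumed reward of -1 per move
            best = value if best is None else max(best, value)
        return best if best is not None else 0
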
diff --git a/Reinforcement_Learning/maze.svg b/Reinforcement_Learning/maze.svg
deleted file mode 100644
index 6b9023e..0000000
--- a/Reinforcement_Learning/maze.svg
+++ /dev/null
@@ -1,77 +0,0 @@
[77 deleted lines of SVG markup; the element content was not preserved in this extract]