Skip to content

Commit

Permalink
Moved the duplicated get_available_actions function
Browse files Browse the repository at this point in the history
  • Loading branch information
WhatIThinkAbout committed Dec 15, 2020
1 parent 14099a1 commit 139a7e2
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 207 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "69cab65b8edb459c9d4c5ff4712e48e7",
"model_id": "74787ac6742c45d098a395775ecc524f",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -213,7 +213,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a6f6084fd38b4a8f8b713352ea1a7f0a",
"model_id": "3a2effcaf32f465186c0720b2fcfc905",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -267,7 +267,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "99192eaf2830438cab1af172d2fbdfd7",
"model_id": "d26086274bf24f1ab047700dbf4e92c2",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -457,7 +457,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "48d75e53b598491d968f972d03e41cdc",
"model_id": "9cf52d7c5be343f9809f0b3787e89bbd",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -510,7 +510,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "47d4fa35372c4fe89ad6b6989d444ec2",
"model_id": "6a31f86bf68148bea389d94da7cec3e2",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -548,7 +548,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ca7c1509ddd34e5bb592114a9c04ee9d",
"model_id": "c94ca81598764e5cbe8a9ba7cb523a25",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -605,7 +605,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "03a1d2df912e469a9aa6768df4d5cbb1",
"model_id": "98f2da83584548efaef0b1d7a7c0d3c1",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -650,7 +650,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4144e64510ae4f7d8073391b6d5fe370",
"model_id": "5588eb7c16774f2fa4f3c830f64e6e4a",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -753,7 +753,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "322c54266f844341a895c0ad67149165",
"model_id": "de786cc3dab641228914290eb9e70d74",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -838,7 +838,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "35866e7fa20e48c99decb65371f1b664",
"model_id": "eac1c844487a4f2b86d35abd4a8b1f26",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -887,7 +887,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "07bfada1015c4e8c8aeaf02906e655a5",
"model_id": "381e1577c1a241008e5e581861c21099",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -931,7 +931,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "cd82cd9350dd4dbda355d5635f339350",
"model_id": "99e7d07f1fcc44d0ac3c4fafcabc1795",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -1045,7 +1045,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "3d59a4ebe04e4b2da404abd421196ad2",
"model_id": "0f6e40243f57402d8e6d482590893a38",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -1158,7 +1158,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8e7d2230752744a3a19d5de26e7d2984",
"model_id": "438bfe06f726492dbd4cace16f581f46",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -1220,7 +1220,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "48790374bb3b4c46a355eb730483e073",
"model_id": "15e311ee438f4d1cae6da50540c12e57",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -1284,7 +1284,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0c4a204d4a98447c8c25174973919091",
"model_id": "10faf382628a4918853fd15c64754967",
"version_major": 2,
"version_minor": 0
},
Expand Down
46 changes: 45 additions & 1 deletion Reinforcement_Learning/lib/grid_level.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class GridLevel():
end_color = 'green' # color of the exit square

maze = None # instance of maze if defined
debug_maze = True # write the maze to a svg file
debug_maze = False # write the maze to a svg file

save_images = False # enable writing canvas as an image

Expand Down Expand Up @@ -121,6 +121,50 @@ def save_to_file(self, file_name):

def get_canvas_dimensions(self):
    """Return the drawing canvas size as a [width, height] list."""
    dimensions = [self.total_width, self.total_height]
    return dimensions


def get_available_actions(self, x, y, policy=None):
    ''' Return the available actions for the grid cell at (x, y).

        The result is a dict mapping each compass direction ('N','E','S','W')
        to a bool (True = action available). Directions that would move off
        the grid (or into the filled center) are removed entirely, so an
        unreachable interior cell yields an empty dict.

        policy: optional 2D array of Direction bit-flags indexed [y, x];
        when supplied, any available action the policy does not permit is
        set to False (but kept in the dict).
    '''

    # test if the level contains a maze
    if self.maze is not None:
        # a wall in a direction means that direction is not a possible action
        cell = self.maze.cell_at(x, y)
        actions = {k: not v for k, v in cell.walls.items()}
    else:
        # initially start with all four actions being possible
        actions = {'N': True, 'E': True, 'S': True, 'W': True}

        # if the center area is not part of the level then remove any
        # actions that would move there
        if self.fill_center:
            if (1 <= x <= self.width - 2) and (1 <= y <= self.height - 2):
                # the cell itself lies inside the filled center - no actions
                actions = {}
            else:
                if (1 <= x <= self.width - 2) and (y == 0): del actions['S']
                elif (1 <= x <= self.width - 2) and (y == self.height - 1): del actions['N']
                elif (1 <= y <= self.height - 2) and (x == 0): del actions['E']
                elif (1 <= y <= self.height - 2) and (x == self.width - 1): del actions['W']

        # remove actions that would move off the edges of the grid
        # (pop with a default so a direction already removed by the
        #  fill_center logic - possible on degenerate 1-wide/1-tall
        #  grids - does not raise a KeyError)
        if x == 0: actions.pop('W', None)
        if x == self.width - 1: actions.pop('E', None)
        if y == 0: actions.pop('N', None)
        if y == self.height - 1: actions.pop('S', None)

    # test if a policy has been defined
    if policy is not None:
        # disable any available action that the policy does not permit
        dir_value = policy[y, x]
        flags = {'N': Direction.North, 'S': Direction.South,
                 'E': Direction.East, 'W': Direction.West}
        for direction, available in actions.items():
            if available and not (dir_value & flags[direction]):
                actions[direction] = False

    return actions


'''
Draw Functions
Expand Down
33 changes: 4 additions & 29 deletions Reinforcement_Learning/lib/policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def calculate_greedy_directions(self,values):
return directions

def calculate_cell_directions(self,x,y,values):
actions = self.get_available_actions(x,y)
actions = self.level.get_available_actions(x,y)

directions = 0
dir_value = 0
Expand All @@ -70,33 +70,6 @@ def calculate_cell_directions(self,x,y,values):
elif value == best_value: directions += dir_value

return int(directions)


def get_available_actions(self,x,y):
# test if the level contains a maze
if self.maze is not None:
cell = self.maze.cell_at( x, y )

# if a wall is present then that direction is not possible as an action
actions = {k: not v for k, v in cell.walls.items()}
else:
actions = {'N':True,'E':True,'S':True,'W':True}
if self.level.fill_center == True:
if ((x >= 1 and x <= self.level.width-2) and (y >= 1 and y <= self.level.height-2)):
actions = {}
else:
if ((x >= 1 and x <= self.level.width-2) and (y == 0)): del actions['S']
elif ((x >= 1 and x <= self.level.width-2) and (y == self.level.height-1)): del actions['N']
elif ((y >= 1 and y <= self.level.height-2) and (x == 0)): del actions['E']
elif ((y >= 1 and y <= self.level.height-2) and (x == self.level.width-1)): del actions['W']

if x == 0: del actions['W']
if x == self.level.width-1: del actions['E']
if y == 0: del actions['N']
if y == self.level.height-1: del actions['S']

return actions


def get_allowed_actions(self,x,y):

Expand All @@ -106,7 +79,9 @@ def get_allowed_actions(self,x,y):
end = self.level.get_end()
if x != end[0] or y != end[1]:

actions = self.get_available_actions(x,y)
# get the actions available for the level
actions = self.level.get_available_actions(x,y)

for direction,v in actions.items():
# test the action is allowed
if v == True:
Expand Down
41 changes: 2 additions & 39 deletions Reinforcement_Learning/lib/policy_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ class PolicyEvaluation():

def __init__(self,level,discount_factor = 1):
self.level = level
self.maze = level.maze
self.start_values = np.zeros((level.height,level.width))
self.end_values = np.zeros((level.height,level.width))
self.discount_factor = discount_factor
Expand Down Expand Up @@ -71,7 +70,7 @@ def get_state_value(self,pos):
return self.start_values[y,x]

def calculate_cell_value(self,x,y):
actions = self.get_available_actions(x,y)
actions = self.level.get_available_actions(x,y,self.policy)

# check that some actions are possible in this state
if not actions: return 0
Expand All @@ -97,43 +96,7 @@ def calculate_cell_value(self,x,y):
if not num_actions: return 0

# for equal probability of taking an action its just the mean of all actions
return value/num_actions

def get_available_actions(self,x,y):
# test if the level contains a maze
if self.maze is not None:
cell = self.maze.cell_at( x, y )

# if a wall is present then that direction is not possible as an action
actions = {k: not v for k, v in cell.walls.items()}
else:
actions = {'N':True,'E':True,'S':True,'W':True}
if self.level.fill_center == True:
if ((x >= 1 and x <= self.level.width-2) and (y >= 1 and y <= self.level.height-2)):
actions = {}
else:
if ((x >= 1 and x <= self.level.width-2) and (y == 0)): del actions['S']
elif ((x >= 1 and x <= self.level.width-2) and (y == self.level.height-1)): del actions['N']
elif ((y >= 1 and y <= self.level.height-2) and (x == 0)): del actions['E']
elif ((y >= 1 and y <= self.level.height-2) and (x == self.level.width-1)): del actions['W']

if x == 0: del actions['W']
if x == self.level.width-1: del actions['E']
if y == 0: del actions['N']
if y == self.level.height-1: del actions['S']

# test if a policy has been defined
if self.policy is not None:
# set any allowed actions to false if they're not in the policy
dir_value = self.policy[y,x]
for direction,v in actions.items():
if v == True:
if (direction == 'N') and not (dir_value & Direction.North): actions['N'] = False
if (direction == 'S') and not (dir_value & Direction.South): actions['S'] = False
if (direction == 'E') and not (dir_value & Direction.East): actions['E'] = False
if (direction == 'W') and not (dir_value & Direction.West): actions['W'] = False

return actions
return value/num_actions

def set_policy(self,policy):
''' set the policy to be evaluated '''
Expand Down
48 changes: 3 additions & 45 deletions Reinforcement_Learning/lib/value_iteration.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
from grid_level import GridLevel


''' implement the Value Iteration algorithm '''
Expand All @@ -7,8 +8,7 @@ class ValueIteration():
policy = None

def __init__(self,level,discount_factor=0.9):
self.level = level
self.maze = level.maze
self.level = level
self.values = np.zeros((level.height,level.width))
self.discount_factor = discount_factor

Expand All @@ -18,54 +18,12 @@ def get_state_value(self,pos):
y = pos[1]
if (x < 0 or x >= self.level.width) or (y < 0 or y >= self.level.height): return 0
return self.values[y,x]

def get_available_actions(self,x,y):
''' return the list of available actions for the specified position in the grid '''

# test if the level contains a maze
if self.maze is not None:
cell = self.maze.cell_at( x, y )

# if a wall is present then that direction is not possible as an action
actions = {k: not v for k, v in cell.walls.items()}
else:
# initially start with all actions being possible
actions = {'N':True,'E':True,'S':True,'W':True}

# if the center area is not part of the level then remove any actions that would move there
if self.level.fill_center == True:
if ((x >= 1 and x <= self.level.width-2) and (y >= 1 and y <= self.level.height-2)):
actions = {}
else:
if ((x >= 1 and x <= self.level.width-2) and (y == 0)): del actions['S']
elif ((x >= 1 and x <= self.level.width-2) and (y == self.level.height-1)): del actions['N']
elif ((y >= 1 and y <= self.level.height-2) and (x == 0)): del actions['E']
elif ((y >= 1 and y <= self.level.height-2) and (x == self.level.width-1)): del actions['W']

# remove actions that would move off the edges of the grid
if x == 0: del actions['W']
if x == self.level.width-1: del actions['E']
if y == 0: del actions['N']
if y == self.level.height-1: del actions['S']

# test if a policy has been defined
if self.policy is not None:
# set any allowed actions to false if they're not in the policy
dir_value = self.policy[y,x]
for direction,v in actions.items():
if v == True:
if (direction == 'N') and not (dir_value & Direction.North): actions['N'] = False
if (direction == 'S') and not (dir_value & Direction.South): actions['S'] = False
if (direction == 'E') and not (dir_value & Direction.East): actions['E'] = False
if (direction == 'W') and not (dir_value & Direction.West): actions['W'] = False

return actions

def calculate_max_action_value(self,x,y):
''' calculate the values of all actions in the specified cell and return the largest of these '''

# get the list of available actions for this cell
actions = self.get_available_actions(x,y)
actions = self.level.get_available_actions(x,y,self.policy)

# check that some actions are possible in this state
if not actions: return 0
Expand Down
Loading

0 comments on commit 139a7e2

Please sign in to comment.