Commit eff6d55
willhwongwork committed on Jun 23, 2017
1 parent: f572110
Showing 42 changed files with 8,388 additions and 0 deletions.
README
@@ -0,0 +1,21 @@
# Machine Learning Engineer Nanodegree
# Reinforcement Learning
## Project: Train a Smartcab How to Drive

### Install

This project requires **Python 2.7** with the [pygame](https://www.pygame.org/wiki/GettingStarted) library installed.
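
If pygame is not already available, it can typically be installed with pip (the exact command may vary depending on how Python 2.7 is set up on your machine):

```pip install pygame```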

### Code

Template code is provided in the `smartcab/agent.py` Python file. Additional supporting Python code can be found in `smartcab/environment.py`, `smartcab/planner.py`, and `smartcab/simulator.py`. Supporting images for the graphical user interface can be found in the `images` folder. While some code has already been implemented to get you started, you will need to implement additional functionality for the `LearningAgent` class in `agent.py` when requested to successfully complete the project.

### Run

In a terminal or command window, navigate to the top-level project directory `smartcab/` (that contains this README) and run one of the following commands:

```python smartcab/agent.py```
```python -m smartcab.agent```

This will run the `agent.py` file and execute your agent code.

smartcab/agent.py
@@ -0,0 +1,208 @@
import random
import math
from environment import Agent, Environment
from planner import RoutePlanner
from simulator import Simulator


class LearningAgent(Agent):
    """ An agent that learns to drive in the Smartcab world.
        This is the object you will be modifying. """

    def __init__(self, env, learning=False, epsilon=1.0, alpha=0.5):
        super(LearningAgent, self).__init__(env)     # Set the agent in the environment
        self.planner = RoutePlanner(self.env, self)  # Create a route planner
        self.valid_actions = self.env.valid_actions  # The set of valid actions

        # Set parameters of the learning agent
        self.learning = learning  # Whether the agent is expected to learn
        self.Q = dict()           # Create a Q-table which will be a dictionary of tuples
        self.epsilon = epsilon    # Random exploration factor
        self.alpha = alpha        # Learning factor
        self.N = 0                # Number of trials seen so far
        ###########
        ## TO DO ##
        ###########
        # Set any additional class parameters as needed

    def reset(self, destination=None, testing=False):
        """ The reset function is called at the beginning of each trial.
            'testing' is set to True if testing trials are being used
            once training trials have completed. """

        # Select the destination as the new location to route to
        self.planner.route_to(destination)

        ###########
        ## TO DO ##
        ###########
        # Update epsilon using a decay function of your choice
        self.N += 1
        #self.epsilon = math.exp(-0.005*self.alpha*self.N)
        #self.alpha = 0.8*math.exp(-0.01*self.N)
        self.epsilon -= 0.001
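        # Note: with this linear schedule, epsilon falls from 1.0 below the default
        # testing tolerance of 0.05 after roughly 950 training trials, and below the
        # 0.01 tolerance used in run() after roughly 990 trials.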
        # Update additional class parameters as needed
        # If 'testing' is True, set epsilon and alpha to 0
        if testing == True:
            self.epsilon = 0
            self.alpha = 0

        return None

    def build_state(self):
        """ The build_state function is called when the agent requests data from the
            environment. The next waypoint, the intersection inputs, and the deadline
            are all features available to the agent. """

        # Collect data about the environment
        waypoint = self.planner.next_waypoint()  # The next waypoint
        inputs = self.env.sense(self)            # Visual input - intersection light and traffic
        deadline = self.env.get_deadline(self)   # Remaining deadline

        ###########
        ## TO DO ##
        ###########
        # Set 'state' as a tuple of relevant data for the agent
        state = (waypoint, inputs['light'], ('oncoming', inputs['oncoming']), ('left', inputs['left']))
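        # This state omits 'deadline' and the traffic approaching from the right, which
        # keeps the Q-table small: roughly 3 waypoint values, 2 light colors, and 4
        # possible values each for 'oncoming' and 'left' (exact counts depend on the
        # environment definition).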

        return state

    def get_maxQ(self, state):
        """ The get_maxQ function is called when the agent is asked to find the
            maximum Q-value of all actions based on the 'state' the smartcab is in. """

        ###########
        ## TO DO ##
        ###########
        # Calculate the maximum Q-value of all actions for a given state
        maxQ = max(self.Q[state].values())
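        # Assumes createQ(state) has already been called so self.Q[state] exists; in
        # this agent, update() always calls createQ() before choose_action(), which is
        # the only place get_maxQ() is used.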

        return maxQ

    def createQ(self, state):
        """ The createQ function is called when a state is generated by the agent. """

        ###########
        ## TO DO ##
        ###########
        # When learning, check if the 'state' is not in the Q-table
        # If it is not, create a new dictionary for that state
        # Then, for each action available, set the initial Q-value to 0.0
        if self.learning == True:
            if self.Q.get(state) == None:
                self.Q[state] = {a: 0.0 for a in Environment.valid_actions}

        return

    def choose_action(self, state):
        """ The choose_action function is called when the agent is asked to choose
            which action to take, based on the 'state' the smartcab is in. """

        # Set the agent state and default action
        self.state = state
        self.next_waypoint = self.planner.next_waypoint()
        action = None

        ###########
        ## TO DO ##
        ###########
        # When not learning, choose a random action
        if self.learning == False:
            action = random.choice(Environment.valid_actions)

        # When learning, choose a random action with 'epsilon' probability
        # Otherwise, choose an action with the highest Q-value for the current state
        else:
            keys = []
            if random.random() < self.epsilon:
                action = random.choice(Environment.valid_actions)
            else:
                # Collect every action tied for the maximum Q-value, then break ties at random
                for key, value in self.Q[state].iteritems():
                    if value == self.get_maxQ(state):
                        keys.append(key)
                        print "Appending key={}, value={}".format(key, value)
                action = random.choice(keys)
                print "action is={}".format(action)

        return action

    def learn(self, state, action, reward):
        """ The learn function is called after the agent completes an action and
            receives a reward. This function does not consider future rewards
            when conducting learning. """

        ###########
        ## TO DO ##
        ###########
        # When learning, implement the value iteration update rule
        # Use only the learning rate 'alpha' (do not use the discount factor 'gamma')
        if self.learning == True:
            self.Q[state][action] += self.alpha * (reward - self.Q[state][action])
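            # Equivalent to Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * reward, i.e. the
            # standard Q-learning update with the discount factor gamma fixed at 0,
            # as the instructions above require.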

        return

    def update(self):
        """ The update function is called when a time step is completed in the
            environment for a given trial. This function will build the agent
            state, choose an action, receive a reward, and learn if enabled. """

        state = self.build_state()           # Get current state
        self.createQ(state)                  # Create 'state' in Q-table
        action = self.choose_action(state)   # Choose an action
        reward = self.env.act(self, action)  # Receive a reward
        self.learn(state, action, reward)    # Q-learn

        return


def run():
    """ Driving function for running the simulation.
        Press ESC to close the simulation, or [SPACE] to pause the simulation. """

    ##############
    # Create the environment
    # Flags:
    #   verbose     - set to True to display additional output from the simulation
    #   num_dummies - discrete number of dummy agents in the environment, default is 100
    #   grid_size   - discrete number of intersections (columns, rows), default is (8, 6)
    env = Environment()

    ##############
    # Create the driving agent
    # Flags:
    #   learning  - set to True to force the driving agent to use Q-learning
    #    * epsilon - continuous value for the exploration factor, default is 1
    #    * alpha   - continuous value for the learning rate, default is 0.5
    agent = env.create_agent(LearningAgent, learning=True, alpha=0.5)

    ##############
    # Follow the driving agent
    # Flags:
    #   enforce_deadline - set to True to enforce a deadline metric
    env.set_primary_agent(agent, enforce_deadline=True)

    ##############
    # Create the simulation
    # Flags:
    #   update_delay - continuous time (in seconds) between actions, default is 2.0 seconds
    #   display      - set to False to disable the GUI if PyGame is enabled
    #   log_metrics  - set to True to log trial and simulation results to /logs
    #   optimized    - set to True to change the default log file name
    sim = Simulator(env, update_delay=0.01, log_metrics=True, optimized=True)

    ##############
    # Run the simulator
    # Flags:
    #   tolerance - epsilon tolerance before beginning testing, default is 0.05
    #   n_test    - discrete number of testing trials to perform, default is 0
    sim.run(tolerance=0.01, n_test=100)


if __name__ == '__main__':
    run()

.gitignore
@@ -0,0 +1,2 @@
*
!.gitignore