Implemented first experiment

mdtoom · Jan 10, 2019 · 8f63ae4 · 8f63ae4
1 parent d239963
commit 8f63ae4
Show file tree

Hide file tree

Showing 7 changed files with 253 additions and 49 deletions.
diff --git a/experiments/config-feedforward b/experiments/config-feedforward
@@ -0,0 +1,82 @@
+#--- parameters for the tiling pattern experiment ---#
+
+[NEAT]
+fitness_criterion     = max
+fitness_threshold     = 100
+pop_size              = 100
+reset_on_extinction   = False
+
+[DefaultGenome]
+# node activation options
+activation_default      = sigmoid
+activation_mutate_rate  = 0.0
+activation_options      = sigmoid
+
+# node aggregation options
+aggregation_default     = sum
+aggregation_mutate_rate = 0.0
+aggregation_options     = sum
+
+# node bias options
+bias_init_mean          = 0.5
+bias_init_stdev         = 1.0
+bias_max_value          = 1.0
+bias_min_value          = -1.0
+bias_mutate_power       = 0.5
+bias_mutate_rate        = 0.7
+bias_replace_rate       = 0.1
+
+# genome compatibility options
+compatibility_disjoint_coefficient = 1.0
+compatibility_weight_coefficient   = 0.5
+
+# connection add/remove rates
+conn_add_prob           = 0.5
+conn_delete_prob        = 0.5
+
+# connection enable options
+enabled_default         = True
+enabled_mutate_rate     = 0.01
+
+feed_forward            = True
+initial_connection      = full
+
+# node add/remove rates
+node_add_prob           = 0.5
+node_delete_prob        = 0.2
+
+# network parameters
+num_hidden              = 0
+num_inputs              = 17
+num_outputs             = 4
+
+# node response options
+response_init_mean      = 1.0
+response_init_stdev     = 0.0
+response_max_value      = 5.0
+response_min_value      = -5.0
+response_mutate_power   = 0.0
+response_mutate_rate    = 0.0
+response_replace_rate   = 0.0
+
+# connection weight options
+weight_init_mean        = 0.0
+weight_init_stdev       = 1.5
+weight_max_value        = 10
+weight_min_value        = -10
+weight_mutate_power     = 0.5
+weight_mutate_rate      = 0.8
+weight_replace_rate     = 0.1
+
+[DefaultSpeciesSet]
+compatibility_threshold = 3.0
+
+[DefaultStagnation]
+species_fitness_func = max
+max_stagnation       = 20
+species_elitism      = 2
+
+[DefaultReproduction]
+elitism            = 2
+survival_threshold = 0.2
+
diff --git a/experiments/simple_metric_experiment.py b/experiments/simple_metric_experiment.py
@@ -0,0 +1,70 @@
+"""
+Simple example using the tile structure creation task.
+"""
+
+from __future__ import print_function
+import os
+import neat
+import gym
+import gym_multi_robot
+
+# Number of environment steps simulated in each trial.
+num_steps = 3000
+# NOTE(review): not referenced anywhere else in this script; gym.make below is
+# called without it. Confirm whether it should be passed to the environment.
+num_robots = 5
+# Number of trials whose results are averaged into one genome's fitness.
+num_trials = 10
+
+# Single environment instance shared by every genome evaluation.
+env = gym.make('tiling-pattern-v0')
+
+
+def eval_genomes(genomes, config):
+    count = 0
+    for genome_id, genome in genomes:
+        print(count)
+        count += 1
+        net = neat.nn.FeedForwardNetwork.create(genome, config)
+        genome.fitness = run_environment(net)
+
+
+def run_environment(net):
+    reward = 0
+    for _ in range(num_trials):
+        observation = env.reset()
+
+        sub_reward = 0
+
+        for i in range(num_steps):
+            output = [net.activate(observation[i]) for i in range(len(observation))]
+            observation, sub_reward, done, info = env.step(output)
+
+        reward += sub_reward
+
+    return reward / num_trials
+
+
+def run(config_file):
+    # Load configuration.
+    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
+                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
+                         config_file)
+
+    # Create the population, which is the top-level object for a NEAT run.
+    p = neat.Population(config)
+
+    # Add a stdout reporter to show progress in the terminal.
+    p.add_reporter(neat.StdOutReporter(True))
+    stats = neat.StatisticsReporter()
+    p.add_reporter(stats)
+
+    # Run for up to 300 generations.
+    winner = p.run(eval_genomes, 300)
+
+    # Display the winning genome.
+    print('\nBest genome:\n{!s}'.format(winner))
+
+
+if __name__ == '__main__':
+    # Determine path to configuration file. This path manipulation is
+    # here so that the script will run successfully regardless of the
+    # current working directory.
+    local_dir = os.path.dirname(__file__)
+    config_path = os.path.join(local_dir, 'config-feedforward')
+    run(config_path)
diff --git a/gym_multi_robot/envs/gripping_robot.py b/gym_multi_robot/envs/gripping_robot.py
@@ -37,6 +37,9 @@ def __init__(self):
         self.has_obstacle = False
         self.has_robot = False
 
+    def to_int_list(self):
+        return [int(self.has_tile), int(self.has_obstacle), int(self.has_robot)]
+
 
 class GripperRobot:
     """This class comprises a simple gripper robot."""
@@ -48,7 +51,6 @@ def __init__(self, identifier, heading=Heading.NORTH, location=(0, 0)):
         self.hold_object = False
         self.heading = heading
 
-        print(self.heading)
         self.location = location
         self.identifier = identifier
 
@@ -61,7 +63,6 @@ def pickup(self, game):
         if not self.hold_object and game.has_tile(self.location):
             self.hold_object = True
             game.grid[self.location[0]][self.location[1]] = False
-            print(game.grid[self.location[0]][self.location[1]])
 
     def drop(self, game):
         """This method drops an object if it is holding any."""
@@ -82,16 +83,13 @@ def move(self, move_forward, rotation, game):
         if move_forward:
             change = Heading.heading_to_change(self.heading)
             new_location = tuple(map(operator.add, self.location, change))
-            print(str(self.location) + '->' + str(new_location))
 
             # Check whether the location is within the grid.
             if not game.inside_grid(new_location):
-                print("Grid")
                 return
 
             # Check whether the new location is free.
             if game.has_robot(new_location):    # TODO: maybe it is better to leave this out.
-                print("Robot")
                 return
 
             self.location = new_location
@@ -108,13 +106,18 @@ def step(self, actions, game):
         """
 
         # execute actions
-        if actions[2]:
+        if bool(round(actions[2])) and not actions[2] < 0:  # only pickup if positive integer.
             self.pickup(game)
 
-        if actions[3]:
+        if bool(round(actions[3])) and not actions[3] < 0:  # only drop if positive integer.
             self.drop(game)
 
-        self.move(actions[0], actions[1], game)
+        move_bool = bool(round(actions[0])) and not actions[0] < 0
+
+        actions[1] = int(round(actions[1]))
+        rotate = int(actions[1] > 0) - int(actions[1] < 0)
+
+        self.move(move_bool, rotate, game)
 
         # get observation
         return self.get_observation(game)
@@ -136,7 +139,12 @@ def get_observation(self, grid):
 
             observations.append(observation)
 
-        return [self.hold_object, grid.has_tile(self.location), observations]
+        int_observation = [int(self.hold_object), int(grid.has_tile(self.location))]
+
+        for observation in observations:
+            int_observation += observation.to_int_list()
+
+        return int_observation
 
     def generate_observed_locations(self):
         """ This method generates the locations that this robot currently observes."""

diff --git a/gym_multi_robot/envs/tiling_pattern_env.py b/gym_multi_robot/envs/tiling_pattern_env.py
@@ -1,5 +1,4 @@
 import gym
-import numpy as np
 from gym.utils import seeding
 
 from gym_multi_robot.envs.tiling_pattern_view_2d import TilingPatternView2D
@@ -14,7 +13,7 @@ def __init__(self, lattice_size=2, x_dim=7, y_dim=5, seed=None, num_robots=2):
 
         self.game_view = TilingPatternView2D(
             maze_name="OpenAI Gym - Tiling Pattern ({0} x {1} x {2})".format(x_dim, y_dim, lattice_size),
-            lattice_size=2, world_size=(x_dim, y_dim), screen_size=(640, 640))
+            lattice_size=2, world_size=(x_dim, y_dim))
 
         # Simulation related variables.
         self._seed(seed)
@@ -40,7 +39,8 @@ def _configure(self, display=None):
     def step(self, actions):
 
         observation = self.game_view.game.update_robots(actions)
-        reward = 0
+
+        reward = self.game_view.game.get_fitness()
         done = self.game_view.game_over
         info = dict()
 

diff --git a/gym_multi_robot/envs/tiling_pattern_game.py b/gym_multi_robot/envs/tiling_pattern_game.py
@@ -1,5 +1,6 @@
 import random
 import math
+import gym
 
 from gym_multi_robot.envs.gripping_robot import GripperRobot, Heading
 
@@ -10,7 +11,7 @@ class TilingPatternGame:
     def __init__(self, grid_size, lattice_size, num_robots=5):
         self.grid_size = grid_size
         self.lattice_size = lattice_size
-        self.grid = [[0 for _ in range(self.GRID_H)] for _ in range(self.GRID_W)]
+        self.grid = [[False for _ in range(self.GRID_H)] for _ in range(self.GRID_W)]
         self.num_robots = num_robots
         self.robots = []
 
@@ -19,33 +20,72 @@ def __init__(self, grid_size, lattice_size, num_robots=5):
 
     def reset_grid(self):
 
+        self.grid = [[False for _ in range(self.GRID_H)] for _ in range(self.GRID_W)]
+
         num_tiles = int(math.floor(self.GRID_W / self.lattice_size) * math.floor(self.GRID_H / self.lattice_size))
 
         for _ in range(num_tiles):
 
-            put = False
-            while not put:
+            while True:
                 rand_loc = (random.randint(0, self.grid_size[0] - 1), random.randint(0, self.grid_size[1] - 1))
                 if not self.has_tile(rand_loc):
                     self.grid[rand_loc[0]][rand_loc[1]] = True
-                    put = True
+                    break
+
+        # for i in range(0, len(self.grid), self.lattice_size):
+        #     for j in range(0, len(self.grid[0]), self.lattice_size):
+        #         self.grid[i][j] = True
 
     def reset_robots(self):
         """ The method creates new robots at random positions."""
 
-        observations = []
-
         while len(self.robots) is not self.num_robots:
 
             rand_loc = (random.randint(0, self.grid_size[0] - 1), random.randint(0, self.grid_size[1] - 1))
             if not self.has_robot(rand_loc):
                 rand_heading = Heading(random.randint(0, 3))
                 robot = GripperRobot(len(self.robots), location=rand_loc, heading=rand_heading)
                 self.robots.append(robot)
-                observations.append(robot.get_observation(self))
+
+        observations = []
+        for robot in self.robots:
+            observations.append(robot.get_observation(self))
 
         return observations
 
+    def get_fitness(self):
+        """ This function gets the fitness the current tile construction.
+            The grid is divided in grid blocks, which all need to have the same number of tiles on it in order to
+            be a perfect tile construction.
+        """
+        s = -100
+        p_js = []
+
+        for i in range(0, len(self.grid) - 1, self.lattice_size): # -1 to not take into account the last block.
+            for j in range(0, len(self.grid[0]) - 1, self.lattice_size):
+
+                count = 0
+                # Count the number of squares in this lattice. p_j in paper.
+                for x in range(self.lattice_size + 1):
+                    for y in range(self.lattice_size + 1):
+                        count += int(self.grid[i + x][j + y])
+
+                p_js.append(count)
+
+        summed_p_j = sum(p_js)
+        f = 0
+
+        for i in range(len(p_js)):
+            if p_js[i] is not 0:
+                p_js[i] /= summed_p_j               # Divide by sum of all elements.
+                f += p_js[i] * math.log(p_js[i])    # Calculate entropy.
+
+        f *= s / math.log(len(p_js))
+
+        # TODO: possibly count tiles that robot holds.
+
+        return f
+
     def update_robots(self, actions):
         if len(actions) is not len(self.robots):
             raise TypeError("Should give an action for each robot.")