Remove env creation logic from TrainerController (Unity-Technologies#1562)

* Remove env creation logic from TrainerController

Currently TrainerController includes logic related to creating the UnityEnvironment, which causes poor separation of concerns between the learn.py application script, TrainerController and UnityEnvironment:

* TrainerController must know about the proper way to instantiate the UnityEnvironment, which may differ from application to application. This also makes mocking or subclassing UnityEnvironment more difficult.
* Many arguments are passed by learn.py to TrainerController and passed along to UnityEnvironment.

This change moves environment construction logic into learn.py, as part of the greater refactor to separate trainer logic from actor / environment.
thomasxm · Jan 24, 2019 · 553c6b7 · 553c6b7
1 parent 9945e9c
commit 553c6b7
Show file tree

Hide file tree

Showing 5 changed files with 603 additions and 312 deletions.
diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py
@@ -3,14 +3,23 @@
 import logging
 
 from multiprocessing import Process, Queue
+import os
+import glob
+import shutil
 import numpy as np
+import yaml
 from docopt import docopt
+from typing import Optional
+
 
 from mlagents.trainers.trainer_controller import TrainerController
 from mlagents.trainers.exception import TrainerError
+from mlagents.trainers import MetaCurriculumError, MetaCurriculum
+from mlagents.envs import UnityEnvironment
+from mlagents.envs.exception import UnityEnvironmentException
 
 
-def run_training(sub_id, run_seed, run_options, process_queue):
+def run_training(sub_id: int, run_seed: int, run_options, process_queue):
     """
     Launches training session.
     :param process_queue: Queue used to send signal back to main.
@@ -20,36 +29,147 @@ def run_training(sub_id, run_seed, run_options, process_queue):
     """
     # Docker Parameters
     docker_target_name = (run_options['--docker-target-name']
-        if run_options['--docker-target-name'] != 'None' else None)
+                          if run_options['--docker-target-name'] != 'None' else None)
 
     # General parameters
     env_path = (run_options['--env']
-        if run_options['--env'] != 'None' else None)
+                if run_options['--env'] != 'None' else None)
     run_id = run_options['--run-id']
     load_model = run_options['--load']
     train_model = run_options['--train']
     save_freq = int(run_options['--save-freq'])
     keep_checkpoints = int(run_options['--keep-checkpoints'])
     worker_id = int(run_options['--worker-id'])
-    curriculum_file = (run_options['--curriculum']
-        if run_options['--curriculum'] != 'None' else None)
+    curriculum_folder = (run_options['--curriculum']
+                         if run_options['--curriculum'] != 'None' else None)
     lesson = int(run_options['--lesson'])
     fast_simulation = not bool(run_options['--slow'])
     no_graphics = run_options['--no-graphics']
     trainer_config_path = run_options['<trainer-config-path>']
 
-    # Create controller and launch environment.
-    tc = TrainerController(env_path, run_id + '-' + str(sub_id),
-                           save_freq, curriculum_file, fast_simulation,
-                           load_model, train_model, worker_id + sub_id,
-                           keep_checkpoints, lesson, run_seed,
-                           docker_target_name, trainer_config_path, no_graphics)
+    # Recognize and use docker volume if one is passed as an argument
+    if not docker_target_name:
+        model_path = './models/{run_id}'.format(run_id=run_id)
+        summaries_dir = './summaries'
+    else:
+        trainer_config_path = \
+            '/{docker_target_name}/{trainer_config_path}'.format(
+                docker_target_name=docker_target_name,
+                trainer_config_path=trainer_config_path)
+        if curriculum_folder is not None:
+            curriculum_folder = \
+                '/{docker_target_name}/{curriculum_folder}'.format(
+                    docker_target_name=docker_target_name,
+                    curriculum_folder=curriculum_folder)
+        model_path = '/{docker_target_name}/models/{run_id}'.format(
+            docker_target_name=docker_target_name,
+            run_id=run_id)
+        summaries_dir = '/{docker_target_name}/summaries'.format(
+            docker_target_name=docker_target_name)
+
+    trainer_config = load_config(trainer_config_path)
+    env = init_environment(env_path, docker_target_name, no_graphics, worker_id + sub_id, fast_simulation, run_seed)
+    maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder, env)
+
+    external_brains = {}
+    for brain_name in env.external_brain_names:
+        external_brains[brain_name] = env.brains[brain_name]
+
+    # Create controller and begin training.
+    tc = TrainerController(model_path, summaries_dir, run_id + '-' + str(sub_id),
+                           save_freq, maybe_meta_curriculum,
+                           load_model, train_model,
+                           keep_checkpoints, lesson, external_brains, run_seed)
 
     # Signal that environment has been launched.
     process_queue.put(True)
 
     # Begin training
-    tc.start_learning()
+    tc.start_learning(env, trainer_config)
+
+
+def try_create_meta_curriculum(curriculum_folder: Optional[str], env: UnityEnvironment) -> Optional[MetaCurriculum]:  # Build a MetaCurriculum from curriculum_folder, or return None when no folder is given.
+    if curriculum_folder is None:
+        return None
+    else:
+        meta_curriculum = MetaCurriculum(curriculum_folder, env._resetParameters)  # NOTE(review): reads the underscore-prefixed env._resetParameters attribute
+        if meta_curriculum:  # NOTE(review): truthiness of a MetaCurriculum is not visible here; confirm when this guard can be false
+            for brain_name in meta_curriculum.brains_to_curriculums.keys():
+                if brain_name not in env.external_brain_names:  # every curriculum key must name one of the environment's external brains
+                    raise MetaCurriculumError('One of the curricula '
+                                              'defined in ' +
+                                              curriculum_folder + ' '
+                                              'does not have a corresponding '
+                                              'Brain. Check that the '
+                                              'curriculum file has the same '
+                                              'name as the Brain '
+                                              'whose curriculum it defines.')
+        return meta_curriculum
+
+
+def prepare_for_docker_run(docker_target_name, env_path):  # Copy matching entries from /<docker_target_name>/ into /ml-agents/ and return '/ml-agents/<env_path>'.
+    for f in glob.glob('/{docker_target_name}/*'.format(
+            docker_target_name=docker_target_name)):
+        if env_path in f:  # substring test against the full globbed path, not an exact name comparison
+            try:
+                b = os.path.basename(f)
+                if os.path.isdir(f):
+                    shutil.copytree(f,
+                                    '/ml-agents/{b}'.format(b=b))
+                else:
+                    src_f = '/{docker_target_name}/{b}'.format(
+                        docker_target_name=docker_target_name, b=b)
+                    dst_f = '/ml-agents/{b}'.format(b=b)
+                    shutil.copyfile(src_f, dst_f)
+                    os.chmod(dst_f, 0o775)  # Make executable
+            except Exception as e:  # best-effort copy: any failure is logged at info level and the loop moves on
+                logging.getLogger('mlagents.trainers').info(e)
+    env_path = '/ml-agents/{env_path}'.format(env_path=env_path)
+    return env_path  # returned unconditionally, even when nothing matched or a copy failed
+
+
+def load_config(trainer_config_path):  # Open trainer_config_path, parse it with yaml.load and return the parsed result.
+    try:
+        with open(trainer_config_path) as data_file:
+            trainer_config = yaml.load(data_file)  # NOTE(review): no explicit Loader is passed; confirm the config is trusted or consider yaml.safe_load
+            return trainer_config
+    except IOError:  # re-raised as UnityEnvironmentException carrying the path
+        raise UnityEnvironmentException('Parameter file could not be found '
+                                        'at {}.'
+                                        .format(trainer_config_path))
+    except UnicodeDecodeError:  # re-raised as UnityEnvironmentException carrying the path
+        raise UnityEnvironmentException('There was an error decoding '
+                                        'Trainer Config from this path : {}'
+                                        .format(trainer_config_path))
+
+def init_environment(env_path, docker_target_name, no_graphics, worker_id, fast_simulation, seed):  # Normalize env_path, stage the executable for docker runs, then construct and return the UnityEnvironment.
+    if env_path is not None:
+        # Strip out executable extensions if passed
+        env_path = (env_path.strip()  # NOTE(review): the replace() calls below remove these substrings anywhere in the path, not only a trailing extension
+                    .replace('.app', '')
+                    .replace('.exe', '')
+                    .replace('.x86_64', '')
+                    .replace('.x86', ''))
+    docker_training = docker_target_name is not None  # a docker target name being present is what marks this as a docker run
+    if docker_training and env_path is not None:
+            """
+            Comments for future maintenance:
+                Some OS/VM instances (e.g. COS GCP Image) mount filesystems 
+                with COS flag which prevents execution of the Unity scene, 
+                to get around this, we will copy the executable into the 
+                container.
+            """
+            # Navigate in docker path and find env_path and copy it.
+            env_path = prepare_for_docker_run(docker_target_name,
+                                              env_path)
+    return UnityEnvironment(  # NOTE(review): the fast_simulation parameter is accepted above but never used in this function
+        file_name=env_path,
+        worker_id=worker_id,
+        seed=seed,
+        docker_training=docker_training,
+        no_graphics=no_graphics
+    )
 
 
 def main():