Move kube_shell into dvactl with defaults. Avoid race in kube mode by

creating initial models on the scheduler, which is a singleton and is also launched before the webserver. Soon: export trained approximators; FAISS k-NN graph. Next: - Implement pre-condition checks and failed-to-launch error messages for processes; roll back created models on failure to launch. - Implement clustering. - Implement location net and fine-tuning / training of face recognition models.
zhaimq · Jun 9, 2018 · c7f1675 · c7f1675
1 parent b1d2a16
commit c7f1675
Show file tree

Hide file tree

Showing 3 changed files with 49 additions and 43 deletions.
diff --git a/deploy/kube/__init__.py b/deploy/kube/__init__.py
@@ -77,13 +77,13 @@ def launch_kube():
     run_commands(init_commands)
     print "sleeping for 120 seconds"
     time.sleep(120)
-    webserver_commands = ['kubectl create -n {} -f deploy/kube/webserver.yaml'.format(namespace), ]
-    run_commands(webserver_commands)
-    print "webserver launched, sleeping for 60 seconds"
-    time.sleep(60)
     scheduler_commands = ['kubectl create -n {} -f deploy/kube/scheduler.yaml'.format(namespace), ]
     run_commands(scheduler_commands)
-    print "scheduler launched, sleeping for 10 seconds"
+    print "scheduler launched, sleeping for 90 seconds"
+    time.sleep(90)
+    webserver_commands = ['kubectl create -n {} -f deploy/kube/webserver.yaml'.format(namespace), ]
+    run_commands(webserver_commands)
+    print "webserver launched, sleeping for 10 seconds"
     time.sleep(10)
     commands = []
     worker_template = file('./deploy/kube/worker.yaml.template').read()

diff --git a/dvactl b/dvactl
@@ -301,7 +301,7 @@ if __name__ == '__main__':
     ./dvactl start 
     ./dvactl auth  # recreates creds.json  
     ./dvactl exec -f script.json  # run process using creds.json and REST API
-    ./dvactl shell -c (container default:webserver) -pod (default:empty)  # enter into a shell
+    ./dvactl shell --container (container default:webserver) --pod (default:empty)  # enter into a shell
     ./dvactl stop 
     ./dvactl clean 
     """

diff --git a/server/init_fs.py b/server/init_fs.py
@@ -17,7 +17,6 @@
 from dvaapp.fs import get_path_to_file
 from django.utils import timezone
 
-
 def create_model(m, init_event):
     try:
         if m['model_type'] == TrainedModel.DETECTOR:
@@ -44,6 +43,47 @@ def create_model(m, init_event):
         pass
 
 
+def init_models():
+    # In Kube mode create models when scheduler is launched which is always the first container.
+    local_models_path = "../configs/custom_defaults/trained_models.json"
+    if 'INIT_MODELS' in os.environ and os.environ['INIT_MODELS'].strip():
+        remote_models_path = os.environ['INIT_MODELS']
+        if not remote_models_path.startswith('/root/DVA/configs/custom_defaults/'):
+            local_models_path = 'custom_models.json'
+            get_path_to_file(remote_models_path, local_models_path)
+        else:
+            local_models_path = remote_models_path
+    default_models = json.loads(file(local_models_path).read())
+    if settings.KUBE_MODE and 'LAUNCH_SCHEDULER' in os.environ:
+        init_event = TEvent.objects.create(operation="perform_init", duration=0, started=True, completed=True
+                                           , start_ts=timezone.now())
+        for m in default_models:
+            create_model(m, init_event)
+    elif not settings.KUBE_MODE:
+        init_event = TEvent.objects.create(operation="perform_init", duration=0, started=True, completed=True,
+                                           start_ts=timezone.now())
+        for m in default_models:
+            create_model(m, init_event)
+
+
+def init_process():
+    if 'INIT_PROCESS' in os.environ:
+        path = os.environ.get('INIT_PROCESS', None)
+        if path and path.strip():
+            if not path.startswith('/root/DVA/configs/custom_defaults/'):
+                get_path_to_file(path, "temp.json")
+                path = 'temp.json'
+            try:
+                jspec = json.load(file(path))
+            except:
+                logging.exception("could not load : {}".format(path))
+            else:
+                p = DVAPQLProcess()
+                if DVAPQL.objects.count() == 0:
+                    p.create_from_json(jspec)
+                    p.launch()
+
+
 if __name__ == "__main__":
     if 'SUPERUSER' in os.environ and not User.objects.filter(is_superuser=True).exists():
         try:
@@ -64,40 +104,6 @@ def create_model(m, init_event):
         for e in json.loads(file("../configs/custom_defaults/external.json").read()):
             de, _ = ExternalServer.objects.get_or_create(name=e['name'], url=e['url'])
             de.pull()
-    local_models_path = "../configs/custom_defaults/trained_models.json"
-    if 'INIT_MODELS' in os.environ and os.environ['INIT_MODELS'].strip():
-        remote_models_path = os.environ['INIT_MODELS']
-        if not remote_models_path.startswith('/root/DVA/configs/custom_defaults/'):
-            local_models_path = 'custom_models.json'
-            get_path_to_file(remote_models_path, local_models_path)
-        else:
-            local_models_path = remote_models_path
-    default_models = json.loads(file(local_models_path).read())
-    if not settings.KUBE_MODE:
-        init_event = TEvent.objects.create(operation="perform_init", duration=0, started=True, completed=True,
-                                           start_ts=timezone.now())
-        for m in default_models:
-            create_model(m, init_event)
+    init_models()
     if 'LAUNCH_SERVER' in os.environ or 'LAUNCH_SERVER_NGINX' in os.environ:
-        if settings.KUBE_MODE:
-            # todo(akshay): This code is prone to race condition when starting the cluster.
-            time.sleep(random.randint(0, 15))
-            init_event = TEvent.objects.create(operation="perform_init", duration=0, started=True, completed=True
-                                               , start_ts=timezone.now())
-            for m in default_models:
-                create_model(m, init_event)
-        if 'INIT_PROCESS' in os.environ:
-            path = os.environ.get('INIT_PROCESS', None)
-            if path and path.strip():
-                if not path.startswith('/root/DVA/configs/custom_defaults/'):
-                    get_path_to_file(path, "temp.json")
-                    path = 'temp.json'
-                try:
-                    jspec = json.load(file(path))
-                except:
-                    logging.exception("could not load : {}".format(path))
-                else:
-                    p = DVAPQLProcess()
-                    if DVAPQL.objects.count() == 0:
-                        p.create_from_json(jspec)
-                        p.launch()
+        init_process()