Skip to content

Commit

Permalink
Move kube_shell into dvactl with defaults. Avoid race in kube mode by
Browse files Browse the repository at this point in the history
creating initial models on scheduler, which is also launched before
webserver and is a singleton.

Soon: Export trained approximators. FAISS K-NN graph.

Next:

- Implement pre-condition checks and failed-to-launch error
messages for processes. Roll back created models on failure to launch.

- Implement clustering.

- Implement location net, fine-tuning / training face recognition models
  • Loading branch information
akshay bhat committed Jun 9, 2018
1 parent b1d2a16 commit c7f1675
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 43 deletions.
10 changes: 5 additions & 5 deletions deploy/kube/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,13 @@ def launch_kube():
run_commands(init_commands)
print "sleeping for 120 seconds"
time.sleep(120)
webserver_commands = ['kubectl create -n {} -f deploy/kube/webserver.yaml'.format(namespace), ]
run_commands(webserver_commands)
print "webserver launched, sleeping for 60 seconds"
time.sleep(60)
scheduler_commands = ['kubectl create -n {} -f deploy/kube/scheduler.yaml'.format(namespace), ]
run_commands(scheduler_commands)
print "scheduler launched, sleeping for 10 seconds"
print "scheduler launched, sleeping for 90 seconds"
time.sleep(90)
webserver_commands = ['kubectl create -n {} -f deploy/kube/webserver.yaml'.format(namespace), ]
run_commands(webserver_commands)
print "webserver launched, sleeping for 10 seconds"
time.sleep(10)
commands = []
worker_template = file('./deploy/kube/worker.yaml.template').read()
Expand Down
2 changes: 1 addition & 1 deletion dvactl
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ if __name__ == '__main__':
./dvactl start
./dvactl auth # recreates creds.json
./dvactl exec -f script.json # run process using creds.json and REST API
./dvactl shell -c (container default:webserver) -pod (default:empty) # enter into a shell
./dvactl shell --container (container default:webserver) --pod (default:empty) # enter into a shell
./dvactl stop
./dvactl clean
"""
Expand Down
80 changes: 43 additions & 37 deletions server/init_fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from dvaapp.fs import get_path_to_file
from django.utils import timezone


def create_model(m, init_event):
try:
if m['model_type'] == TrainedModel.DETECTOR:
Expand All @@ -44,6 +43,47 @@ def create_model(m, init_event):
pass


def init_models():
    """Load the default model specifications and create a model for each one.

    The specification file defaults to
    ``../configs/custom_defaults/trained_models.json``. When the
    ``INIT_MODELS`` environment variable is set to a non-blank value it
    overrides that default: a value under
    ``/root/DVA/configs/custom_defaults/`` is read in place, any other value
    is passed to ``get_path_to_file`` with ``custom_models.json`` as the
    local target (presumably a download/copy -- confirm against
    ``dvaapp.fs``) and then read from there.

    Models are created when not running in Kube mode, or in Kube mode only
    when ``LAUNCH_SCHEDULER`` is present in the environment. In every other
    case the specification file is still loaded but nothing is created.
    """
    # In Kube mode create models when scheduler is launched which is always the first container.
    local_models_path = "../configs/custom_defaults/trained_models.json"
    remote_models_path = os.environ.get('INIT_MODELS', '')
    if remote_models_path.strip():
        if remote_models_path.startswith('/root/DVA/configs/custom_defaults/'):
            local_models_path = remote_models_path
        else:
            local_models_path = 'custom_models.json'
            get_path_to_file(remote_models_path, local_models_path)
    # Use a context manager so the handle is closed even if parsing fails.
    with open(local_models_path) as models_file:
        default_models = json.load(models_file)
    if settings.KUBE_MODE and 'LAUNCH_SCHEDULER' not in os.environ:
        # Kube mode, but this container is not the scheduler: leave model
        # creation to the scheduler container.
        return
    init_event = TEvent.objects.create(operation="perform_init", duration=0, started=True, completed=True,
                                       start_ts=timezone.now())
    for m in default_models:
        create_model(m, init_event)


def init_process():
    """Create and launch the process named by ``INIT_PROCESS``, if any.

    Does nothing when ``INIT_PROCESS`` is unset or blank. A path outside
    ``/root/DVA/configs/custom_defaults/`` is first passed to
    ``get_path_to_file`` with ``temp.json`` as the local target (presumably
    a download/copy -- confirm against ``dvaapp.fs``) and read from there.

    A specification that cannot be read or parsed is logged and skipped
    rather than raised. The process is created and launched only when no
    ``DVAPQL`` rows exist yet.
    """
    path = os.environ.get('INIT_PROCESS', None)
    if not path or not path.strip():
        return
    if not path.startswith('/root/DVA/configs/custom_defaults/'):
        get_path_to_file(path, "temp.json")
        path = 'temp.json'
    try:
        # Context manager closes the handle even when JSON parsing fails.
        with open(path) as spec_file:
            jspec = json.load(spec_file)
    except Exception:
        # Best-effort: a bad spec must not abort start-up, but unlike a bare
        # ``except`` this lets KeyboardInterrupt/SystemExit propagate.
        logging.exception("could not load : {}".format(path))
        return
    p = DVAPQLProcess()
    if DVAPQL.objects.count() == 0:
        p.create_from_json(jspec)
        p.launch()


if __name__ == "__main__":
if 'SUPERUSER' in os.environ and not User.objects.filter(is_superuser=True).exists():
try:
Expand All @@ -64,40 +104,6 @@ def create_model(m, init_event):
for e in json.loads(file("../configs/custom_defaults/external.json").read()):
de, _ = ExternalServer.objects.get_or_create(name=e['name'], url=e['url'])
de.pull()
local_models_path = "../configs/custom_defaults/trained_models.json"
if 'INIT_MODELS' in os.environ and os.environ['INIT_MODELS'].strip():
remote_models_path = os.environ['INIT_MODELS']
if not remote_models_path.startswith('/root/DVA/configs/custom_defaults/'):
local_models_path = 'custom_models.json'
get_path_to_file(remote_models_path, local_models_path)
else:
local_models_path = remote_models_path
default_models = json.loads(file(local_models_path).read())
if not settings.KUBE_MODE:
init_event = TEvent.objects.create(operation="perform_init", duration=0, started=True, completed=True,
start_ts=timezone.now())
for m in default_models:
create_model(m, init_event)
init_models()
if 'LAUNCH_SERVER' in os.environ or 'LAUNCH_SERVER_NGINX' in os.environ:
if settings.KUBE_MODE:
# todo(akshay): This code is prone to race condition when starting the cluster.
time.sleep(random.randint(0, 15))
init_event = TEvent.objects.create(operation="perform_init", duration=0, started=True, completed=True
, start_ts=timezone.now())
for m in default_models:
create_model(m, init_event)
if 'INIT_PROCESS' in os.environ:
path = os.environ.get('INIT_PROCESS', None)
if path and path.strip():
if not path.startswith('/root/DVA/configs/custom_defaults/'):
get_path_to_file(path, "temp.json")
path = 'temp.json'
try:
jspec = json.load(file(path))
except:
logging.exception("could not load : {}".format(path))
else:
p = DVAPQLProcess()
if DVAPQL.objects.count() == 0:
p.create_from_json(jspec)
p.launch()
init_process()

0 comments on commit c7f1675

Please sign in to comment.