Skip to content

Commit 4525e62

Browse files
committed Aug 7, 2023
lmtp: docker serve
1 parent b98b5b1 commit 4525e62

File tree

4 files changed

+69
-19
lines changed

4 files changed

+69
-19
lines changed
 

‎.dockerignore

+5
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
api.env
22
**/node_modules/*
3+
34
*.tokens
45

56
dist
@@ -178,5 +179,9 @@ wip-snippets
178179
.lmql-algorithms-cache
179180

180181
*.tokens
182+
181183
scripts/Dockerfile*
184+
scripts/Dockerfile.serve
185+
scripts/lmql-serve-docker.py
182186
transformers-cache
187+
web/

‎scripts/Dockerfile.serve

+28 -6
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,8 @@
11
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04
22

33
ENV PYTHONUNBUFFERED=1
4-
ENV PORT=8899
54

6-
# SYSTEM
5+
# NVIDIA GPU support
76
RUN apt-get update --yes --quiet && DEBIAN_FRONTEND=noninteractive apt-get install --yes --quiet --no-install-recommends \
87
software-properties-common \
98
build-essential apt-utils \
@@ -27,6 +26,7 @@ RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 9
2726

2827
RUN pip install --upgrade pip
2928

29+
# install lmql
3030
WORKDIR lmql
3131
VOLUME ~/.lmql/
3232
ARG GPU_ENABLED
@@ -35,13 +35,35 @@ HEALTHCHECK CMD curl --fail http://localhost:$PORT/ || exit 1
3535

3636
RUN apt-get update
3737

38+
# install release version of lmql (for dependencies)
39+
RUN pip install "lmql[hf]"
40+
3841
COPY . /lmql
39-
# install lmql from source
42+
# re-install lmql from source
4043
WORKDIR /lmql
4144
RUN pip install -e ".[hf]"
4245

43-
EXPOSE $PORT
44-
46+
VOLUME /transformers
4547
RUN ls /transformers
4648

47-
ENTRYPOINT ["lmql", "serve-model", "--port", "$PORT", "--host", "0.0.0.0"]
49+
ENV LMQL_VERSION="latest"
50+
51+
# checkout LMQL version
52+
RUN if [ "$LMQL_VERSION" != "latest" ]; then git checkout tags/$LMQL_VERSION; fi
53+
54+
# if ENV EXTRA_PIP_PACKAGES is set, install them at runtime (e.g. bitsandbytes or auto_gptq), not at build time
55+
ENV EXTRA_PIP_PACKAGES=""
56+
57+
# create run.sh
58+
RUN echo "#!/bin/bash \n\
59+
set -x \n\
60+
# install extra pip packages \n\
61+
if [ \"\$EXTRA_PIP_PACKAGES\" != \"\" ]; then \n\
62+
pip install \$EXTRA_PIP_PACKAGES \n\
63+
fi \n\
64+
# start lmql \n\
65+
lmql serve-model --port 8899 --host 0.0.0.0 --docker_hide_port \$@" > run.sh
66+
67+
RUN chmod +x run.sh
68+
69+
ENTRYPOINT ["./run.sh"]

‎scripts/lmql-serve-docker.py

+21 -10
Original file line number | Diff line number | Diff line change
@@ -7,14 +7,22 @@ def has_docker_image():
77
return os.system(cmd) == 0
88

99
def build_docker_image():
10-
cmd = """sudo docker build -t lmql-serve -f Dockerfile.serve ."""
10+
ADDITIONAL_EXCLUDES = [
11+
]
12+
cmd = """sudo docker build -t lmql-serve -f scripts/Dockerfile.serve ."""
1113
print(">", cmd)
1214
os.system(cmd)
1315

14-
parser = argparse.ArgumentParser()
15-
parser.add_argument('--port', type=int, default=2223, help='Host port to expose the LMTP endpoint on')
16-
parser.add_argument('--gpus', type=str, default='all', help="GPUs to use, e.g. --gpu all, passed to 'docker run'")
17-
parser.add_argument('--transformers-cache', type=str, default='$HOME/.cache/huggingface/hub', help="Path to local directory to cache downloaded transformers models.")
16+
parser = argparse.ArgumentParser(description="""
17+
Runs 'lmql serve-model' in a docker container.
18+
19+
This scripts passes all arguments to 'lmql serve-model' in the docker container, except for the following:
20+
""".strip())
21+
parser.add_argument('--port', type=int, default=8080, help="Host port to expose the container's LMTP endpoint on. Default: 8080.")
22+
parser.add_argument('--gpus', type=str, default='all', help="GPUs to use, e.g. --gpu all, passed to 'docker run'. Default: all.")
23+
parser.add_argument('--transformers-cache', type=str, default='$HOME/.cache/huggingface/hub', help="Path to local directory to mount into the container as model cache.")
24+
parser.add_argument('--rebuild', action='store_true', help="Forces rebuilding the docker image")
25+
parser.add_argument('--extras', type=str, default='', help="Extra pip packages to install in the docker image before running lmql serve-model.")
1826
# all other args are passed to lmql-serve
1927
args, _ = parser.parse_known_args()
2028

@@ -26,20 +34,23 @@ def build_docker_image():
2634
# otherwise, replace $HOME with current directory
2735
args.transformers_cache = args.transformers_cache.replace("$HOME", ".")
2836

29-
if not has_docker_image():
37+
if not has_docker_image() or args.rebuild:
3038
build_docker_image()
3139

3240
PORT=2223
3341
GPUS=all
3442

3543
cmd = """sudo docker run \\
3644
-p $PORT:8899 \\
37-
-e PORT=8899 \\
38-
-e TRANSFORMERS_CACHE=/transformers \\
45+
-e TRANSFORMERS_CACHE=/transformers $EXTRAS \\
3946
-it --gpus $GPUS \\
4047
-v $CACHE:/transformers \\
41-
lmql-serve --cuda $@
42-
""".replace("$GPUS", args.gpus).replace("$PORT", str(args.port)).replace("$@", " ".join(_)).replace("$CACHE", args.transformers_cache)
48+
lmql-serve $@
49+
""".replace("$GPUS", args.gpus) \
50+
.replace("$PORT", str(args.port)) \
51+
.replace("$@", " ".join(_)) \
52+
.replace("$CACHE", args.transformers_cache) \
53+
.replace("$EXTRAS", f"-e EXTRA_PIP_PACKAGES='{args.extras}'" if args.extras != "" else "")
4354

4455
print(">", cmd)
4556
os.system(cmd)

‎src/lmql/models/lmtp/lmtp_serve.py

+15 -3
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
from .lmtp_inference_server import *
66
from .utils import rename_model_args
77
from .lmtp_balance import balance_main
8+
from .lmtp_layout import layout_main
89

910
def serve(model_name, host="localhost", port=8080, cuda=False, dtype=None, static=False, loader=None, **kwargs):
1011
"""
@@ -40,10 +41,12 @@ def lmtp_serve_main(model_args):
4041

4142
# extract explicit arguments
4243
host = model_args.pop("host", "localhost")
43-
port = model_args.pop("port", 8080)
44+
port = int(model_args.pop("port", 8080))
4445
model = model_args.pop("model", None)
4546
single_thread = model_args.pop("single_thread", False)
4647
static = model_args.pop("static", False) or single_thread
48+
# in Docker, don't show the port (it's not accessible from outside the container anyway)
49+
docker_hide_port = model_args.pop("docker_hide_port", False)
4750

4851
assert not single_thread or model != "auto", "Cannot use --single_thread mode with model 'auto'. Please specify a specific model to load."
4952

@@ -68,7 +71,10 @@ async def stream(request):
6871

6972
def web_print(*args):
7073
if len(args) == 1 and args[0].startswith("======== Running on"):
71-
print(f"[Serving LMTP endpoint on ws://{host}:{port}/]")
74+
if docker_hide_port:
75+
print(f"[Serving LMTP endpoint on Docker container port]")
76+
else:
77+
print(f"[Serving LMTP endpoint on ws://{host}:{port}/]")
7278
else:
7379
print(*args)
7480

@@ -82,7 +88,7 @@ def argparser(args):
8288
next_argument_name = None
8389

8490
kwargs = {}
85-
flag_args = ["cuda", "static", "single_thread"]
91+
flag_args = ["cuda", "static", "single_thread", "docker_hide_port"]
8692

8793
help_text = """
8894
usage: serve-model [-h] [--port PORT] [--host HOST] [--cuda] [--dtype DTYPE] [--[*] VALUE] model
@@ -96,6 +102,8 @@ def argparser(args):
96102
--host HOST
97103
--cuda
98104
--static If set, the model cannot be switched on client request but remains fixed to the model specified in the model argument.
105+
--single_thread Run the model on the main thread. This can lead to increased latency when processing multiple requests, but is necessary for some models that
106+
cannot be run in the background.
99107
--dtype DTYPE What format to load the model weights. Options: 'float16'
100108
(not available on all models), '8bit' (requires bitsandbytes)
101109
--loader OPT If set, the model will be loaded using the corresponding option. Useful for loading quantized modules in formats not
@@ -154,6 +162,10 @@ def cli(args=None):
154162
args = args[1:]
155163
balance_main(args)
156164
return
165+
elif "--layout" in args:
166+
# instead of running directly, with a layout we are launching the
167+
# relevant worker subprocesses, which in turn call lmtp_serve_main
168+
layout_main(args)
157169
else:
158170
args = argparser(args)
159171
lmtp_serve_main(args)

0 commit comments

Comments
 (0)
Please sign in to comment.