forked from TabbyML/tabby
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add python transformer backend for tabby (mainly used for local dev / test in non-cuda environment) (TabbyML#6)
* Add python backend
* Split docker-compose.triton.yml
* update makefile
- Loading branch information
Showing
7 changed files
with
98 additions
and
32 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,26 +1,35 @@ | ||
# Path of the poetry executable, or empty when poetry is not on PATH.
# `command -v` prints the resolved path on stdout; only stderr is discarded so
# that the `ifndef POETRY_EXISTS` guard in install-poetry can see a value.
# (The previous `which poetry &> /dev/null` threw stdout away under bash and
# is not a redirect at all under a POSIX sh, so the guard never worked.)
POETRY_EXISTS := $(shell command -v poetry 2> /dev/null)
# Git hook file created by `pre-commit install --install-hooks`.
PRE_COMMIT_HOOK := .git/hooks/pre-commit
# Output directory the converter script writes the tiny-70M model into.
LOCAL_MODEL := testdata/tiny-70M/models/fastertransformer/1
|
||
all: | ||
|
||
pre-commit: | ||
poetry run pre-commit | ||
|
||
install-poetry: | ||
ifndef POETRY_EXISTS | ||
curl -sSL https://install.python-poetry.org | POETRY_VERSION=1.4.0 python3 - | ||
endif | ||
poetry install | ||
|
||
$(PRE_COMMIT_HOOK): | ||
poetry run pre-commit install --install-hooks | ||
|
||
$(LOCAL_MODEL): | ||
poetry run python converter/huggingface_gptneox_convert.py \ | ||
-in_file EleutherAI/pythia-70m-deduped \ | ||
-o $@ \ | ||
-i_g 1 -m_n tiny-70M -p 1 -w fp16 | ||
|
||
setup-development-environment: install-poetry $(LOCAL_MODEL) | ||
setup-development-environment: install-poetry $(PRE_COMMIT_HOOK) | ||
|
||
up: $(LOCAL_MODEL) | ||
up: | ||
docker-compose -f deployment/docker-compose.yml up | ||
|
||
dev: $(setup-development-environment) $(LOCAL_MODEL) | ||
up-triton: $(LOCAL_MODEL) | ||
docker-compose -f deployment/docker-compose.yml -f deployment/docker-compose.triton.yml up | ||
|
||
dev: | ||
docker-compose -f deployment/docker-compose.yml -f deployment/docker-compose.dev.yml up --build | ||
|
||
dev-triton: $(LOCAL_MODEL) | ||
docker-compose -f deployment/docker-compose.yml -f deployment/docker-compose.triton.yml -f deployment/docker-compose.dev.yml up --build |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
logs | ||
hf_cache |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
version: '3.3' | ||
|
||
services: | ||
server: | ||
image: tabbyml/tabby | ||
environment: | ||
- MODEL_BACKEND=triton | ||
- TRITON_TOKENIZER_NAME=/tokenizer | ||
volumes: | ||
- ../testdata/tiny-70M/tokenizer:/tokenizer | ||
links: | ||
- triton | ||
|
||
admin: | ||
links: | ||
- triton | ||
|
||
|
||
|
||
triton: | ||
image: tabbyml/fastertransformer_backend | ||
container_name: tabby-triton | ||
command: mpirun -n 1 --allow-run-as-root /opt/tritonserver/bin/tritonserver --model-repository=/model | ||
shm_size: 1gb | ||
volumes: | ||
- ../testdata/tiny-70M/models:/model | ||
deploy: | ||
resources: | ||
reservations: | ||
devices: | ||
- driver: nvidia | ||
count: all | ||
capabilities: [gpu] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import random | ||
import string | ||
import time | ||
from typing import List | ||
|
||
from models import Choice, CompletionRequest, CompletionResponse | ||
from transformers import AutoModelForCausalLM, AutoTokenizer | ||
|
||
|
||
class PythonModelService:
    """Completion backend that runs a Hugging Face causal LM in-process.

    The tokenizer and model are both loaded with ``from_pretrained`` from the
    same ``model_name``. Calling the instance with a ``CompletionRequest``
    returns a ``CompletionResponse`` holding a single choice.
    """

    # Upper bound on the number of *generated* tokens per request.
    MAX_NEW_TOKENS = 64

    def __init__(
        self,
        model_name: str,
    ) -> None:
        """Load the tokenizer and model identified by ``model_name``."""
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(model_name)

    def generate(self, request: CompletionRequest) -> List[Choice]:
        """Generate one completion for ``request.prompt``.

        Returns a single-element list whose ``Choice`` has ``index=0`` and the
        decoded continuation (prompt tokens removed) as ``text``.
        """
        input_ids = self.tokenizer.encode(request.prompt, return_tensors="pt")
        # Use max_new_tokens rather than max_length: max_length counts the
        # prompt tokens too, so a prompt of 64+ tokens would leave no budget
        # for the completion itself.
        res = self.model.generate(input_ids, max_new_tokens=self.MAX_NEW_TOKENS)
        # The returned sequence starts with the prompt; keep only what follows.
        prompt_length = len(input_ids[0])
        output_ids = res[0][prompt_length:]
        text = self.tokenizer.decode(output_ids)
        return [Choice(index=0, text=text)]

    def __call__(self, request: CompletionRequest) -> CompletionResponse:
        """Run ``generate`` and wrap the choices in a ``CompletionResponse``.

        The response gets a fresh random id and the current Unix time
        (whole seconds) as ``created``.
        """
        choices = self.generate(request)
        return CompletionResponse(
            id=random_completion_id(), created=int(time.time()), choices=choices
        )
|
||
|
||
def random_completion_id():
    """Build a pseudo-random completion identifier.

    The result is the literal prefix ``cmpl-`` followed by 29 characters,
    each picked with ``random.choice`` from the ASCII letters and digits.
    """
    alphabet = string.ascii_letters + string.digits
    suffix = []
    for _ in range(29):
        suffix.append(random.choice(alphabet))
    return "cmpl-" + "".join(suffix)