Skip to content

Commit

Permalink
fixed microsoft#50; reduced local deployments
Browse files Browse the repository at this point in the history
  • Loading branch information
tricktreat committed Apr 6, 2023
1 parent 766e44b commit bc66e5a
Show file tree
Hide file tree
Showing 7 changed files with 91 additions and 94 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# for server
server/models/*
!server/models/download.sh
!server/models/download.ps1
server/logs/
server/models_dev
server/public/*
Expand Down
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,14 @@ We introduce a collaborative system that consists of **an LLM as the controller*
### Default

+ Ubuntu 16.04 LTS
+ NVIDIA GeForce RTX 3090 * 1
+ VRAM >= 12GB
+ RAM > 12GB (minimal), 16GB (standard), 42GB (full)
+ Disk > 78G (with 42G for `damo-vilab/text-to-video-ms-1.7b`)

### Minimum

+ Ubuntu 16.04 LTS
+ Nothing else

The configuration `lite.yaml` does not require any expert models to be downloaded and deployed locally. However, it means that Jarvis is restricted to models running stably on HuggingFace Inference Endpoints.

Expand All @@ -49,8 +51,6 @@ First replace `openai.key` and `huggingface.token` in `server/config.yaml` with

### For Server:



```bash
# setup env
cd server
Expand Down Expand Up @@ -104,6 +104,9 @@ cd web
npm install
npm run dev
```

Here's a tip: you can switch to ChatGPT by `double-clicking` the settings icon!

Note that in order to display the video properly in HTML, you need to compile `ffmpeg` manually with H.264 support:

```bash
Expand Down
16 changes: 10 additions & 6 deletions server/awesome_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,12 @@
raise ValueError("Incrorrect OpenAI key. Please check your config.yaml file.")
OPENAI_KEY = config["openai"]["key"]
endpoint = f"https://api.openai.com/v1/{api_name}"
HEADER = {
"Authorization": f"Bearer {OPENAI_KEY}"
}
if OPENAI_KEY.startswith("sk-"):
HEADER = {
"Authorization": f"Bearer {OPENAI_KEY}"
}
else:
HEADER = None
else:
endpoint = f"{config['local']['endpoint']}/v1/{api_name}"
HEADER = None
Expand Down Expand Up @@ -163,10 +166,11 @@ def send_request(data):
openaikey = data.pop("openaikey")
if use_completion:
data = convert_chat_to_completion(data)
if "openaikey" in data:
if openaikey and openaikey.startswith("sk-"):
HEADER = {
"Authorization": f"Bearer {data['openaikey']}"
"Authorization": f"Bearer {openaikey}"
}

response = requests.post(endpoint, json=data, headers=HEADER, proxies=PROXY)
logger.debug(response.text.strip())
if use_completion:
Expand Down Expand Up @@ -772,7 +776,7 @@ def run_task(input, command, results, openaikey = None):
choose = {"id": best_model_id, "reason": reason}
messages = [{
"role": "user",
"content": f"[ {input} ] contains a task in JSON format {command}, 'task' indicates the task type and 'args' indicates the arguments required for the task. Don't explain the task to me, just help me do it and give me the result. The result can must be in text form."
"content": f"[ {input} ] contains a task in JSON format {command}, 'task' indicates the task type and 'args' indicates the arguments required for the task. Don't explain the task to me, just help me do it and give me the result. The result must be in text form without any urls."
}]
response = chitchat(messages, openaikey)
results[id] = collect_result(command, choose, {"response": response})
Expand Down
5 changes: 0 additions & 5 deletions server/models/download.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,11 @@ $models = @(
"lllyasviel/sd-controlnet-scribble",
"lllyasviel/sd-controlnet-seg",
"runwayml/stable-diffusion-v1-5",
"Salesforce/blip-image-captioning-large",
"damo-vilab/text-to-video-ms-1.7b",
"microsoft/speecht5_asr",
"facebook/maskformer-swin-large-ade",
"microsoft/biogpt",
"facebook/esm2_t12_35M_UR50D",
"JorisCos/DCCRNet_Libri1Mix_enhsingle_16k",
"espnet/kan-bayashi_ljspeech_vits",
"facebook/detr-resnet-101",
"microsoft/speecht5_tts",
"microsoft/speecht5_hifigan",
"microsoft/speecht5_vc",
"openai/whisper-base",
Expand Down
112 changes: 53 additions & 59 deletions server/models/download.sh
Original file line number Diff line number Diff line change
@@ -1,63 +1,57 @@
models="
nlpconnect/vit-gpt2-image-captioning
lllyasviel/ControlNet
lllyasviel/sd-controlnet-canny
lllyasviel/sd-controlnet-depth
lllyasviel/sd-controlnet-hed
lllyasviel/sd-controlnet-mlsd
lllyasviel/sd-controlnet-openpose
lllyasviel/sd-controlnet-scribble
lllyasviel/sd-controlnet-seg
runwayml/stable-diffusion-v1-5
Salesforce/blip-image-captioning-large
damo-vilab/text-to-video-ms-1.7b
microsoft/speecht5_asr
facebook/maskformer-swin-large-ade
microsoft/biogpt
facebook/esm2_t12_35M_UR50D
JorisCos/DCCRNet_Libri1Mix_enhsingle_16k
espnet/kan-bayashi_ljspeech_vits
facebook/detr-resnet-101
microsoft/speecht5_tts
microsoft/speecht5_hifigan
microsoft/speecht5_vc
openai/whisper-base
Intel/dpt-large
facebook/detr-resnet-50-panoptic
facebook/detr-resnet-50
google/owlvit-base-patch32
impira/layoutlm-document-qa
ydshieh/vit-gpt2-coco-en
dandelin/vilt-b32-finetuned-vqa
lambdalabs/sd-image-variations-diffusers
facebook/maskformer-swin-base-coco
Intel/dpt-hybrid-midas
"
#!/bin/bash

# CURRENT_DIR=$(cd `dirname $0`; pwd)
CURRENT_DIR=$(pwd)
for model in $models;
do
echo "----- Downloading from https://huggingface.co/"$model" -----"
if [ -d "$model" ]; then
# cd $model && git reset --hard && git pull && git lfs pull
cd $model && git pull && git lfs pull
cd $CURRENT_DIR
else
# git clone already includes the LFS files
git clone https://huggingface.co/$model $model
fi
done
# Set models and datasets to download
models=(
"nlpconnect/vit-gpt2-image-captioning"
"lllyasviel/ControlNet"
"lllyasviel/sd-controlnet-canny"
"lllyasviel/sd-controlnet-depth"
"lllyasviel/sd-controlnet-hed"
"lllyasviel/sd-controlnet-mlsd"
"lllyasviel/sd-controlnet-openpose"
"lllyasviel/sd-controlnet-scribble"
"lllyasviel/sd-controlnet-seg"
"runwayml/stable-diffusion-v1-5"
"damo-vilab/text-to-video-ms-1.7b"
"microsoft/speecht5_asr"
"JorisCos/DCCRNet_Libri1Mix_enhsingle_16k"
"espnet/kan-bayashi_ljspeech_vits"
"facebook/detr-resnet-101"
"microsoft/speecht5_hifigan"
"microsoft/speecht5_vc"
"openai/whisper-base"
"Intel/dpt-large"
"facebook/detr-resnet-50-panoptic"
"facebook/detr-resnet-50"
"google/owlvit-base-patch32"
"impira/layoutlm-document-qa"
"ydshieh/vit-gpt2-coco-en"
"dandelin/vilt-b32-finetuned-vqa"
"lambdalabs/sd-image-variations-diffusers"
"facebook/maskformer-swin-base-coco"
"Intel/dpt-hybrid-midas"
)
datasets=("Matthijs/cmu-arctic-xvectors")

datasets="Matthijs/cmu-arctic-xvectors"
# Set the current directory
CURRENT_DIR=$(pwd)

for dataset in $datasets;
do
echo "----- Downloading from https://huggingface.co/datasets/"$dataset" -----"
if [ -d "$dataset" ]; then
cd $dataset && git pull && git lfs pull
cd $CURRENT_DIR
else
git clone https://huggingface.co/datasets/$dataset $dataset
fi
# Download models
for model in "${models[@]}"; do
echo "----- Downloading from https://huggingface.co/${model} -----"
if [ -d "${model}" ]; then
(cd "${model}" && git pull && git lfs pull)
else
git clone --recurse-submodules "https://huggingface.co/${model}" "${model}"
fi
done

# Download datasets
for dataset in "${datasets[@]}"; do
echo "----- Downloading from https://huggingface.co/datasets/${dataset} -----"
if [ -d "${dataset}" ]; then
(cd "${dataset}" && git pull && git lfs pull)
else
git clone --recurse-submodules "https://huggingface.co/datasets/${dataset}" "${dataset}"
fi
done
40 changes: 20 additions & 20 deletions server/models_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@
start = time.time()

local_fold = "models"
if args.config.endswith(".dev"):
local_fold = "models_dev"
# if args.config.endswith(".dev"):
# local_fold = "models_dev"


def load_pipes(local_deployment):
Expand All @@ -89,20 +89,20 @@ def load_pipes(local_deployment):
"tokenizer": AutoTokenizer.from_pretrained(f"{local_fold}/nlpconnect/vit-gpt2-image-captioning"),
"device": "cuda:0"
},
"Salesforce/blip-image-captioning-large": {
"model": BlipForConditionalGeneration.from_pretrained(f"{local_fold}/Salesforce/blip-image-captioning-large"),
"processor": BlipProcessor.from_pretrained(f"{local_fold}/Salesforce/blip-image-captioning-large"),
"device": "cuda:0"
},
# "Salesforce/blip-image-captioning-large": {
# "model": BlipForConditionalGeneration.from_pretrained(f"{local_fold}/Salesforce/blip-image-captioning-large"),
# "processor": BlipProcessor.from_pretrained(f"{local_fold}/Salesforce/blip-image-captioning-large"),
# "device": "cuda:0"
# },
"damo-vilab/text-to-video-ms-1.7b": {
"model": DiffusionPipeline.from_pretrained(f"{local_fold}/damo-vilab/text-to-video-ms-1.7b", torch_dtype=torch.float16, variant="fp16"),
"device": "cuda:0"
},
"facebook/maskformer-swin-large-ade": {
"model": MaskFormerForInstanceSegmentation.from_pretrained(f"{local_fold}/facebook/maskformer-swin-large-ade"),
"feature_extractor" : AutoFeatureExtractor.from_pretrained("facebook/maskformer-swin-large-ade"),
"device": "cuda:0"
},
# "facebook/maskformer-swin-large-ade": {
# "model": MaskFormerForInstanceSegmentation.from_pretrained(f"{local_fold}/facebook/maskformer-swin-large-ade"),
# "feature_extractor" : AutoFeatureExtractor.from_pretrained("facebook/maskformer-swin-large-ade"),
# "device": "cuda:0"
# },
# "microsoft/trocr-base-printed": {
# "processor": TrOCRProcessor.from_pretrained(f"{local_fold}/microsoft/trocr-base-printed"),
# "model": VisionEncoderDecoderModel.from_pretrained(f"{local_fold}/microsoft/trocr-base-printed"),
Expand Down Expand Up @@ -137,13 +137,13 @@ def load_pipes(local_deployment):
"model": DiffusionPipeline.from_pretrained(f"{local_fold}/runwayml/stable-diffusion-v1-5"),
"device": "cuda:0"
},
"microsoft/speecht5_tts":{
"processor": SpeechT5Processor.from_pretrained(f"{local_fold}/microsoft/speecht5_tts"),
"model": SpeechT5ForTextToSpeech.from_pretrained(f"{local_fold}/microsoft/speecht5_tts"),
"vocoder": SpeechT5HifiGan.from_pretrained(f"{local_fold}/microsoft/speecht5_hifigan"),
"embeddings_dataset": load_dataset(f"{local_fold}/Matthijs/cmu-arctic-xvectors", split="validation"),
"device": "cuda:0"
},
# "microsoft/speecht5_tts":{
# "processor": SpeechT5Processor.from_pretrained(f"{local_fold}/microsoft/speecht5_tts"),
# "model": SpeechT5ForTextToSpeech.from_pretrained(f"{local_fold}/microsoft/speecht5_tts"),
# "vocoder": SpeechT5HifiGan.from_pretrained(f"{local_fold}/microsoft/speecht5_hifigan"),
# "embeddings_dataset": load_dataset(f"{local_fold}/Matthijs/cmu-arctic-xvectors", split="validation"),
# "device": "cuda:0"
# },
# "speechbrain/mtl-mimic-voicebank": {
# "model": WaveformEnhancement.from_hparams(source="speechbrain/mtl-mimic-voicebank", savedir="models/mtl-mimic-voicebank"),
# "device": "cuda:0"
Expand Down Expand Up @@ -458,7 +458,7 @@ def models(model_id):
generated_text = pipes[model_id]["tokenizer"].batch_decode(generated_ids, skip_special_tokens=True)[0]
result = {"generated text": generated_text}
# image to text: OCR
if model_id == "microsoft/trocr-base-printed" or model_id == "microsoft/trocr-base-handwritten" :
if model_id == "microsoft/trocr-base-printed" or model_id == "microsoft/trocr-base-handwritten":
image = load_image(request.get_json()["img_url"]).convert("RGB")
pixel_values = pipes[model_id]["processor"](image, return_tensors="pt").pixel_values
pixel_values = pixel_values.to(pipes[model_id]["device"])
Expand Down
2 changes: 1 addition & 1 deletion server/run_gradio_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def bot(messages):
# response = requests.post("http://localhost:8004/hugginggpt", json={"messages": all_messages, "openaikey": OPENAI_KEY})
# message = response.json()["message"]
# print(message)
message = chat_huggingface(all_messages, OPENAI_KEY)
message = chat_huggingface(all_messages, OPENAI_KEY)["message"]
image_urls, audio_urls, video_urls = extract_medias(message)
add_message(message, "assistant")
messages[-1][1] = message
Expand Down

0 comments on commit bc66e5a

Please sign in to comment.