-
Notifications
You must be signed in to change notification settings - Fork 51
/
Copy pathcog.yaml
50 lines (45 loc) · 1.94 KB
/
cog.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# Configuration for Cog ⚙️
# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md
build:
  # Set to true if your model requires a GPU.
  gpu: true
  # CUDA version for the image. The torch, vllm and xformers wheel URLs below
  # live under an `11.8/` path, so change them together with this value.
  cuda: "11.8"
  # Python version in the form '3.8' or '3.8.12'. Kept quoted so it is read as
  # a string; the wheel filenames below carry the matching cp311 tag.
  python_version: "3.11"
  # A list of packages in the format <package-name>==<version>.
  python_packages:
    - "numpy==1.24.2"
    - "sentencepiece==0.1.99"
    - "jinja2==3.1.2"
    - "scipy==1.11.1"
    - "safetensors>=0.3.1"
    - "python-dotenv"
    - "fire"
    - "datasets"
    - "transformers==4.33.2"
    - "peft==0.4.0"
    - "accelerate"
    - "bitsandbytes"
    - "trl==0.5.0"
    - "aiohttp[speedups]"
    # NOTE(review): unpinned, and the original note here was only "hm" --
    # confirm whether this entry is still required.
    - "triton"
    - "fastapi<0.99.0"
    # Re-enable these two wheels (and drop the 11.8 ones) when going back to
    # CUDA 12.1.
    # - "https://r2.drysys.workers.dev/torch/torch-2.1.0-cp311-cp311-linux_x86_64.whl"
    # - "https://weights.replicate.delivery/default/wheels/vllm-0.2a0-cp311-cp311-linux_x86_64.whl"
    - "https://r2.drysys.workers.dev/torch/11.8/torch-2.1.0-cp311-cp311-linux_x86_64.whl"
    # This wheel can be built by running
    # `TORCH_CUDA_ARCH_LIST="8.0;8.6" pip wheel .`
    # in https://github.com/replicate/vllm-with-loras
    - "https://r2.drysys.workers.dev/vllm/11.8/vllm-0.2a0-cp311-cp311-linux_x86_64.whl"
    - "https://r2.drysys.workers.dev/xformers/11.8/xformers-0.0.23+b4c853d.d20231107-cp311-cp311-linux_x86_64.whl"
    # This entry is a set of pip options, not a package. Presumably it is
    # passed through verbatim so the mlc nightly packages below resolve from
    # https://mlc.ai/wheels -- TODO confirm against the Cog version in use.
    - "--pre -f https://mlc.ai/wheels"
    - "mlc-chat-nightly-cu118"
    - "mlc-ai-nightly-cu118"
    # CUDA 12.1 counterparts of the two mlc packages above.
    # - "mlc-chat-nightly-cu121"
    # - "mlc-ai-nightly-cu121"
  # Shell commands listed under `run`.
  run:
    # Download the pget v0.0.6 binary and make it executable.
    - curl -o /usr/local/bin/pget -L "https://github.com/replicate/pget/releases/download/v0.0.6/pget" && chmod +x /usr/local/bin/pget
    # Since we can't set LD_LIBRARY_PATH=torch/lib, symlink the libnv*/libcu*
    # libraries bundled with torch into /usr/lib so mlc can access them.
    # `bash -c` is used because the `lib{nv,cu}*` brace expansion is a bash
    # feature.
    - bash -c 'ln -s /usr/local/lib/python3.11/site-packages/torch/lib/lib{nv,cu}* /usr/lib'
# How predictions are run on your model: `Predictor` in predict.py,
# written as <file>:<name>.
predict: 'predict.py:Predictor'
# Training entry point in the same <file>:<name> form: `train` in train.py.
train: 'train.py:train'