forked from decodingml/llm-twin-course
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path: Makefile
144 lines (103 loc) · 5.66 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# Load variables from .env and export every name it defines so child
# processes (docker, poetry, python, aws) inherit them. The sed strips
# comments and keeps only the variable names for the export list.
include .env
$(eval export $(shell sed -ne 's/ *#.*$$//; /./ s/=.*$$// p' .env))

# AWS profile used for the account lookup (and ECR pushes); override with
# `make AWS_PROFILE=other ...`. Defaults to the original hard-coded value.
AWS_PROFILE ?= perso
AWS_CURRENT_REGION_ID := $(shell aws configure get region)
AWS_CURRENT_ACCOUNT_ID := $(shell aws sts get-caller-identity --query "Account" --output text --profile $(AWS_PROFILE))
# $(CURDIR) is the built-in absolute cwd — avoids forking a shell for `pwd`.
PYTHONPATH := $(CURDIR)
# Declare every command-style target as phony so a file with the same name
# can never shadow it (the original list covered only two names).
.PHONY: build-all env-var help \
    local-build-1 local-deploy-1 local-clean-1 local-test-1 \
    push local-start local-stop \
    local-test-medium local-test-github cloud-test-github \
    local-feature-pipeline generate-dataset local-test-retriever \
    create-qwak-project local-test-training-pipeline deploy-training-pipeline \
    deploy-inference-pipeline undeploy-infernece-pipeline call-inference-pipeline \
    local-start-superlinked local-stop-superlinked test-superlinked-server \
    local-bytewax-superlinked local-test-retriever-superlinked
# ANSI color escape codes used in @echo banners below.
RED := \033[0;31m
BLUE := \033[0;34m
GREEN := \033[0;32m
YELLOW := \033[0;33m
RESET := \033[0m

# Sanity check that .env was loaded; the message previously said "VAR"
# while actually printing RABBITMQ_HOST — fixed to name the real variable.
env-var:
	@echo "Environment variable RABBITMQ_HOST is: ${RABBITMQ_HOST}"
# Self-documenting help: list every target line carrying a `#` description,
# printing the target name in bold green followed by its comment text.
help:
	@grep -E '^[a-zA-Z0-9 -]+:.*#' Makefile | sort | while read -r l; do printf "\033[1;32m$$(echo $$l | cut -f 1 -d':')\033[00m:$$(echo $$l | cut -f 2- -d'#')\n"; done
# ------ Crawlers --------

local-build-1: # Build lambda crawler on local
	docker compose -f docker-compose.yml build crawler

local-deploy-1: # Deploy lambda crawler custom docker image on local.
# `|| true` makes the target idempotent when the network already exists.
	docker network create llm-twin-course_local || true
# MongoDB runs detached on a non-default port, with data persisted locally.
	docker run -d \
		--name mongo1 \
		-p 30001:30001 \
		--network llm-twin-course_local \
		-v ./data/mongo-1:/data/db \
		mongo:5 \
		--bind_ip_all --port 30001
# Crawler runs in the foreground (no -d) so its logs stay visible;
# linux/amd64 matches the lambda runtime image architecture.
	docker run \
		--name crawler1 \
		-p 9000:8080 \
		-e MONGO_DATABASE_HOST=mongodb://mongo1:30001 \
		--network llm-twin-course_local \
		--platform linux/amd64 \
		llm-twin-crawler:latest
local-clean-1: # Tear down the local crawler stack (containers + network).
# Fixed typo: "crawler 1" -> "crawler1" (the space made docker stop a
# non-existent container named "1" and leave crawler1 running).
	docker stop mongo1 crawler1 || true
	docker rm mongo1 crawler1 || true
# `|| true` keeps cleanup best-effort, consistent with the lines above.
	docker network rm llm-twin-course_local || true
# Invoke the locally running lambda crawler through the Lambda runtime API.
# NOTE(review): targets port 9000 (matches the `-p 9000:8080` mapping in
# local-deploy-1), while local-test-medium/github use 9010 — confirm which
# mapping each test is meant to hit.
local-test-1:
	curl -X POST "http://localhost:9000/2015-03-31/functions/function/invocations" \
		-d '{"user": "Paul Iuztin", "link": "https://github.com/decodingml/llm-twin-course"}'
# ------ Infrastructure ------

push: # Build & push image to docker ECR (e.g. make push IMAGE_TAG=latest)
# Fail fast with a clear message instead of pushing an image tagged ":".
	@$(if $(strip $(IMAGE_TAG)),,$(error IMAGE_TAG is not set; usage: make push IMAGE_TAG=latest))
	@echo "Logging into AWS ECR..."
	aws ecr get-login-password --region $(AWS_CURRENT_REGION_ID) --profile perso | docker login --username AWS --password-stdin $(AWS_CURRENT_ACCOUNT_ID).dkr.ecr.$(AWS_CURRENT_REGION_ID).amazonaws.com
	@echo "Build & Push Docker image..."
# buildx targets linux/amd64 so the image runs on AWS Lambda regardless of host arch.
	docker buildx build --platform linux/amd64 -t $(AWS_CURRENT_ACCOUNT_ID).dkr.ecr.$(AWS_CURRENT_REGION_ID).amazonaws.com/crawler:$(IMAGE_TAG) -f .docker/Dockerfile.crawlers .
	docker push $(AWS_CURRENT_ACCOUNT_ID).dkr.ecr.$(AWS_CURRENT_REGION_ID).amazonaws.com/crawler:$(IMAGE_TAG)
	@echo "Image pushed"
# Fixed comment typo: "Buil" -> "Build".
local-start: # Build and start local infrastructure.
	docker compose -f docker-compose.yml up --build -d

local-stop: # Stop local infrastructure.
	docker compose -f docker-compose.yml down --remove-orphans
# ------ Crawler ------

# NOTE(review): these two targets hit port 9010, but local-deploy-1 maps
# 9000:8080 — confirm which compose service exposes 9010.
local-test-medium: # Send test command on local to test the lambda with a Medium article
	curl -X POST "http://localhost:9010/2015-03-31/functions/function/invocations" \
		-d '{"user": "Paul Iuztin", "link": "https://medium.com/decodingml/an-end-to-end-framework-for-production-ready-llm-systems-by-building-your-llm-twin-2cc6bb01141f"}'

local-test-github: # Send test command on local to test the lambda with a Github repository
	curl -X POST "http://localhost:9010/2015-03-31/functions/function/invocations" \
		-d '{"user": "Paul Iuztin", "link": "https://github.com/decodingml/llm-twin-course"}'

# Invoke the deployed AWS Lambda directly; response is written to response.json.
cloud-test-github: # Send command to the cloud lambda with a Github repository
	aws lambda invoke \
		--function-name crawler \
		--cli-binary-format raw-in-base64-out \
		--payload '{"user": "Paul Iuztin", "link": "https://github.com/decodingml/llm-twin-course"}' \
		response.json
# ------ RAG Feature Pipeline ------

local-feature-pipeline: # Run the RAG feature pipeline
# RUST_BACKTRACE=full surfaces full stack traces from Bytewax's Rust core on crashes.
	RUST_BACKTRACE=full poetry run python -m bytewax.run 3-feature-pipeline/main.py

# NOTE(review): assumes the llm-twin-bytewax container is already running
# (started via local-start) — confirm before invoking.
generate-dataset: # Generate dataset for finetuning and version it in Comet ML
	docker exec -it llm-twin-bytewax python -m finetuning.generate_data

# ------ RAG ------

local-test-retriever: # Test retriever
	docker exec -it llm-twin-bytewax python -m retriever
# ------ Qwak: Training pipeline ------

create-qwak-project: # Create Qwak project for serving the model
	@echo "$(YELLOW)Creating Qwak project $(RESET)"
	qwak models create "llm_twin" --project "llm-twin-course"

local-test-training-pipeline: # Test Qwak model locally
	poetry run python test_local.py

deploy-training-pipeline: # Deploy the model to Qwak
# Export pinned requirements first so the Qwak build uses the poetry env's versions.
	@echo "$(YELLOW)Dumping poetry env requirements to $(RESET) $(GREEN) requirements.txt $(RESET)"
	poetry export -f requirements.txt --output finetuning/requirements.txt --without-hashes
	@echo "$(GREEN)Triggering Qwak Model Build$(RESET)"
	poetry run qwak models build -f build_config.yaml .
# ------ Qwak: Inference pipeline ------

deploy-inference-pipeline: # Deploy the inference pipeline to Qwak.
	poetry run qwak models deploy realtime --model-id "llm_twin" --instance "gpu.a10.2xl" --timeout 50000 --replicas 2 --server-workers 2

# The original target name contains a typo ("infernece"); it is kept for
# backward compatibility and a correctly spelled alias is added below.
undeploy-infernece-pipeline: # Remove the inference pipeline deployment from Qwak.
	poetry run qwak models undeploy --model-id "llm_twin"

undeploy-inference-pipeline: undeploy-infernece-pipeline # Correctly spelled alias for the target above.

call-inference-pipeline: # Call the inference pipeline.
	poetry run python main.py
# ------ Superlinked Bonus Series ------

# Fixed comment typo: "Buil" -> "Build".
local-start-superlinked: # Build and start local infrastructure used in the Superlinked series.
	docker compose -f docker-compose-superlinked.yml up --build -d

local-stop-superlinked: # Stop local infrastructure used in the Superlinked series.
	docker compose -f docker-compose-superlinked.yml down --remove-orphans

test-superlinked-server: # Ingest dummy data into the local superlinked server to check if it's working.
	poetry run python 6-bonus-superlinked-rag/local_test.py

local-bytewax-superlinked: # Run the Bytewax streaming pipeline powered by Superlinked.
# RUST_BACKTRACE=full surfaces full stack traces from Bytewax's Rust core on crashes.
	RUST_BACKTRACE=full poetry run python -m bytewax.run 6-bonus-superlinked-rag/main.py

# NOTE(review): assumes the llm-twin-bytewax-superlinked container is already
# running (started via local-start-superlinked) — confirm before invoking.
local-test-retriever-superlinked: # Call the retrieval module and query the Superlinked server & vector DB
	docker exec -it llm-twin-bytewax-superlinked python -m retriever