Skip to content

Commit

Permalink
[deps] use cuda version of torch for llm (ray-project#50671)
Browse files Browse the repository at this point in the history
with constraint files that are CUDA-version aware

Signed-off-by: Lonnie Liu <[email protected]>
  • Loading branch information
aslonnie authored Feb 18, 2025
1 parent d2600e7 commit 8e4097c
Show file tree
Hide file tree
Showing 18 changed files with 30,700 additions and 13 deletions.
2 changes: 2 additions & 0 deletions .buildkite/llm.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ steps:
env:
IMAGE_TO: "llmbuild"
IMAGE_FROM: "cr.ray.io/rayproject/oss-ci-base_build-py3.11"
RAY_CUDA_CODE: "cpu"

- name: llmgpubuild
wanda: ci/docker/llm.build.wanda.yaml
Expand All @@ -17,6 +18,7 @@ steps:
env:
IMAGE_TO: "llmgpubuild"
IMAGE_FROM: "cr.ray.io/rayproject/oss-ci-base_gpu-py3.11"
RAY_CUDA_CODE: "cu121"

- label: "llm cpu tests"
key: "llm-cpu-tests"
Expand Down
33 changes: 24 additions & 9 deletions ci/compile_llm_requirements.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,44 +3,59 @@
set -euo pipefail

PYTHON_CODE="$(python -c "import sys; v=sys.version_info; print(f'py{v.major}{v.minor}')")"
CUDA_CODE="${CUDA_CODE:-cpu}"
PYTHON_CUDA_CODE="${PYTHON_CODE}_${CUDA_CODE}"

echo "--- Compile dependencies for ${PYTHON_CODE}"

PIP_COMPILE=(pip-compile -v --generate-hashes --strip-extras --unsafe-package ray)
PIP_COMPILE=(
pip-compile -v --generate-hashes --strip-extras --unsafe-package ray
--extra-index-url "https://download.pytorch.org/whl/${CUDA_CODE}"
--find-links "https://data.pyg.org/whl/torch-2.3.0+${CUDA_CODE}.html"
)

mkdir -p /tmp/ray-deps

# Remove the GPU constraints
cp python/requirements_compiled.txt /tmp/ray-deps/requirements_compiled.txt
sed -i '/^--extra-index-url /d' /tmp/ray-deps/requirements_compiled.txt
sed -i '/^--find-links /d' /tmp/ray-deps/requirements_compiled.txt

# First, extract the base test dependencies from the current compiled monorepo requirements file.
# This also expands to the indirect dependencies for this Python version & platform.
#
# Needs to use the exact torch version.
echo "--- Compile ray base test dependencies"
"${PIP_COMPILE[@]}" \
-c "python/requirements_compiled.txt" \
-c "/tmp/ray-deps/requirements_compiled.txt" \
"python/requirements.txt" \
"python/requirements/base-test-requirements.txt" \
-o "python/requirements_compiled_ray_test_${PYTHON_CODE}.txt"
-o "python/requirements_compiled_ray_test_${PYTHON_CUDA_CODE}.txt"

# Second, expand it into LLM test dependencies
echo "--- Compile LLM test dependencies"
"${PIP_COMPILE[@]}" \
-c "python/requirements_compiled_ray_test_${PYTHON_CODE}.txt" \
-c "python/requirements_compiled_ray_test_${PYTHON_CUDA_CODE}.txt" \
"python/requirements.txt" \
"python/requirements/base-test-requirements.txt" \
"python/requirements/llm/llm-requirements.txt" \
"python/requirements/llm/llm-test-requirements.txt" \
-o "python/requirements_compiled_rayllm_test_${PYTHON_CODE}.txt"
-o "python/requirements_compiled_rayllm_test_${PYTHON_CUDA_CODE}.txt"

# Third, extract the ray base dependencies from ray base test dependencies.
# TODO(aslonnie): This should be used for installing ray in the container images.
echo "--- Compile ray base test dependencies"
"${PIP_COMPILE[@]}" \
-c "python/requirements_compiled_ray_test_${PYTHON_CODE}.txt" \
-c "python/requirements_compiled_ray_test_${PYTHON_CUDA_CODE}.txt" \
"python/requirements.txt" \
-o "python/requirements_compiled_ray_${PYTHON_CODE}.txt"
-o "python/requirements_compiled_ray_${PYTHON_CUDA_CODE}.txt"

# Finally, extract the LLM dependencies from the LLM test dependencies,
# which is also an expansion of the ray base dependencies.
# TODO(aslonnie): This should be used for installing ray[llm] in the container images.
echo "--- Compile LLM dependencies"
"${PIP_COMPILE[@]}" \
-c "python/requirements_compiled_rayllm_test_${PYTHON_CODE}.txt" \
-c "python/requirements_compiled_rayllm_test_${PYTHON_CUDA_CODE}.txt" \
"python/requirements.txt" \
"python/requirements/llm/llm-requirements.txt" \
-o "python/requirements_compiled_rayllm_${PYTHON_CODE}.txt"
-o "python/requirements_compiled_rayllm_${PYTHON_CUDA_CODE}.txt"
3 changes: 2 additions & 1 deletion ci/docker/llm.build.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ ARG DOCKER_IMAGE_BASE_BUILD=cr.ray.io/rayproject/oss-ci-base_build-py3.11
FROM $DOCKER_IMAGE_BASE_BUILD

ARG RAY_CI_JAVA_BUILD=
ARG RAY_CUDA_CODE=cpu

SHELL ["/bin/bash", "-ice"]

Expand All @@ -16,6 +17,6 @@ set -euo pipefail

SKIP_PYTHON_PACKAGES=1 ./ci/env/install-dependencies.sh

pip install --no-deps -r python/requirements_compiled_rayllm_test_py311.txt
pip install --no-deps -r python/requirements_compiled_rayllm_test_py311_$RAY_CUDA_CODE.txt

EOF
4 changes: 3 additions & 1 deletion ci/docker/llm.build.wanda.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ srcs:
- ci/env/install-dependencies.sh
- ci/env/install-llvm-binaries.sh
- ci/suppress_output
- python/requirements_compiled_rayllm_test_py311.txt
- python/requirements_compiled_rayllm_test_py311_cpu.txt
- python/requirements_compiled_rayllm_test_py311_cu121.txt
tags:
- cr.ray.io/rayproject/$IMAGE_TO
build_args:
- DOCKER_IMAGE_BASE_BUILD=$IMAGE_FROM
- RAY_CUDA_CODE=$RAY_CUDA_CODE
2 changes: 1 addition & 1 deletion ci/docker/ray-llm.base.wanda.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ froms: ["cr.ray.io/rayproject/ray-py$PYTHON_VERSION-cu$CUDA_VERSION-base"]
dockerfile: docker/ray-llm/Dockerfile
srcs:
- python/requirements.txt
- python/requirements_compiled_rayllm_py311.txt
- python/requirements_compiled_rayllm_py311_cu124.txt
build_args:
- BASE_IMAGE=cr.ray.io/rayproject/ray-py$PYTHON_VERSION-cu$CUDA_VERSION-base
tags:
Expand Down
5 changes: 4 additions & 1 deletion docker/ray-llm/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,15 @@ set -euo pipefail

PYTHON_CODE="$(python -c "import sys; v=sys.version_info; print(f'py{v.major}{v.minor}')")"

# The ray-llm image only supports CUDA 12.4 for now.
CUDA_CODE=cu124

if [[ "${PYTHON_CODE}" != "py311" ]]; then
echo "ray-llm only support Python 3.11 now (this image is for ${PYTHON_CODE})."
exit 1
fi

pip install --no-deps -r "requirements_compiled_rayllm_${PYTHON_CODE}.txt"
pip install --no-deps -r "requirements_compiled_rayllm_${PYTHON_CODE}_${CUDA_CODE}.txt"

# Export installed packages
$HOME/anaconda3/bin/pip freeze > /home/ray/pip-freeze.txt
Expand Down
Loading

0 comments on commit 8e4097c

Please sign in to comment.