-
Notifications
You must be signed in to change notification settings - Fork 24
/
Copy pathDockerfile
202 lines (159 loc) · 5.92 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
# syntax=docker/dockerfile:1
# Global build arguments. They are declared before the first FROM, so every
# stage that needs one re-declares it with a bare `ARG <name>`.

# Whisper model that the load_whisper stage downloads into the image.
ARG WHISPER_MODEL="base"
# Space-separated list of language codes whose align models are preloaded.
ARG LANG="en"

# Numeric id used for both the runtime user and its group.
ARG UID="1001"

# Values written to the image's `version` / `release` labels.
ARG VERSION="EDGE"
ARG RELEASE="0"

# Stage/image names used as FROM targets so CI can substitute cached stage builds.
# Keep the defaults when building locally.
ARG LOAD_WHISPER_STAGE="load_whisper"
ARG NO_MODEL_STAGE="no_model"

# The diarization model download (with auth token) appears to ignore TORCH_HOME,
# so CACHE_HOME has to be exactly the default cache path.
# https://github.com/jim60105/docker-whisperX/issues/27
ARG CACHE_HOME="/.cache"
ARG CONFIG_HOME="/.config"
# Both model caches live under CACHE_HOME (which must be declared above them).
ARG TORCH_HOME="${CACHE_HOME}/torch"
ARG HF_HOME="${CACHE_HOME}/huggingface"
######
# Base stage
# Common parent of the build stage and the runtime (no_model) stage.
######
FROM python:3.11-slim AS base

# Missing dependencies for arm64 (needed for build-time and run-time)
# https://github.com/jim60105/docker-whisperX/issues/14
# TARGETPLATFORM is provided automatically by BuildKit; on any other platform
# this RUN is a no-op.
ARG TARGETPLATFORM
RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
        apt-get update && apt-get install -y --no-install-recommends libgomp1=12.2.0-14 libsndfile1=1.2.0-1 && \
        apt-get clean && \
        rm -rf /var/lib/apt/lists/*; \
    fi
######
# Build stage
# Installs all Python dependencies and whisperX into /root/.local; the runtime
# stage copies that directory out, so nothing else from this stage is shipped.
######
FROM base AS build

# RUN mount cache for multi-arch: https://github.com/docker/buildx/issues/549#issuecomment-1788297892
# (the pip cache id below is keyed on these two values)
ARG TARGETARCH
ARG TARGETVARIANT

WORKDIR /app

# Install under /root/.local
ENV PIP_USER="true"
ARG PIP_NO_WARN_SCRIPT_LOCATION=0
ARG PIP_ROOT_USER_ACTION="ignore"

# Add git
RUN apt-get update && apt-get install -y --no-install-recommends git=1:2.39.2-1.1 && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Install requirements
RUN --mount=type=cache,id=pip-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/pip \
    pip install -U --extra-index-url https://download.pytorch.org/whl/cu118 \
    torch==2.1.1 torchaudio==2.1.1 \
    pyannote.audio==3.1.1 \
    # Use dumb-init as PID 1 to handle signals properly
    pip dumb-init

RUN --mount=type=cache,id=pip-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/pip \
    --mount=source=whisperX/requirements.txt,target=requirements.txt \
    pip install -r requirements.txt

# Install whisperX
RUN --mount=type=cache,id=pip-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/pip \
    --mount=source=whisperX,target=.,rw \
    pip install . && \
    # Cleanup of bytecode files is best effort. Each `|| true` is scoped with
    # braces so that it can only excuse a failed cleanup, never a failed
    # `pip install .` (an unscoped `a && b || true` also swallows a failure of `a`).
    { find "/root/.local" -name '*.pyc' -print0 | xargs -0 rm -f || true ; } && \
    { find "/root/.local" -type d -name '__pycache__' -print0 | xargs -0 rm -rf || true ; }
######
# Final stage for no_model
# Runtime image without any preloaded model. The load_whisper and final stages
# build on top of it (through NO_MODEL_STAGE).
######
FROM base AS no_model

ENV NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility

# We don't need them anymore
RUN pip3.11 uninstall -y pip wheel && \
    rm -rf /root/.cache/pip

# ffmpeg
COPY --link --from=mwader/static-ffmpeg:6.1.1 /ffmpeg /usr/local/bin/
COPY --link --from=mwader/static-ffmpeg:6.1.1 /ffprobe /usr/local/bin/

# Create user (the numeric UID doubles as the user name and the group name)
ARG UID
RUN groupadd -g $UID $UID && \
    useradd -l -u $UID -g $UID -m -s /bin/sh -N $UID

ARG CACHE_HOME
ARG CONFIG_HOME
ARG TORCH_HOME
ARG HF_HOME
ENV XDG_CACHE_HOME=${CACHE_HOME}
ENV TORCH_HOME=${TORCH_HOME}
ENV HF_HOME=${HF_HOME}

# Directories owned by $UID with group 0 and mode 775 (group-writable)
RUN install -d -m 775 -o $UID -g 0 /licenses && \
    install -d -m 775 -o $UID -g 0 ${CACHE_HOME} && \
    install -d -m 775 -o $UID -g 0 ${CONFIG_HOME}

# Copy licenses (OpenShift Policy)
COPY --link --chmod=775 LICENSE /licenses/LICENSE
COPY --link --chmod=775 whisperX/LICENSE /licenses/whisperX.LICENSE

# Copy dependencies and code (and support arbitrary uid for OpenShift best practice)
# https://docs.openshift.com/container-platform/4.14/openshift_images/create-images.html#use-uid_create-images
COPY --link --chown=$UID:0 --chmod=775 --from=build /root/.local /home/$UID/.local

ENV PATH="/home/$UID/.local/bin:$PATH"
# PYTHONPATH is not defined anywhere earlier in this Dockerfile, so it is set
# outright instead of being appended to "${PYTHONPATH}"; expanding the undefined
# variable only produced a leading empty path entry.
# NOTE(review): assumes the python base image does not define PYTHONPATH itself.
ENV PYTHONPATH="/home/$UID/.local/lib/python3.11/site-packages"

# Both variables are cleared in this stage; the final stage sets them from the
# build arguments once the corresponding models have been preloaded.
ARG WHISPER_MODEL
ENV WHISPER_MODEL=
ARG LANG
ENV LANG=

WORKDIR /app

VOLUME [ "/app" ]

USER $UID

STOPSIGNAL SIGINT

# Arguments given at `docker run` are appended after the `-c` script, so the
# shell binds the first of them to $0 and only the rest reach whisperx via "$@".
ENTRYPOINT [ "dumb-init", "--", "/bin/sh", "-c", "whisperx \"$@\"" ]

ARG VERSION
ARG RELEASE
LABEL name="jim60105/docker-whisperX" \
    # Authors for WhisperX
    vendor="Bain, Max and Huh, Jaesung and Han, Tengda and Zisserman, Andrew" \
    # Maintainer for this docker image
    maintainer="jim60105" \
    # Dockerfile source repository
    url="https://github.com/jim60105/docker-whisperX" \
    version=${VERSION} \
    # This should be a number, incremented with each change
    release=${RELEASE} \
    io.k8s.display-name="WhisperX" \
    summary="WhisperX: Time-Accurate Speech Transcription of Long-Form Audio" \
    description="This is the docker image for WhisperX: Automatic Speech Recognition with Word-Level Timestamps (and Speaker Diarization) from the community. For more information about this tool, please visit the following website: https://github.com/m-bain/whisperX."
######
# load_whisper stage: This stage will be tagged for caching in CI.
# Starts from the no_model image (or whatever NO_MODEL_STAGE points at) and
# loads the VAD model and the selected Whisper model once at build time.
######
FROM ${NO_MODEL_STAGE} AS load_whisper

ARG TORCH_HOME
ARG HF_HOME

# Preload vad model
RUN python3 -c 'from whisperx.vad import load_vad_model; load_vad_model("cpu");'

# Preload fast-whisper
# The single-quoted script is closed around ${WHISPER_MODEL} so the shell
# substitutes the build argument into the Python string literal.
ARG WHISPER_MODEL
RUN python3 -c 'import faster_whisper; model = faster_whisper.WhisperModel("'${WHISPER_MODEL}'")'
######
# load_align stage
# Runs load_align_model.py once for every language listed in LANG.
######
FROM ${LOAD_WHISPER_STAGE} AS load_align

ARG TORCH_HOME
ARG HF_HOME

# Preload align models
# ${LANG} is intentionally left unquoted: it is a space-separated list and the
# loop relies on word splitting to visit each language code.
ARG LANG
RUN --mount=source=load_align_model.py,target=load_align_model.py \
    for i in ${LANG}; do echo "Aligning lang $i"; python3 load_align_model.py "$i"; done
######
# Final stage with model
# The no_model runtime image plus the cache directory filled by the
# load_whisper / load_align stages.
######
FROM ${NO_MODEL_STAGE} AS final

ARG UID

ARG CACHE_HOME
COPY --link --chown=$UID:0 --chmod=775 \
    --from=load_align ${CACHE_HOME} ${CACHE_HOME}

# Expose the build-time choices at runtime; the entrypoint below reads them.
ARG WHISPER_MODEL
ENV WHISPER_MODEL=${WHISPER_MODEL}
ARG LANG
ENV LANG=${LANG}

# Take the first language from LANG env variable
# Arguments given at `docker run` are appended after the `-c` script, so the
# shell binds the first of them to $0 and only the rest reach whisperx via "$@".
ENTRYPOINT [ "dumb-init", "--", "/bin/sh", "-c", "LANG=$(echo ${LANG} | cut -d ' ' -f1); whisperx --model \"${WHISPER_MODEL}\" --language \"${LANG}\" \"$@\"" ]

ARG VERSION
ARG RELEASE
LABEL version=${VERSION} \
    release=${RELEASE}