Initial commit

jim60105 · Aug 27, 2023 · 43ba06f · 43ba06f
commit 43ba06f
Show file tree

Hide file tree

Showing 8 changed files with 89 additions and 0 deletions.
diff --git a/.dockerignore b/.dockerignore
@@ -0,0 +1,4 @@
+**.github
+**/*.md
+**.git*
+**Dockerfile*
diff --git a/.gitattributes b/.gitattributes
@@ -0,0 +1 @@
+*.sh eol=lf
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+*.env
diff --git a/.gitmodules b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "whisperX"]
+	path = whisperX
+	url = https://github.com/m-bain/whisperX
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,57 @@
+# Language code that selects which align-<LANG> stage the final image is built
+# from. Declared before the first FROM so it can be expanded in the last FROM.
+ARG LANG=en
+
+# Base image
+FROM nvcr.io/nvidia/pytorch:23.07-py3 AS base
+
+# Build-time only: an ARG keeps this setting out of the runtime environment.
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Keep downloaded models in a fixed location instead of the home directory of
+# whichever user runs the download. The preload steps below and the non-root
+# runtime user then resolve the same cache, so preloaded models are reused.
+ENV HF_HOME=/cache/huggingface \
+    TORCH_HOME=/cache/torch
+
+WORKDIR /app
+
+# Install requirements while still running as the base image's default user
+COPY ./whisperX/requirements.txt .
+RUN python3 -m pip install --no-cache-dir -r ./requirements.txt ujson
+
+# Create the non-root user up front and hand it the model cache, so that every
+# preload step below writes files this user owns.
+RUN useradd -m -s /bin/bash appuser \
+    && mkdir -p "${HF_HOME}" "${TORCH_HOME}" \
+    && chown -R appuser:appuser /cache
+USER appuser
+
+# Preload faster-whisper model (override with --build-arg WHISPER_MODEL=...)
+ARG WHISPER_MODEL=tiny.en
+RUN python3 -c 'import faster_whisper; model = faster_whisper.WhisperModel("'${WHISPER_MODEL}'")'
+
+# Preload align model
+# One stage per language; each inherits the user and cache settings from base.
+# The en/fr/de/es/it stages load a torchaudio pipeline bundle by name.
+FROM base AS align-en
+ARG ALIGN_MODEL=WAV2VEC2_ASR_BASE_960H
+RUN python3 -c 'import torchaudio; bundle = torchaudio.pipelines.__dict__["'${ALIGN_MODEL}'"]; align_model = bundle.get_model(); labels = bundle.get_labels()'
+
+FROM base AS align-fr
+ARG ALIGN_MODEL=VOXPOPULI_ASR_BASE_10K_FR
+RUN python3 -c 'import torchaudio; bundle = torchaudio.pipelines.__dict__["'${ALIGN_MODEL}'"]; align_model = bundle.get_model(); labels = bundle.get_labels()'
+
+FROM base AS align-de
+ARG ALIGN_MODEL=VOXPOPULI_ASR_BASE_10K_DE
+RUN python3 -c 'import torchaudio; bundle = torchaudio.pipelines.__dict__["'${ALIGN_MODEL}'"]; align_model = bundle.get_model(); labels = bundle.get_labels()'
+
+FROM base AS align-es
+ARG ALIGN_MODEL=VOXPOPULI_ASR_BASE_10K_ES
+RUN python3 -c 'import torchaudio; bundle = torchaudio.pipelines.__dict__["'${ALIGN_MODEL}'"]; align_model = bundle.get_model(); labels = bundle.get_labels()'
+
+FROM base AS align-it
+ARG ALIGN_MODEL=VOXPOPULI_ASR_BASE_10K_IT
+RUN python3 -c 'import torchaudio; bundle = torchaudio.pipelines.__dict__["'${ALIGN_MODEL}'"]; align_model = bundle.get_model(); labels = bundle.get_labels()'
+
+# The ja/zh stages load a wav2vec2 model and processor through transformers.
+FROM base AS align-ja
+ARG ALIGN_MODEL=jonatasgrosman/wav2vec2-large-xlsr-53-japanese
+RUN python3 -c 'from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor; processor = Wav2Vec2Processor.from_pretrained("'${ALIGN_MODEL}'"); align_model = Wav2Vec2ForCTC.from_pretrained("'${ALIGN_MODEL}'")'
+
+FROM base AS align-zh
+ARG ALIGN_MODEL=jonatasgrosman/wav2vec2-large-xlsr-53-chinese-zh-cn
+RUN python3 -c 'from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor; processor = Wav2Vec2Processor.from_pretrained("'${ALIGN_MODEL}'"); align_model = Wav2Vec2ForCTC.from_pretrained("'${ALIGN_MODEL}'")'
+
+# Final image, built on top of the align stage selected by LANG
+FROM align-${LANG} AS final
+
+# Install whisperX (switch back to root for the system-wide pip install)
+USER root
+COPY ./whisperX/ .
+RUN python3 -m pip install --no-cache-dir .
+
+# Drop privileges again: the container runs as the non-root user that owns
+# the preloaded model cache.
+USER appuser
+
+STOPSIGNAL SIGINT
+ENTRYPOINT [ "whisperx" ]
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 陳鈞
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
@@ -0,0 +1 @@
+# docker-whisperX
diff --git a/whisperX b/whisperX