Skip to content

Commit

Permalink
Move all python modules to a requirements.txt file
Browse files Browse the repository at this point in the history
Move python modules to a separate file to coalesce all
libraries we need.

Update a few odds and ends related to various projects I'm
working on.
  • Loading branch information
pisymbol committed Dec 30, 2019
1 parent 7cc6388 commit 2c25e4e
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 61 deletions.
86 changes: 25 additions & 61 deletions ml/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04

ARG OPENCV_VER=4.1.0
ARG OPENCV_VER=4.2.0
ARG SPARK_VER=2.4.4
ARG OPENMPI_VER=4.0.2

WORKDIR /root
CMD ["/bin/bash"]
Expand All @@ -16,20 +18,24 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
curl \
git \
software-properties-common \
sudo \
tar \
unzip \
wget \
vim && \
apt-get clean && apt-get autoremove -y && rm -rf /var/lib/apt/lists/*


# Ensure CUDA is in our run-time loader's path: register the CUDA library
# directory with ld.so and rebuild the loader cache. Both commands use absolute
# paths, so no working-directory change is needed.
RUN echo '/usr/local/cuda/lib64' > /etc/ld.so.conf.d/cuda.conf && ldconfig

# Install some base dependencies for various modules below.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
dirmngr \
gfortran \
liblas-dev \
liblapack3 \
liblapacke \
liblapacke-dev \
libatlas3-base \
libavcodec-dev \
libavformat-dev \
Expand All @@ -44,11 +50,12 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
libgtk2.0-dev \
libjpeg-dev \
libjpeg8-dev \
liblapacke-dev \
liblapack-dev \
libleveldb-dev \
liblmdb-dev \
libpng-dev \
libprotobuf-dev \
libspatialindex-dev \
libsnappy-dev \
libswscale-dev \
libtbb2 \
Expand All @@ -66,43 +73,14 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
unzip && \
apt-get clean && apt-get autoremove -y && rm -rf /var/lib/apt/lists/*

RUN wget https://github.com/Kitware/CMake/releases/download/v3.15.2/cmake-3.15.2.tar.gz && tar -xvf cmake*tar.gz && cd cmake* && ./bootstrap && make && make install
# Build and install CMake from source. The version lives in a build argument,
# and the tarball and build tree are referenced by exact name (a bare `cmake*`
# glob matches both of them) and removed in the same layer that created them.
ARG CMAKE_VER=3.16.2
RUN wget -q https://github.com/Kitware/CMake/releases/download/v${CMAKE_VER}/cmake-${CMAKE_VER}.tar.gz && \
    tar -xf cmake-${CMAKE_VER}.tar.gz && \
    cd cmake-${CMAKE_VER} && ./bootstrap && make && make install && \
    cd .. && rm -rf cmake-${CMAKE_VER} cmake-${CMAKE_VER}.tar.gz

# Get the latest pip3
RUN wget -q https://bootstrap.pypa.io/get-pip.py && python3 get-pip.py && rm -f get-pip.py

# Install python3 infrastructure
RUN pip3 --no-cache-dir install \
click \
cntk-gpu \
Cython \
bs4 \
nltk \
pandas \
numpy \
seaborn \
matplotlib \
scikit-learn \
scikit-image \
imutils \
jupyter \
ipython \
h5py \
nose \
sympy \
mahotas \
ndg-httpsclient \
pyasn1 \
keras \
path.py \
Pillow \
pygments \
pyopenssl \
six \
sphinx \
wheel \
wordcloud \
zmq
# Install every Python package listed in requirements.txt in one pip run; the
# copied list is deleted again once the install finishes.
COPY requirements.txt .
RUN pip3 --no-cache-dir install -r requirements.txt && rm -f requirements.txt

# Install Intel DNN
RUN git clone https://github.com/intel/mkl-dnn.git && \
Expand All @@ -116,17 +94,18 @@ RUN git clone --recursive https://github.com/dmlc/xgboost && \
cd /root/xgboost && cd python-package && python3 setup.py install

# Install OpenCV
# OpenCV build prerequisites (GStreamer, Tesseract, LAPACK headers). The apt
# package lists are removed in the same layer, as the other apt-get steps in
# this file do.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
    libgstreamer1.0-dev \
    libgstreamer-plugins-base1.0-dev \
    tesseract-ocr \
    libtesseract-dev \
    liblapack-dev && \
    apt-get clean && rm -rf /var/lib/apt/lists/*
RUN wget -q -O opencv-${OPENCV_VER}.zip https://github.com/opencv/opencv/archive/${OPENCV_VER}.zip && \
wget -q -O opencv_contrib-${OPENCV_VER}.zip https://github.com/opencv/opencv_contrib/archive/${OPENCV_VER}.zip && \
unzip opencv-${OPENCV_VER}.zip && unzip opencv_contrib-${OPENCV_VER}.zip && \
cd /root/opencv-${OPENCV_VER} && mkdir build && cd build && \
cmake -DWITH_OPENGL=ON -DENABLE_FAST_MATH=1 -DCUDA_FAST_MATH=1 -DWITH_CUBLAS=1 -DWITH_TBB=ON -DWITH_GDAL=ON -DWITH_XINE=ON -DBUILD_PERF_TESTS=OFF -D BUILD_TESTS=OFF -DCUDA_NVCC_FLAGS="-D_FORCE_INLINES --expt-relaxed-constexpr" -DOPENCV_EXTRA_MODULES_PATH=/root/opencv_contrib-${OPENCV_VER}/modules .. && \
cmake -DWITH_OPENGL=ON -DWITH_GSTREAMER=ON -DENABLE_FAST_MATH=1 -DCUDA_FAST_MATH=1 -DWITH_CUBLAS=1 -DWITH_TBB=ON -DWITH_GDAL=ON -DWITH_XINE=ON -DBUILD_PERF_TESTS=OFF -D BUILD_TESTS=OFF -DCUDA_NVCC_FLAGS="-D_FORCE_INLINES --expt-relaxed-constexpr" -DOPENCV_EXTRA_MODULES_PATH=/root/opencv_contrib-${OPENCV_VER}/modules .. && \
make && make install && ldconfig && \
echo 'ln /dev/null /dev/raw1394' >> ~/.bashrc && rm -rf /root/opencv*

# Install OpenMPI
RUN wget -q https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.1.tar.gz && tar -zxf openmpi-4.0.1.tar.gz && \
cd openmpi-4.0.1 && ./configure --prefix=/usr/local/mpi && \
RUN wget -q https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-${OPENMPI_VER}.tar.gz && tar -zxf openmpi-${OPENMPI_VER}.tar.gz && \
cd openmpi-${OPENMPI_VER} && ./configure --prefix=/usr/local/mpi && \
make && make install

# Need to use gcc-7 for OpenCV compilation (this may change)
Expand All @@ -142,40 +121,25 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
RUN update-alternatives --install /usr/bin/java java /usr/lib/jvm/java-8-openjdk-amd64/bin/java 10000

# Install Spark
RUN wget -q -O spark.tgz http://mirror.olnevhost.net/pub/apache/spark/spark-2.4.3/spark-2.4.3-bin-hadoop2.7.tgz && \
RUN wget -q -O spark.tgz http://mirror.olnevhost.net/pub/apache/spark/spark-${SPARK_VER}/spark-${SPARK_VER}-bin-hadoop2.7.tgz && \
mkdir -p /usr/local && tar -xf spark.tgz -C /usr/local && \
mv /usr/local/spark*2.4.3* /usr/local/spark && \
mv /usr/local/spark*${SPARK_VER}* /usr/local/spark && \
pip3 install --upgrade pyspark

RUN pip3 install torch torchvision
RUN pip3 install --upgrade fake_useragent click ipython bokeh flask pytest coverage gunicorn
RUN pip3 install --upgrade bcolz theano kaggle-cli pymc3 animation graphviz
RUN pip3 install --upgrade numpy notebook jupyter jupyterlab scipy kaggle urllib3 spacy spacy[cuda92] gensim plotly quandl fastai tornado==5.1.1 ipython-sql
RUN pip3 install --upgrade awscli mwparserfromhell
RUN pip3 install --upgrade jupytext
RUN jupyter serverextension enable --py jupyterlab --sys-prefix
RUN jupyter nbextension install --py jupytext && jupyter nbextension enable --py jupytext

# Download all spacy embeddings
# Download all embeddings/tokenizers up front
# Fetch the spaCy models one after another in a single step; the loop stops at
# the first download that fails.
RUN for model in \
        en \
        en_core_web_sm \
        en_core_web_md \
        en_core_web_lg \
        en_vectors_web_lg; \
    do python3 -m spacy download "$model" || exit 1; done

# Install nltk
RUN python3 -c "import nltk; nltk.download('all')"

# Allow plotly and bokeh to display in JupyterLab
# Setup jupyter extensions
RUN jupyter serverextension enable --py jupyterlab --sys-prefix
RUN jupyter nbextension install --py jupytext && jupyter nbextension enable --py jupytext
RUN jupyter labextension install @jupyterlab/plotly-extension
RUN jupyter labextension install jupyterlab_bokeh

# Install the Heroku CLI. The installer is downloaded to a file first: with
# `curl ... | sh`, a failed download hands sh an empty script and the step still
# succeeds. `-f` makes curl fail on HTTP errors so the build stops instead.
RUN curl -fsSL -o /tmp/heroku-install.sh https://cli-assets.heroku.com/install-ubuntu.sh && \
    sh /tmp/heroku-install.sh && \
    rm -f /tmp/heroku-install.sh

# Upgrade to 2.0
RUN pip3 install tensorflow-gpu==2.0.0-rc0

# Clean up: delete whatever earlier steps left in /root and set the directory's
# mode to 755.
# NOTE(review): files written by earlier RUN steps remain stored in those
# layers, so this does not reduce the image size — consider removing build
# artifacts in the step that creates them.
RUN rm -rf /root/* && chmod 755 /root

Expand Down Expand Up @@ -219,7 +183,7 @@ RUN sed -i -E 's,kaggle,Development/kaggle,g' /home/pisymbol/.kaggle/kaggle.json

# Required spark configuration for local user access
ENV SPARK_HOME=/usr/local/spark
ENV PYTHONPATH=/usr/local/spark/python:/usr/local/spark/python/lib/py4j-0.10.4-src.zip
ENV PYTHONPATH=/usr/local/spark/python:/usr/local/spark/python/lib/py4j-0.10.7-src.zip
ENV PATH=/usr/local/spark/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/spark/bin

EXPOSE 4040 6006 8888
EXPOSE 4040 6006 8888 8080 8081
69 changes: 69 additions & 0 deletions ml/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
animation
awscli
bcolz
bokeh
bs4
cartopy
click
cntk-gpu
contextily
coverage
Cython
fake_useragent
fastai
flask
folium
gensim
geopandas
geoplot
graphviz
gunicorn
h5py
imutils
ipyleaflet
ipython
jupyter
jupyterlab
jupytext
kaggle
kaggle-cli
keras
mahotas
mapclassify
matplotlib
mwparserfromhell
ndg-httpsclient
nltk
nose
notebook
numpy
pandas
path.py
Pillow
plotly
pyasn1
pygments
pymc3
pyopenssl
pytest
# Jupyter %sql magic; `python-sql` is a different package (a SQL query builder)
ipython-sql
quandl
rasterio
rasterstats
rtree
scikit-image
scikit-learn
scipy
seaborn
six
spacy
# GPU extra chosen to match the CUDA 10.0 base image
spacy[cuda100]
sphinx
sympy
tensorflow-gpu
torch
torchvision
urllib3
wheel
wordcloud
zmq

0 comments on commit 2c25e4e

Please sign in to comment.