Skip to content

Commit

Permalink
Feature/130 cuda flex versions (#132)
Browse files Browse the repository at this point in the history
* Tested combinations of Keras 2.x and TF 1.4.1, 1.8.0, and 1.9.0

* Go runner release for TF 1.4.1, 1.8.0, and 1.9.0

* Stream output while building our large container to prevent Travis from timing out due to a lack of output

* Travis has some issues with networking at the moment, push again
  • Loading branch information
karlmutch authored Aug 10, 2018
1 parent 731947d commit b2eee5f
Show file tree
Hide file tree
Showing 10 changed files with 133 additions and 150 deletions.
4 changes: 2 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ install:
- export -f travis_fold
- export -f travis_time_start
- export -f travis_time_finish
- go env
- export GOBIN=$GOPATH/bin
- env | sort
- go env
- env
- go get -u github.com/golang/dep/cmd/dep
- dep ensure
- (mkdir $GOPATH/src/github.com/karlmutch; cd $GOPATH/src/github.com/karlmutch ; git clone https://github.com/karlmutch/duat.git)
Expand Down
17 changes: 13 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ MAINTAINER [email protected]

ENV LANG C.UTF-8

ENV CUDA_DEB "https://developer.nvidia.com/compute/cuda/8.0/Prod2/local_installers/cuda-repo-ubuntu1604-8-0-local-ga2_8.0.61-1_amd64-deb"
ENV CUDA_8_DEB "https://developer.nvidia.com/compute/cuda/8.0/Prod2/local_installers/cuda-repo-ubuntu1604-8-0-local-ga2_8.0.61-1_amd64-deb"
ENV CUDA_9_DEB "https://developer.nvidia.com/compute/cuda/9.0/Prod/local_installers/cuda-repo-ubuntu1604-9-0-local_9.0.176-1_amd64-deb"
ENV CUDA_PACKAGE_VERSION 8-0
ENV CUDA_FILESYS_VERSION 8.0
ENV NVIDIA_VERSION 384
Expand All @@ -16,11 +17,19 @@ RUN \
apt-get -y install make git gcc && apt-get clean

RUN cd /tmp && \
wget --quiet -O /tmp/cuda.deb ${CUDA_DEB} && \
dpkg -i /tmp/cuda.deb && \
wget --quiet -O /tmp/cuda_8.deb ${CUDA_8_DEB} && \
dpkg -i /tmp/cuda_8.deb && \
apt-get -y update && \
DEBIAN_FRONTEND=noninteractive apt-get -y install --no-install-recommends nvidia-cuda-dev cuda-nvml-dev-${CUDA_PACKAGE_VERSION} && \
rm /tmp/cuda.deb
rm /tmp/cuda*.deb && \
apt-get clean

#wget --quiet -O /tmp/cuda_9.deb ${CUDA_9_DEB} && \
#dpkg -i /tmp/cuda_9.deb && \
# apt-key add /var/cuda-repo-9-0-local/7fa2af80.pub && \
#apt-get -y update && \
#DEBIAN_FRONTEND=noninteractive apt-get -y install --no-install-recommends cuda-runtime-9-2 && \
#rm /tmp/cuda*.deb

RUN \
ln -s /usr/local/cuda-${CUDA_FILESYS_VERSION} /usr/local/cuda && \
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# studio-go-runner

Version: <repo-version>0.6.0</repo-version>
Version: <repo-version>0.7.0</repo-version>

[![Build Status](https://travis-ci.org/SentientTechnologies/studio-go-runner.svg?branch=master)](https://travis-ci.org/SentientTechnologies/studio-go-runner) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/SentientTechnologies/studio-go-runner/blob/master/LICENSE) [![Go Report Card](https://goreportcard.com/badge/SentientTechnologies/studio-go-runner)](https://goreportcard.com/report/SentientTechnologies/studio-go-runner)

Expand Down
26 changes: 17 additions & 9 deletions build.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package main
import (
"flag"
"fmt"
"io"
"io/ioutil"
"os"
"path"
Expand Down Expand Up @@ -199,8 +200,7 @@ func runBuild(dir string, verFn string) (outputs []string, err errors.Error) {
// dir Dockerfile is for a projects build container typically.
if dir != "." {
logger.Info(fmt.Sprintf("dockerizing %s", dir))
if output, err := dockerize(md); err != nil {
logger.Warn(strings.Join(output, "\n"))
if err := dockerize(md); err != nil {
return nil, err
}
// Check for a bin directory and continue if none
Expand Down Expand Up @@ -435,15 +435,23 @@ func test(md *duat.MetaData) (outputs []string, errs []errors.Error) {
}

// dockerize is used to produce containers where appropriate within a build
// target directory
// target directory. Output is sent to the console as these steps can take
// very long periods of time, and Travis, along with other build environments,
// is prone to timing out if it sees no output for an extended time.
//
func dockerize(md *duat.MetaData) (outputs []string, err errors.Error) {
func dockerize(md *duat.MetaData) (err errors.Error) {

exists, _, err := md.ImageExists()

output := strings.Builder{}
if !exists {
err = md.ImageCreate(&output)
}
return strings.Split(output.String(), "\n"), err
pr, pw := io.Pipe()

go func() {
if !exists {
err = md.ImageCreate(pw)
}
pw.Close()
}()
io.Copy(os.Stdout, pr)

return err
}
5 changes: 4 additions & 1 deletion build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ export LOGXI_FORMAT="happy,maxcol=1024"
[ -z "$TERM" ] && export TERM=xterm+256color;

if [ -n "$(type -t travis_fold)" ] && [ "$(type -t travis_fold)" = function ]; then
type travis_fold
type travis_time_start
type travis_time_finish
:
else
function travis_fold() {
Expand Down Expand Up @@ -86,4 +89,4 @@ travis_fold start "image.push"
fi
fi
travis_time_finish
travis_fold end "image.push"
travis_fold end "image.push"
95 changes: 76 additions & 19 deletions cmd/runner/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,31 +1,88 @@
FROM tensorflow/tensorflow:1.4.1-gpu
# FROM nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04
FROM ubuntu:16.04
LABEL maintainer "[email protected]"

MAINTAINER Karl Mutch <[email protected]>
RUN \
apt-get update && \
apt-get install -y locales && \
apt-get install -y language-pack-en && \
update-locale "en_US.UTF-8" && \
apt-get install -y --no-install-recommends ca-certificates apt-transport-https gnupg-curl && \
rm -rf /var/lib/apt/lists/* && \
NVIDIA_GPGKEY_SUM=d1be581509378368edeec8c1eb2958702feedf3bc3d17011adbf24efacce4ab5 && \
NVIDIA_GPGKEY_FPR=ae09fe4bbd223a84b2ccfce3f60f4b3d7fa2af80 && \
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub && \
apt-key adv --export --no-emit-version -a $NVIDIA_GPGKEY_FPR | tail -n +5 > cudasign.pub && \
echo "$NVIDIA_GPGKEY_SUM cudasign.pub" | sha256sum -c --strict - && rm cudasign.pub && \
echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \
echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list && \
apt-get update


RUN apt-get install -y --no-install-recommends \
cuda-nvrtc-8-0=8.0.61-1 \
cuda-nvgraph-8-0=8.0.61-1 \
cuda-cusolver-8-0=8.0.61-1 \
cuda-cublas-8-0=8.0.61.2-1 \
cuda-cufft-8-0=8.0.61-1 \
cuda-curand-8-0=8.0.61-1 \
cuda-cusparse-8-0=8.0.61-1 \
cuda-npp-8-0=8.0.61-1 \
cuda-cudart-8-0=8.0.61-1

RUN apt-get install -y --no-install-recommends \
cuda-cudart-9-0=9.0.176-1 \
cuda-command-line-tools-9-0 \
cuda-cufft-9-0 \
cuda-curand-9-0 \
cuda-cusolver-9-0 \
cuda-cusparse-9-0 \
cuda-libraries-9-0=9.0.176-1 \
cuda-cublas-9-0=9.0.176.3-1 \
libnccl2=2.2.13-1+cuda9.0 && \
apt-mark hold libnccl2

RUN echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && \
echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf

RUN apt-get clean && \
rm /etc/apt/sources.list.d/cuda.list && \
echo "***" && \
rm /etc/apt/sources.list.d/nvidia-ml.list && \
apt-get -y update && \
apt-get -y install software-properties-common wget openssl ssh curl jq apt-utils dnsutils vim htop
ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}
ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64

# add tensorflow-gpu to use with gpu to sudo pip install
# to use on linux machines with gpus
RUN apt-get -y update && \
apt-get -y upgrade
# nvidia-container-runtime
ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
ENV NVIDIA_REQUIRE_CUDA "cuda>=8.0"

RUN apt-get -y install python-pip python-dev python3-pip python3-dev python3 git lshw && \
# Pick up some TF dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
libcudnn5=5.1.10-1+cuda8.0 \
libcudnn6=6.0.21-1+cuda8.0 \
libcudnn7=7.1.4.18-1+cuda9.0 \
libnccl2=2.2.13-1+cuda9.0 \
libhdf5-serial-dev \
libpng12-dev \
libzmq3-dev \
pkg-config \
software-properties-common \
unzip \
&& \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

RUN apt-get update && \
apt-get install -y python python-pip python3 python3-pip python3-dev python-dev git lshw && \
pip install --upgrade pip==9.0.3 setuptools

RUN \
apt-get -y install libssl-dev libcurl4-openssl-dev libsm6 libxrender-dev libxext-dev && \
pip install tensorflow-gpu==1.4.1 && \
pip install tensorflow-gpu==1.8.0 && \
pip install tensorflow-gpu==1.9.0 && \
pip3 install --upgrade pip==9.0.3 --force-reinstall && \
pip install --upgrade pip==9.0.3 --force-reinstall && \
python -m pip install pip==9.0.3 virtualenv==15.2.0 --force-reinstall && \
python3 -m pip install pip==9.0.3 virtualenv==15.2.0 --force-reinstall && \
pip install --upgrade Python==2.7.12

RUN apt-get install -y locales && \
apt-get install -y language-pack-en && \
update-locale "en_US.UTF-8"
apt-get clean

RUN mkdir -p /runner/certs/aws-sqs
WORKDIR /runner
Expand Down
54 changes: 0 additions & 54 deletions cmd/runner/Dockerfile-tf-1.4.1

This file was deleted.

53 changes: 0 additions & 53 deletions cmd/runner/Dockerfile-tf-1.8.0

This file was deleted.

Loading

0 comments on commit b2eee5f

Please sign in to comment.