diff --git a/CHANGELOG.md b/CHANGELOG.md index 30d1a18a7..668441fe0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -325,10 +325,11 @@ IMPROVEMENTS: * Upgrades to the AWS cli, and prometheus common libraries * Introduce queue-status tool for use with Job dispatching deployments * Ubuntu 18.04 migrated to Ubuntu 20.04 -* TensorFlow 1.x support removed, versions now supported are 2.1-2.4 +* TensorFlow 1.x support removed, versions now supported are 2.1-2.5 * Python support bumped to include 3.9 * gRPC and protobuf upgrades * Go 1.16.4 support +* CUDA 11.2 Migration FIXES: diff --git a/Dockerfile_base b/Dockerfile_base index f73e798fb..895378c48 100644 --- a/Dockerfile_base +++ b/Dockerfile_base @@ -7,9 +7,9 @@ MAINTAINER karlmutch@gmail.com ENV LANG C.UTF-8 ENV DEBIAN_FRONTEND noninteractive -ENV CUDA_PACKAGE_VERSION 11-0 -ENV CUDA_FILESYS_VERSION 11.0 -ENV NVIDIA_VERSION 455 +ENV CUDA_PACKAGE_VERSION 11-2 +ENV CUDA_FILESYS_VERSION 11.2 +ENV NVIDIA_VERSION 465 RUN apt-get update && \ apt-get install -y locales && \ @@ -24,13 +24,18 @@ RUN apt-get update && \ RUN mkdir /usr/lib/nvidia && \ cd /tmp && \ apt-get install -y freeglut3 freeglut3-dev libxi-dev libxmu-dev && \ - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub && \ - wget -q -O /etc/apt/preferences.d/cuda-repository-pin-600 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin && \ - wget -q -O /tmp/cuda.deb http://developer.download.nvidia.com/compute/cuda/11.0.2/local_installers/cuda-repo-ubuntu1804-11-0-local_11.0.2-450.51.05-1_amd64.deb && \ + apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub && \ + wget -q -O /etc/apt/preferences.d/cuda-repository-pin-600 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin && \ + wget -q -O /tmp/cuda.deb https://developer.download.nvidia.com/compute/cuda/11.2.2/local_installers/cuda-repo-ubuntu2004-11-2-local_11.2.2-460.32.03-1_amd64.deb && \ dpkg -i /tmp/cuda.deb && \ apt-get -y update -RUN apt-get -y install --no-install-recommends nvidia-cuda-dev && \ +RUN \ + apt list nvidia-\* &&\ + apt list cuda-\* &&\ + apt-get -y install --no-install-recommends cuda-command-line-tools-${CUDA_PACKAGE_VERSION} && \ + apt-get -y install --no-install-recommends cuda-libraries-${CUDA_PACKAGE_VERSION} && \ + apt-get -y install --no-install-recommends cuda-libraries-dev-${CUDA_PACKAGE_VERSION} && \ apt-get -y install --no-install-recommends cuda-nvml-dev-${CUDA_PACKAGE_VERSION} && \ apt-get clean @@ -40,7 +45,6 @@ RUN \ ln -s /usr/local/cuda/targets/x86_64-linux/include /usr/local/cuda/include && \ ln -s /usr/lib/nvidia-${NVIDIA_VERSION}/libnvidia-ml.so /usr/lib/nvidia/libnvidia-ml.so && \ ln -s /usr/lib/nvidia-${NVIDIA_VERSION}/libnvidia-ml.so.1 /usr/lib/nvidia/libnvidia-ml.so.1 && \ - ln -s /usr/lib/nvidia-${NVIDIA_VERSION}/libnvidia-ml.so.${NVIDIA_VERSION}.45.01 /usr/lib/nvidia/libnvidia-ml.so.${NVIDIA_VERSION}.45.01 && \ apt-get -y install --no-install-recommends libcuinj64-10.1 && \ rm /tmp/cuda*.deb && \ apt-get -y autoclean && \ @@ -51,8 +55,8 @@ RUN \ RUN \ rm -rf /var/lib/apt/lists/* && \ - echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \ - echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list && \ + echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \ + echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu2004/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list && \ apt-get update RUN apt-get install -y --no-install-recommends \ @@ -85,6 +89,6 @@ RUN wget ${PROTOBUF_URL} && \ chmod -R +r /usr/include/google LABEL vendor="Cognizant, Evolutionary AI, DB&T" \ - registry.version=0.0.8 \ + registry.version=0.0.9 \ registry.repo=leafai/studio-go-runner-dev-base \ registry.base=studio-go-runner-dev-base diff --git a/Dockerfile_developer b/Dockerfile_developer index 39edfafc1..8ddfa7355 100644 --- a/Dockerfile_developer +++ b/Dockerfile_developer @@ -1,15 +1,15 @@ # Copyright 2018-2021 (c) Cognizant Digital Business, Evolutionary AI. All rights reserved. Issued under the Apache 2.0 License. # -FROM leafai/studio-go-runner-dev-base:0.0.8 +FROM leafai/studio-go-runner-dev-base:0.0.9 MAINTAINER karlmutch@gmail.com ENV LANG C.UTF-8 ENV DEBIAN_FRONTEND noninteractive -ENV CUDA_PACKAGE_VERSION 11-0 -ENV CUDA_FILESYS_VERSION 11.0 -ENV NVIDIA_VERSION 455 +ENV CUDA_PACKAGE_VERSION 11-2-2 +ENV CUDA_FILESYS_VERSION 11.2.2 +ENV NVIDIA_VERSION 465 ENV USER {{.duat.userName}} ENV USER_ID {{.duat.userID}} @@ -50,7 +50,7 @@ RUN \ python3 get-pip.py pip==20.1 setuptools==44.0.0 wheel==0.33.6 && \ pip3 install pyopenssl cryptography --upgrade -ENV GO_VERSION 1.16.4 +ENV GO_VERSION 1.16.5 RUN \ cd /home/${USER} && \ diff --git a/Dockerfile_microk8s b/Dockerfile_microk8s index 8bfd45a9e..f56ef7127 100644 --- a/Dockerfile_microk8s +++ b/Dockerfile_microk8s @@ -1,13 +1,13 @@ # Copyright 2018-2021 (c) Cognizant Digital Business, Evolutionary AI. All rights reserved. Issued under the Apache 2.0 License. # -FROM registry.container-registry.svc.cluster.local:5000/leafai/studio-go-runner-dev-stack:0.0.2 +FROM registry.container-registry.svc.cluster.local:5000/leafai/studio-go-runner-dev-stack:0.0.3 MAINTAINER karlmutch@gmail.com ENV LANG C.UTF-8 ENV DEBIAN_FRONTEND noninteractive -ENV GO_VERSION 1.16.4 +ENV GO_VERSION 1.16.5 RUN \ curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \ diff --git a/Dockerfile_stack b/Dockerfile_stack index cc5e93b86..01beb66db 100644 --- a/Dockerfile_stack +++ b/Dockerfile_stack @@ -1,20 +1,20 @@ # Copyright 2018-2021 (c) Cognizant Digital Business, Evolutionary AI. All rights reserved. Issued under the Apache 2.0 License. -# -FROM leafai/studio-go-runner-dev-base:0.0.8 + +FROM leafai/studio-go-runner-dev-base:0.0.9 MAINTAINER karlmutch@gmail.com ENV LANG C.UTF-8 ENV DEBIAN_FRONTEND noninteractive -ENV CUDA_PACKAGE_VERSION 11-0 -ENV CUDA_FILESYS_VERSION 11.0 -ENV NVIDIA_VERSION 455 +ENV CUDA_PACKAGE_VERSION 11-2 +ENV CUDA_FILESYS_VERSION 11.2 +ENV NVIDIA_VERSION 465 # nvidia-container-runtime ENV NVIDIA_VISIBLE_DEVICES all ENV NVIDIA_DRIVER_CAPABILITIES compute,utility -ENV NVIDIA_REQUIRE_CUDA "cuda>=11.0" +ENV NVIDIA_REQUIRE_CUDA "cuda>=11.2" # Pick up some TF dependencies RUN apt-get update && apt-get install -y --no-install-recommends \ @@ -31,9 +31,9 @@ RUN \ curl https://pyenv.run | /bin/bash && \ export PATH=$HOME/.pyenv/bin:$PATH && \ echo "export PATH=\"$HOME/.pyenv/bin:$PATH\"" >> $HOME/.bashrc && \ - echo "eval \"\$(pyenv init -)\"" >> $HOME/.bashrc && \ + echo "eval \"\$(pyenv init --path)\"" >> $HOME/.bashrc && \ echo "eval \"\$(pyenv virtualenv-init -)\"" >> $HOME/.bashrc && \ - eval "$(pyenv init -)" && \ + eval "$(pyenv init --path)" && \ eval "$(pyenv virtualenv-init -)" && \ pyenv install --list | grep " 3\.[56789]" && \ pyenv install 3.9.5 && \ @@ -46,22 +46,23 @@ RUN \ ENV PATH /root/.pyenv/bin:$PATH RUN \ - eval "$(pyenv init -)" && \ + eval "$(pyenv init --path)" && \ eval "$(pyenv virtualenv-init -)" && \ which python3 && \ which pip3 && \ curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ - python3 get-pip.py pip==20.1 setuptools==44.0.0 wheel==0.33.6 && \ + python3 get-pip.py pip==21.1.2 setuptools==44.0.0 wheel==0.33.6 && \ pip3 install pyopenssl cryptography --upgrade RUN apt-get -y install libssl-dev libcurl4-openssl-dev libsm6 libxrender-dev libxext-dev libopenblas-base libopenblas-dev && \ - eval "$(pyenv init -)" && \ + eval "$(pyenv init --path)" && \ eval "$(pyenv virtualenv-init -)" && \ python3 -m pip install tensorflow-gpu==2.3.0 && \ python3 -m pip install tensorflow-gpu==2.4.0 && \ + python3 -m pip install tensorflow-gpu==2.5.0 && \ apt-get clean LABEL vendor="Cognizant, Evolutionary AI, DB&T" \ - registry.version=0.0.2 \ + registry.version=0.0.3 \ registry.repo=leafai/studio-go-runner-dev-stack \ registry.base=studio-go-runner-dev-stack diff --git a/Dockerfile_standalone b/Dockerfile_standalone index f4304aa9a..0d878cf5b 100644 --- a/Dockerfile_standalone +++ b/Dockerfile_standalone @@ -1,13 +1,13 @@ # Copyright 2018-2021 (c) Cognizant Digital Business, Evolutionary AI. All rights reserved. Issued under the Apache 2.0 License. # -FROM leafai/studio-go-runner-dev-stack:0.0.2 +FROM leafai/studio-go-runner-dev-stack:0.0.3 MAINTAINER karlmutch@gmail.com ENV LANG C.UTF-8 ENV DEBIAN_FRONTEND noninteractive -ENV GO_VERSION 1.16.4 +ENV GO_VERSION 1.16.5 RUN \ curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \ diff --git a/Dockerfile_workstation b/Dockerfile_workstation index d8265067f..b4c09dc32 100644 --- a/Dockerfile_workstation +++ b/Dockerfile_workstation @@ -1,13 +1,13 @@ # Copyright 2018-2021 (c) Cognizant Digital Business, Evolutionary AI. All rights reserved. Issued under the Apache 2.0 License. # -FROM leafai/studio-go-runner-dev-stack:0.0.2 +FROM leafai/studio-go-runner-dev-stack:0.0.3 MAINTAINER karlmutch@gmail.com ENV LANG C.UTF-8 ENV DEBIAN_FRONTEND noninteractive -ENV GO_VERSION 1.16.4 +ENV GO_VERSION 1.16.5 RUN \ mkdir -p /project/go && \ diff --git a/README.md b/README.md index ce511f9a1..4ed343cab 100644 --- a/README.md +++ b/README.md @@ -391,9 +391,9 @@ sudo dpkg -i trivy_0.18.3_Linux-64bit.deb This code based makes use of Go 1.16+. The compiler can be found on the golang.org web site for downloading. On Ubuntu the following commands can be used: ``` -wget https://golang.org/dl/go1.16.4.linux-amd64.tar.gz +wget https://golang.org/dl/go1.16.5.linux-amd64.tar.gz rm -rf go -tar xzf go1.16.4.linux-amd64.tar.gz +tar xzf go1.16.5.linux-amd64.tar.gz export GOROOT=`pwd`/go export PATH=`pwd`/go/bin:$PATH ``` diff --git a/cmd/runner/Dockerfile.stock b/cmd/runner/Dockerfile.stock index afb25f80f..093ba5c4f 100644 --- a/cmd/runner/Dockerfile.stock +++ b/cmd/runner/Dockerfile.stock @@ -1,6 +1,6 @@ # Copyright 2018-2021 (c) Cognizant Digital Business, Evolutionary AI. All rights reserved. Issued under the Apache 2.0 License. # -FROM nvidia/cuda:11.0-cudnn8-runtime-ubuntu18.04 +FROM nvidia/cuda:11.2.2-cudnn8-runtime-ubuntu20.04 ENV DEBIAN_FRONTEND noninteractive @@ -23,7 +23,7 @@ ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cud # nvidia-container-runtime ENV NVIDIA_VISIBLE_DEVICES all ENV NVIDIA_DRIVER_CAPABILITIES compute,utility -ENV NVIDIA_REQUIRE_CUDA "cuda>=11.0" +ENV NVIDIA_REQUIRE_CUDA "cuda>=11.2" RUN apt-get -y update && apt-get install -y --no-install-recommends \ pkg-config \ @@ -81,6 +81,7 @@ RUN \ eval "$(pyenv virtualenv-init -)" && \ python3 -m pip install tensorflow-gpu==2.3.0 && \ python3 -m pip install tensorflow-gpu==2.4.0 && \ + python3 -m pip install tensorflow-gpu==2.5.0 && \ python3 -m pip install virtualenv==15.2.0 --force-reinstall && \ python3 -m pip install opencv-python-headless && \ apt-get clean @@ -107,7 +108,7 @@ COPY add-ons/termite.terminfo /lib/terminfo/x/xterm-termite # Prometheus instrumented port EXPOSE 9090 -ENV NVIDIA_REQUIRE_CUDA=cuda>=11.0 +ENV NVIDIA_REQUIRE_CUDA=cuda>=11.2 COPY run.sh /runner/run.sh COPY bin/runner-linux-amd64 /runner/runner-linux-amd64 diff --git a/cmd/runner/run.sh b/cmd/runner/run.sh index 34e7318ed..59415ad12 100644 --- a/cmd/runner/run.sh +++ b/cmd/runner/run.sh @@ -1,6 +1,6 @@ #!/bin/bash -e -# Copyright 2018-2020 (c) Cognizant Digital Business, Evolutionary AI. All rights reserved. Issued under the Apache 2.0 License. +# Copyright 2018-2021 (c) Cognizant Digital Business, Evolutionary AI. All rights reserved. Issued under the Apache 2.0 License. echo "pip 3 freeze and config" pip3 freeze @@ -30,11 +30,6 @@ else ls /usr/local/nvidia/bin echo "** /usr/local/nvidia/lib64" ls /usr/local/nvidia/lib64 - echo "** /etc/ld.so.conf.d/cuda-8-0.conf" - cat /etc/ld.so.conf.d/cuda-8-0.conf - echo "** /usr/local/cuda-8.0/targets/x86_64-linux/lib" - ls /usr/local/cuda-8.0/targets/x86_64-linux/lib - find . -not \( -path .cache -prune \) -not \( -path .pyenv -prune \) -print || true find / -name libnvidia-ml\* -print find / -name nvidia-smi -print /runner/runner-linux-amd64 diff --git a/docs/ci.md b/docs/ci.md index 9c14c2a25..3ff05fe6e 100644 --- a/docs/ci.md +++ b/docs/ci.md @@ -253,8 +253,8 @@ In order to prepare for producing product specific build images a base image is If you wish to simply use an existing build configuration then you can pull the prebuilt image into your local docker registry, or from docker hub using the following command: ``` -docker pull leafai/studio-go-runner-dev-base:0.0.7 -docker pull leafai/studio-go-runner-dev-stack:0.0.1 +docker pull leafai/studio-go-runner-dev-base:0.0.9 +docker pull leafai/studio-go-runner-dev-stack:0.0.3 ``` For situations where an on-premise or single developer machine the base image can be built with the `Dockerfile_base`, and `Dockerfile_stack` files using the following command: diff --git a/licenses.manifest b/licenses.manifest index 6d693f5b4..397fd562c 100644 --- a/licenses.manifest +++ b/licenses.manifest @@ -1,12 +1,12 @@ .,ECL-2.0,0.7848921 docs/slides,MIT-0,0.8050314 -docs/slides/lib/font/source-sans-pro,OFL-1.0-RFN,0.757377 +docs/slides/lib/font/source-sans-pro,OFL-1.0,0.757377 examples/aws/aws,OpenSSL,0.7797131 examples/aws/aws/dist/cryptography-2.8-py3.7.egg-info,BSD-3-Clause-Clear,0.83412325 -examples/aws/cpu,MPL-2.0-no-copyleft-exception,0.33333334 -examples/docker,deprecated_GPL-2.0-with-font-exception,0.33333334 +examples/aws/cpu,deprecated_GPL-2.0-with-bison-exception,0.33333334 +examples/docker,deprecated_GPL-2.0-with-autoconf-exception,0.33333334 examples/local,deprecated_GPL-2.0-with-font-exception,0.33333334 -tools/serving-bridge,deprecated_GPL-2.0-with-classpath-exception,0.33333334 +tools/serving-bridge,MPL-2.0-no-copyleft-exception,0.33333334 vendor/cloud.google.com/go,SHL-0.5,0.85278857 vendor/github.com/Azure/go-autorest,ECL-2.0,0.83882034 vendor/github.com/Azure/go-autorest/autorest,ECL-2.0,0.83882034 @@ -94,7 +94,7 @@ vendor/github.com/karlmutch/semver,MIT-0,0.8148148 vendor/github.com/karlmutch/vtclean,MIT-0,0.8125 vendor/github.com/kevinburke/ssh_config,MIT-0,0.8125 vendor/github.com/klauspost/compress,BSD-3-Clause,0.9306931 -vendor/github.com/klauspost/compress/zstd,IJG,0.33333334 +vendor/github.com/klauspost/compress/zstd,Linux-syscall-note,0.33333334 vendor/github.com/klauspost/compress/zstd/internal/xxhash,MIT-0,0.82208586 vendor/github.com/klauspost/cpuid/v2,MIT-0,0.81595093 vendor/github.com/klauspost/pgzip,MIT-0,0.82208586 @@ -110,7 +110,7 @@ vendor/github.com/mholt/archiver/v3,MIT-0,0.8148148 vendor/github.com/michaelklishin/rabbit-hole/v2,BSD-2-Clause-NetBSD,0.9076087 vendor/github.com/minio/madmin-go,SHL-0.5,0.85278857 vendor/github.com/minio/md5-simd,SHL-0.5,0.85278857 -vendor/github.com/minio/minio,deprecated_AGPL-3.0,0.98848265 +vendor/github.com/minio/minio,AGPL-3.0-or-later,0.98848265 vendor/github.com/minio/minio-go/v7,SHL-0.5,0.85278857 vendor/github.com/minio/sha256-simd,SHL-0.5,0.85278857 vendor/github.com/mitchellh/copystructure,MIT-0,0.81595093 @@ -131,7 +131,7 @@ vendor/github.com/pkg/errors,BSD-2-Clause-NetBSD,0.9076087 vendor/github.com/prometheus/client_golang,SHL-0.5,0.85278857 vendor/github.com/prometheus/client_model,SHL-0.5,0.85278857 vendor/github.com/prometheus/common,SHL-0.5,0.85278857 -vendor/github.com/prometheus/common/internal/bitbucket.org/ww/goautoneg,EUDatagrid,0.33333334 +vendor/github.com/prometheus/common/internal/bitbucket.org/ww/goautoneg,Nokia,0.33333334 vendor/github.com/prometheus/procfs,SHL-0.5,0.85278857 vendor/github.com/prometheus/prom2json,SHL-0.5,0.85278857 vendor/github.com/rs/xid,MIT-0,0.8125 @@ -186,7 +186,7 @@ vendor/k8s.io/client-go,SHL-0.5,0.85278857 vendor/k8s.io/klog/v2,SHL-0.5,0.8657289 vendor/k8s.io/utils,SHL-0.5,0.85278857 vendor/sigs.k8s.io/structured-merge-diff/v4,SHL-0.5,0.85214007 -p,BSD-3-Clause,0.9306931 +D-3-Clause,0.9306931 src/github.com/golang/dep/vendor/github.com/pelletier/go-toml,MIT-0,0.81595093 vendor/github.com/Masterminds/sprig,MIT-0,0.8136646 vendor/github.com/awnumar/memcall,SHL-0.5,0.85214007 diff --git a/tools/queue-status/sqs_job.yaml b/tools/queue-status/sqs_job.yaml index 347c4df37..f423a0c36 100644 --- a/tools/queue-status/sqs_job.yaml +++ b/tools/queue-status/sqs_job.yaml @@ -47,7 +47,8 @@ spec: - configMapRef: name: studioml-env-{{$uuid}} # Digest should be used to prevent version drift, prevented using idempotent SHA256 digest - image: quay.io/leafai/studio-go-runner:0.14.0-main-aaaagqxwidj + #image: quay.io/leafai/studio-go-runner:0.14.0-main-aaaagqxwidj + image: quay.io/leafai/studio-go-runner:0.14.0-main-aaaagrfcxkq imagePullPolicy: Always resources: limits: