Skip to content

Commit

Permalink
ci: Spack docker containers (#3039)
Browse files Browse the repository at this point in the history
* Update tag, add PySolver snippet, adjust lai setting

* Generalize umpire dependency

* Add LAI interface toggle

* Remove uberenv submodule and uberenv/spack configuration files from GEOS repository

* Specify path to ATS python3 executable

* Update docs

* Update docs with info about LC script

* Sherlock changes; adjust hypre/trilinos defaults, make lai setting explicit per job

* Disable Sherlock unit tests
  • Loading branch information
bmhan12 authored Feb 12, 2025
1 parent f1cf8bf commit 116560f
Show file tree
Hide file tree
Showing 42 changed files with 312 additions and 4,547 deletions.
2 changes: 1 addition & 1 deletion .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"build": {
"dockerfile": "Dockerfile",
"args": {
"GEOS_TPL_TAG": "290-594"
"GEOS_TPL_TAG": "261-601"
}
},
"runArgs": [
Expand Down
22 changes: 19 additions & 3 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ jobs:
steps:
- name: does_pr_have_necessary_labels
if: ${{inputs.REQUIRED_LABEL && github.event_name == 'pull_request'}}
run: |
run: |
pr_json=$(curl -H "Accept: application/vnd.github+json" https://api.github.com/repos/${{ github.repository }}/pulls/${{ github.event.number }})
LABELS=$(echo ${pr_json} | jq -crM '[.labels[].name]')
echo " the labels are ${LABELS}"
Expand Down Expand Up @@ -197,16 +197,32 @@ jobs:
script_args+=(--repository ${GITHUB_WORKSPACE_MOUNT_POINT})
# The linear algebra environment variables (ENABLE_HYPRE, ENABLE_HYPRE_DEVICE & ENABLE_TRILINOS)
# could be passed as scripts parameters as well, but a specific care must be taken to be sure
# are passed as script parameters. Specific care must be taken to be sure
# there's no conflict with the host-config files.
# Hypre
ENABLE_HYPRE=${{ inputs.ENABLE_HYPRE }}
ENABLE_HYPRE_DEVICE=${{ inputs.ENABLE_HYPRE_DEVICE }}
if [ ! -z "${{ inputs.ENABLE_HYPRE }}" ]; then
script_args+=(--enable-hypre "${{ inputs.ENABLE_HYPRE }}")
fi
# Trilinos
ENABLE_TRILINOS=${{ inputs.ENABLE_TRILINOS }}
docker_args+=(-e ENABLE_HYPRE=${ENABLE_HYPRE:-OFF})
docker_args+=(-e ENABLE_HYPRE_DEVICE=${ENABLE_HYPRE_DEVICE:-CPU})
docker_args+=(-e ENABLE_TRILINOS=${ENABLE_TRILINOS:-ON})
docker_args+=(-e GEOS_BUILD_SHARED_LIBS=${{ inputs.BUILD_SHARED_LIBS }})
if [ ! -z "${{ inputs.ENABLE_TRILINOS }}" ]; then
script_args+=(--enable-trilinos "${{ inputs.ENABLE_TRILINOS }}")
fi
# ENABLE_HYPRE_DEVICE
ENABLE_HYPRE_DEVICE=${{ inputs.ENABLE_HYPRE_DEVICE }}
if [ ! -z "${{ inputs.ENABLE_HYPRE_DEVICE }}" ]; then
script_args+=(--enable-hypre-device "${{ inputs.ENABLE_HYPRE_DEVICE }}")
fi
docker_args+=(--cap-add=SYS_PTRACE --rm)
script_args+=(--cmake-build-type ${{ inputs.CMAKE_BUILD_TYPE }})
Expand Down
31 changes: 26 additions & 5 deletions .github/workflows/ci_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -150,16 +150,25 @@ jobs:
CMAKE_BUILD_TYPE: Release
DOCKER_REPOSITORY: geosx/ubuntu20.04-gcc9
BUILD_SHARED_LIBS: ON
ENABLE_HYPRE: OFF
ENABLE_TRILINOS: ON
HOST_CONFIG: /spack-generated.cmake

- name: Ubuntu debug (20.04, gcc 10.5.0, open-mpi 4.0.3) - github codespaces
CMAKE_BUILD_TYPE: Debug
DOCKER_REPOSITORY: geosx/ubuntu20.04-gcc10
BUILD_SHARED_LIBS: ON
ENABLE_HYPRE: OFF
ENABLE_TRILINOS: ON
HOST_CONFIG: /spack-generated.cmake

- name: Ubuntu (20.04, gcc 10.5.0, open-mpi 4.0.3) - github codespaces
CMAKE_BUILD_TYPE: Release
DOCKER_REPOSITORY: geosx/ubuntu20.04-gcc10
BUILD_SHARED_LIBS: ON
ENABLE_HYPRE: OFF
ENABLE_TRILINOS: ON
HOST_CONFIG: /spack-generated.cmake

- name: Ubuntu (22.04, gcc 11.4.0, open-mpi 4.1.2)
CMAKE_BUILD_TYPE: Release
Expand All @@ -168,27 +177,33 @@ jobs:
ENABLE_TRILINOS: OFF
GCP_BUCKET: geosx/ubuntu22.04-gcc11
BUILD_SHARED_LIBS: ON
HOST_CONFIG: /spack-generated.cmake

- name: Ubuntu (22.04, gcc 12.3.0, open-mpi 4.1.2)
CMAKE_BUILD_TYPE: Release
DOCKER_REPOSITORY: geosx/ubuntu22.04-gcc12
ENABLE_HYPRE: ON
ENABLE_TRILINOS: OFF
BUILD_SHARED_LIBS: ON
HOST_CONFIG: /spack-generated.cmake

- name: Ubuntu (22.04, clang 15.0.7, open-mpi 4.1.2)
CMAKE_BUILD_TYPE: Release
DOCKER_REPOSITORY: geosx/ubuntu22.04-clang15
ENABLE_HYPRE: ON
ENABLE_TRILINOS: OFF
BUILD_SHARED_LIBS: ON
HOST_CONFIG: /spack-generated.cmake

- name: Sherlock CPU (centos 7.9.2009, gcc 10.1.0, open-mpi 4.1.2, openblas 0.3.10)
CMAKE_BUILD_TYPE: Release
DOCKER_REPOSITORY: geosx/sherlock-gcc10.1.0-openmpi4.1.2-openblas0.3.10-zlib1.2.11
ENABLE_HYPRE: ON
ENABLE_TRILINOS: OFF
HOST_CONFIG: host-configs/Stanford/sherlock-gcc10.cmake
BUILD_AND_TEST_CLI_ARGS: "--no-run-unit-tests"
GCP_BUCKET: geosx/Sherlock-CPU
HOST_CONFIG: /spack-generated.cmake
# HOST_CONFIG: host-configs/Stanford/sherlock-gcc10.cmake
BUILD_SHARED_LIBS: ON

uses: ./.github/workflows/build_and_test.yml
Expand All @@ -199,6 +214,7 @@ jobs:
DOCKER_REPOSITORY: ${{ matrix.DOCKER_REPOSITORY }}
ENABLE_HYPRE: ${{ matrix.ENABLE_HYPRE }}
ENABLE_TRILINOS: ${{ matrix.ENABLE_TRILINOS }}
BUILD_AND_TEST_CLI_ARGS: ${{ matrix.BUILD_AND_TEST_CLI_ARGS }}
GCP_BUCKET: ${{ matrix.GCP_BUCKET }}
HOST_CONFIG: ${{ matrix.HOST_CONFIG }}
RUNS_ON: ubuntu-22.04
Expand Down Expand Up @@ -228,6 +244,7 @@ jobs:
DOCKER_CERTS_UPDATE_COMMAND: "update-ca-certificates"
REQUIRED_LABEL: "ci: run integrated tests"
LOCAL_BASELINE_DIR: /data/GEOS/baselines
HOST_CONFIG: /spack-generated.cmake

baseline_log:
needs: [is_not_draft_pull_request]
Expand Down Expand Up @@ -260,7 +277,7 @@ jobs:
GCP_BUCKET: geosx/ubuntu22.04-gcc11
RUNS_ON: Runner_4core_16GB
REQUIRED_LABEL: "ci: run code coverage"

HOST_CONFIG: /spack-generated.cmake

# mac_builds:
# needs:
Expand Down Expand Up @@ -296,6 +313,7 @@ jobs:
DOCKER_RUN_ARGS: "--cpus=8 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/usr/local/share/ca-certificates/llnl:ro"
DOCKER_CERTS_DIR: "/usr/local/share/ca-certificates"
DOCKER_CERTS_UPDATE_COMMAND: "update-ca-certificates"
HOST_CONFIG: /spack-generated.cmake

- name: Ubuntu CUDA (20.04, clang 10.0.0 + gcc 9.4.0, open-mpi 4.0.3, cuda-11.8.89)
BUILD_AND_TEST_CLI_ARGS: "--no-install-schema"
Expand All @@ -310,7 +328,8 @@ jobs:
DOCKER_RUN_ARGS: "--cpus=8 --memory=256g --runtime=nvidia --gpus all -v /etc/pki/ca-trust/source/anchors/:/usr/local/share/ca-certificates/llnl:ro"
DOCKER_CERTS_DIR: "/usr/local/share/ca-certificates"
DOCKER_CERTS_UPDATE_COMMAND: "update-ca-certificates"

HOST_CONFIG: /spack-generated.cmake

- name: Rockylinux CUDA (8, clang 17.0.6, cuda 12.5.1)
BUILD_AND_TEST_CLI_ARGS: "--no-install-schema"
CMAKE_BUILD_TYPE: Release
Expand All @@ -324,6 +343,7 @@ jobs:
DOCKER_RUN_ARGS: "--cpus=8 --memory=256g --runtime=nvidia --gpus all -v /etc/pki/ca-trust/source/anchors/:/usr/local/share/ca-certificates/llnl:ro"
DOCKER_CERTS_DIR: "/usr/local/share/ca-certificates"
DOCKER_CERTS_UPDATE_COMMAND: "update-ca-trust"
HOST_CONFIG: /spack-generated.cmake

- name: Rockylinux CUDA (8, gcc 8.5, cuda 12.5.1)
BUILD_AND_TEST_CLI_ARGS: "--no-run-unit-tests --no-install-schema"
Expand All @@ -338,13 +358,13 @@ jobs:
DOCKER_RUN_ARGS: "--cpus=8 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/etc/pki/ca-trust/source/anchors/llnl:ro"
DOCKER_CERTS_DIR: "/etc/pki/ca-trust/source/anchors"
DOCKER_CERTS_UPDATE_COMMAND: "update-ca-trust"
HOST_CONFIG: /spack-generated.cmake

- name: Pangea 3 CUDA (AlmaLinux 8.8, gcc 9.4.0, open-mpi 4.1.2, cuda 11.5.0, openblas 0.3.10)
BUILD_AND_TEST_CLI_ARGS: "--build-exe-only --no-install-schema"
CMAKE_BUILD_TYPE: Release
BUILD_GENERATOR: "--makefile"
DOCKER_REPOSITORY: geosx/pangea3-almalinux8-gcc9.4-openmpi4.1.2-cuda11.5.0-openblas0.3.18
HOST_CONFIG: host-configs/TOTAL/pangea3-gcc8.4.1-openmpi-4.1.2-wave-solver.cmake
ENABLE_HYPRE_DEVICE: CUDA
ENABLE_HYPRE: ON
ENABLE_TRILINOS: OFF
Expand All @@ -354,6 +374,7 @@ jobs:
DOCKER_RUN_ARGS: "--cpus=8 --memory=128g -v /etc/pki/ca-trust/source/anchors/:/etc/pki/ca-trust/source/anchors/llnl:ro"
DOCKER_CERTS_DIR: "/etc/pki/ca-trust/source/anchors"
DOCKER_CERTS_UPDATE_COMMAND: "update-ca-trust"
HOST_CONFIG: /spack-generated-wave-solver-only.cmake

- name: Sherlock GPU (centos 7.9.2009, gcc 10.1.0, open-mpi 4.1.2, openblas 0.3.10, cuda 12.4.0,)
BUILD_AND_TEST_CLI_ARGS: "--no-run-unit-tests --no-install-schema"
Expand All @@ -364,12 +385,12 @@ jobs:
ENABLE_HYPRE: ON
ENABLE_TRILINOS: OFF
GCP_BUCKET: geosx/Sherlock-GPU
HOST_CONFIG: host-configs/Stanford/sherlock-gcc10-cuda12-sm70.cmake
RUNS_ON: streak2
NPROC: 8
DOCKER_RUN_ARGS: "--cpus=8 --memory=128g --runtime=nvidia -v /etc/pki/ca-trust/source/anchors/:/etc/pki/ca-trust/source/anchors/llnl:ro"
DOCKER_CERTS_DIR: "/etc/pki/ca-trust/source/anchors"
DOCKER_CERTS_UPDATE_COMMAND: "update-ca-trust"
HOST_CONFIG: /spack-generated.cmake

# Below this line, jobs that deploy to Google Cloud.

Expand Down
3 changes: 0 additions & 3 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,3 @@
[submodule "src/coreComponents/fileIO/coupling/hdf5_interface"]
path = src/coreComponents/fileIO/coupling/hdf5_interface
url = ../../GEOS-DEV/hdf5_interface.git
[submodule "scripts/uberenv"]
path = scripts/uberenv
url = ../../LLNL/uberenv.git
11 changes: 0 additions & 11 deletions .uberenv_config.json

This file was deleted.

28 changes: 25 additions & 3 deletions scripts/ci_build_and_test_in_container.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ Usage: $0
run a code build and test.
--data-basename output.tar.gz
If some data needs to be extracted from the build, the argument will define the tarball. Has to be a `tar.gz`.
--enable-hypre
One of ON or OFF (default is ON). Build geos with hypre.
--enable-hypre-device
One of CPU, CUDA, or HIP (default is CPU). Build geos with hypre GPU support.
--enable-trilinos
One of ON or OFF (default is OFF). Build geos with trilinos.
--exchange-dir /path/to/exchange
Folder to share data with outside of the container.
--host-config host-config/my_config.cmake
Expand Down Expand Up @@ -71,19 +77,22 @@ exit 1
# Then we'll move to the build dir.
or_die cd $(dirname $0)/..

# Parsing using getopt
args=$(or_die getopt -a -o h --long build-exe-only,cmake-build-type:,code-coverage,data-basename:,exchange-dir:,host-config:,install-dir-basename:,makefile,ninja,no-install-schema,no-run-unit-tests,nproc:,repository:,run-integrated-tests,sccache-credentials:,test-code-style,test-documentation,help -- "$@")
args=$(or_die getopt -a -o h --long build-exe-only,cmake-build-type:,code-coverage,data-basename:,enable-hypre:,enable-hypre-device:,enable-trilinos:,exchange-dir:,host-config:,install-dir-basename:,makefile,ninja,no-install-schema,no-run-unit-tests,nproc:,repository:,run-integrated-tests,sccache-credentials:,test-code-style,test-documentation,help -- "$@")

# Variables with default values
# Several of these are overwritten by the command-line options handled in the
# option-parsing loop further down.
BUILD_EXE_ONLY=false
BUILD_GENERATOR=""
GEOS_INSTALL_SCHEMA=true
HOST_CONFIG="host-configs/environment.cmake"
# Hypre toggles; set from --enable-hypre and --enable-hypre-device when given.
ENABLE_HYPRE=ON
ENABLE_HYPRE_DEVICE=CPU
# Initial value only: reassigned after option parsing, based on ENABLE_HYPRE.
GEOS_LA_INTERFACE=Hypre
RUN_UNIT_TESTS=true
RUN_INTEGRATED_TESTS=false
UPLOAD_TEST_BASELINES=false
TEST_CODE_STYLE=false
TEST_DOCUMENTATION=false
# Trilinos toggle; set from --enable-trilinos when given.
ENABLE_TRILINOS=OFF
CODE_COVERAGE=false
# Defaults to the processor count reported by nproc.
NPROC="$(nproc)"

Expand All @@ -109,6 +118,9 @@ do
fi
unset DATA_BASENAME DATA_BASENAME_EXT
shift 2;;
--enable-hypre) ENABLE_HYPRE=$2; shift 2;;
--enable-hypre-device) ENABLE_HYPRE_DEVICE=$2; shift 2;;
--enable-trilinos) ENABLE_TRILINOS=$2; shift 2;;
--exchange-dir) DATA_EXCHANGE_DIR=$2; shift 2;;
--host-config) HOST_CONFIG=$2; shift 2;;
--install-dir-basename) GEOS_DIR=${GEOSX_TPL_DIR}/../$2; shift 2;;
Expand Down Expand Up @@ -141,6 +153,12 @@ if [[ -z "${GEOS_DIR}" ]]; then
GEOS_DIR=/dev/null
fi

# Pick the linear algebra interface from the hypre toggle: Hypre when
# ENABLE_HYPRE is exactly "ON", Trilinos for any other value.
# NOTE(review): ENABLE_TRILINOS is not consulted here — confirm that
# ENABLE_HYPRE != ON is always meant to select Trilinos.
if [[ "${ENABLE_HYPRE}" = ON ]]; then
GEOS_LA_INTERFACE=Hypre
else
GEOS_LA_INTERFACE=Trilinos
fi

if [[ ! -z "${SCCACHE_CREDS}" ]]; then
# The credential json file is available at the root of the geos repository.
# We hereafter create the config file that points to it.
Expand Down Expand Up @@ -200,7 +218,7 @@ if [[ "${RUN_INTEGRATED_TESTS}" = true ]]; then
ATS_WORKING_DIR=$tempdir/GEOS_integratedTests_working

export ATS_FILTER="np<=32"
ATS_CMAKE_ARGS="-DATS_ARGUMENTS=\"--machine openmpi --ats openmpi_mpirun=/usr/bin/mpirun --ats openmpi_args=--allow-run-as-root --ats openmpi_procspernode=32 --ats openmpi_maxprocs=32\" -DPython3_ROOT_DIR=${ATS_PYTHON_HOME} -DATS_BASELINE_DIR=${ATS_BASELINE_DIR} -DATS_WORKING_DIR=${ATS_WORKING_DIR}"
ATS_CMAKE_ARGS="-DATS_ARGUMENTS=\"--machine openmpi --ats openmpi_mpirun=/usr/bin/mpirun --ats openmpi_args=--allow-run-as-root --ats openmpi_procspernode=32 --ats openmpi_maxprocs=32\" -DPython3_ROOT_DIR=${ATS_PYTHON_HOME} -DPython3_EXECUTABLE=${ATS_PYTHON_HOME}/bin/python3 -DATS_BASELINE_DIR=${ATS_BASELINE_DIR} -DATS_WORKING_DIR=${ATS_WORKING_DIR}"
fi


Expand Down Expand Up @@ -233,6 +251,10 @@ or_die python3 scripts/config-build.py \
${BUILD_GENERATOR} \
-DBLT_MPI_COMMAND_APPEND='"--allow-run-as-root;--oversubscribe"' \
-DGEOS_INSTALL_SCHEMA=${GEOS_INSTALL_SCHEMA} \
-DENABLE_HYPRE=${ENABLE_HYPRE} \
-DENABLE_HYPRE_DEVICE=${ENABLE_HYPRE_DEVICE} \
-DENABLE_TRILINOS=${ENABLE_TRILINOS} \
-DGEOS_LA_INTERFACE:PATH=${GEOS_LA_INTERFACE} \
-DENABLE_COVERAGE=$([[ "${CODE_COVERAGE}" = true ]] && echo 1 || echo 0) \
${SCCACHE_CMAKE_ARGS} \
${ATS_CMAKE_ARGS}
Expand Down
Loading

0 comments on commit 116560f

Please sign in to comment.