Patching with cmake #36

Open
wants to merge 10 commits into base: cmaketestSetUp

Merge the patch folder from the master branch
Iximiel committed Oct 12, 2023
commit 1fef0d725822dc6a323fd7369c1c181ebae4d974
File renamed without changes.
@@ -202,7 +202,7 @@ if(GMX_GPU_FFT_VKFFT)
target_link_libraries(libgromacs PRIVATE VkFFT)
endif()
if(GMX_GPU_FFT_ROCFFT)
-target_link_libraries(libgromacs PUBLIC roc::rocfft)
+target_link_libraries(libgromacs PRIVATE roc::rocfft)
endif()

target_link_libraries(libgromacs PRIVATE $<BUILD_INTERFACE:common>)
@@ -200,7 +200,7 @@ if(GMX_GPU_FFT_VKFFT)
target_link_libraries(libgromacs PRIVATE VkFFT)
endif()
if(GMX_GPU_FFT_ROCFFT)
-target_link_libraries(libgromacs PUBLIC roc::rocfft)
+target_link_libraries(libgromacs PRIVATE roc::rocfft)
endif()

target_link_libraries(libgromacs PRIVATE $<BUILD_INTERFACE:common>)
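
For context: the PUBLIC-to-PRIVATE switch above (applied identically in both copies of the build file) changes how the rocFFT dependency propagates. A minimal CMake sketch of the semantics, with hypothetical target names not taken from the GROMACS tree:

    # With PRIVATE, roc::rocfft is used when building libgromacs but is not
    # added to its INTERFACE_LINK_LIBRARIES, so rocFFT's usage requirements
    # no longer propagate to targets that link libgromacs.
    add_library(libgromacs gromacs.cpp)
    target_link_libraries(libgromacs PRIVATE roc::rocfft)

    add_executable(consumer main.cpp)                   # hypothetical consumer
    target_link_libraries(consumer PRIVATE libgromacs)  # no inherited rocFFT requirements
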
@@ -734,21 +734,23 @@ static void computeSpecialForces(FILE* fplog,
* \param[in] stepWork Step schedule flags
* \param[in] xReadyOnDevice Event synchronizer indicating that the coordinates are ready in the device memory.
* \param[in] lambdaQ The Coulomb lambda of the current state.
+ * \param[in] useMdGpuGraph Whether MD GPU Graph is in use.
* \param[in] wcycle The wallcycle structure
*/
static inline void launchPmeGpuSpread(gmx_pme_t* pmedata,
const matrix box,
const StepWorkload& stepWork,
GpuEventSynchronizer* xReadyOnDevice,
const real lambdaQ,
+bool useMdGpuGraph,
gmx_wallcycle* wcycle)
{
wallcycle_start(wcycle, WallCycleCounter::PmeGpuMesh);
pme_gpu_prepare_computation(pmedata, box, wcycle, stepWork);
bool useGpuDirectComm = false;
gmx::PmeCoordinateReceiverGpu* pmeCoordinateReceiverGpu = nullptr;
pme_gpu_launch_spread(
-pmedata, xReadyOnDevice, wcycle, lambdaQ, useGpuDirectComm, pmeCoordinateReceiverGpu);
+pmedata, xReadyOnDevice, wcycle, lambdaQ, useGpuDirectComm, pmeCoordinateReceiverGpu, useMdGpuGraph);
wallcycle_stop(wcycle, WallCycleCounter::PmeGpuMesh);
}

@@ -1574,6 +1576,7 @@ void do_force(FILE* fplog,
stepWork,
localXReadyOnDevice,
lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)],
+simulationWork.useMdGpuGraph,
wcycle);
}

@@ -728,21 +728,23 @@ static void computeSpecialForces(FILE* fplog,
* \param[in] stepWork Step schedule flags
* \param[in] xReadyOnDevice Event synchronizer indicating that the coordinates are ready in the device memory.
* \param[in] lambdaQ The Coulomb lambda of the current state.
+ * \param[in] useMdGpuGraph Whether MD GPU Graph is in use.
* \param[in] wcycle The wallcycle structure
*/
static inline void launchPmeGpuSpread(gmx_pme_t* pmedata,
const matrix box,
const StepWorkload& stepWork,
GpuEventSynchronizer* xReadyOnDevice,
const real lambdaQ,
+bool useMdGpuGraph,
gmx_wallcycle* wcycle)
{
wallcycle_start(wcycle, WallCycleCounter::PmeGpuMesh);
pme_gpu_prepare_computation(pmedata, box, wcycle, stepWork);
bool useGpuDirectComm = false;
gmx::PmeCoordinateReceiverGpu* pmeCoordinateReceiverGpu = nullptr;
pme_gpu_launch_spread(
-pmedata, xReadyOnDevice, wcycle, lambdaQ, useGpuDirectComm, pmeCoordinateReceiverGpu);
+pmedata, xReadyOnDevice, wcycle, lambdaQ, useGpuDirectComm, pmeCoordinateReceiverGpu, useMdGpuGraph);
wallcycle_stop(wcycle, WallCycleCounter::PmeGpuMesh);
}

@@ -1568,6 +1570,7 @@ void do_force(FILE* fplog,
stepWork,
localXReadyOnDevice,
lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)],
+simulationWork.useMdGpuGraph,
wcycle);
}

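
For context: both copies of this file receive the same change. do_force() now forwards simulationWork.useMdGpuGraph through the launchPmeGpuSpread() wrapper into pme_gpu_launch_spread(), whose extra trailing parameter is inferred here from the updated call site. A self-contained C++ sketch of the plumbing pattern, with stand-in names:

    #include <cstdio>

    // Stand-in for pme_gpu_launch_spread(); only the extra bool matters here.
    static void launchSpread(bool useMdGpuGraph)
    {
        std::printf("spread launched, MD GPU graph in use: %d\n", useMdGpuGraph);
    }

    // Stand-in for the launchPmeGpuSpread() wrapper: the flag is taken by
    // value and forwarded unchanged, keeping the wrapper stateless.
    static inline void launchPmeSpreadWrapper(bool useMdGpuGraph)
    {
        launchSpread(useMdGpuGraph);
    }

    int main()
    {
        const bool useMdGpuGraph = true; // simulationWork.useMdGpuGraph in the real code
        launchPmeSpreadWrapper(useMdGpuGraph);
        return 0;
    }
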
@@ -1140,7 +1140,7 @@ void gmx::LegacySimulator::do_md()
if (plumedswitch && bHREX) {
// gmx_enerdata_t *hrex_enerd;
int nlambda = enerd->foreignLambdaTerms.numLambdas();
-gmx_enerdata_t hrex_enerd(enerd->grpp.nener, nlambda == 0 ? 0 : nlambda - 1);
+gmx_enerdata_t hrex_enerd(enerd->grpp.nener, nlambda == 0 ? nullptr : &inputrec->fepvals->all_lambda);
int repl = -1;
int nrepl = -1;
if (MAIN(cr)){
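
For context: this hunk adapts the PLUMED HREX branch to the new gmx_enerdata_t constructor, which, judging from the call sites in this commit, now takes a pointer to the full lambda table instead of a lambda count; nullptr encodes "no foreign lambdas". A sketch of the pattern under that assumption, with a hypothetical stand-in type:

    #include <vector>

    // Hypothetical stand-in for gmx_enerdata_t's constructor shape:
    // an energy-group count plus an optional pointer to the lambda table.
    struct EnergyData
    {
        EnergyData(int /*nener*/, const std::vector<double>* /*allLambda*/) {}
    };

    int main()
    {
        std::vector<double> allLambda = { 0.0, 0.5, 1.0 };
        const int nlambda = static_cast<int>(allLambda.size());
        // Pass nullptr when there are no foreign lambdas, otherwise the table:
        EnergyData hrexEnerd(6, nlambda == 0 ? nullptr : &allLambda);
        return 0;
    }
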
@@ -72,6 +72,7 @@
#include "gromacs/fileio/gmxfio.h"
#include "gromacs/fileio/oenv.h"
#include "gromacs/fileio/tpxio.h"
#include "gromacs/fileio/trrio.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/gpu_utils/device_stream_manager.h"
@@ -937,6 +938,19 @@ int Mdrunner::mdrunner()
*/
applyGlobalSimulationState(
*inputHolder_.get(), partialDeserializedTpr.get(), globalState.get(), inputrec.get(), &mtop);

+static_assert(sc_trrMaxAtomCount == sc_checkpointMaxAtomCount);
+if (mtop.natoms > sc_checkpointMaxAtomCount)
+{
+    gmx_fatal(FARGS,
+              "System has %d atoms, which is more than can be stored in checkpoint and trr "
+              "files (max %" PRId64 ")",
+              mtop.natoms,
+              sc_checkpointMaxAtomCount);
+}
+
+// The XTC format has been updated to support up to 2^31-1 atoms, which is anyway the
+// largest supported by GROMACS, so no need for any particular check here.
}

/* Check and update the hardware options for internal consistency */
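
For context: the static_assert above ties the trr and checkpoint atom-count limits together at compile time, so a single runtime check provably covers both file formats. A self-contained sketch of the same guard pattern, with made-up limits in place of sc_trrMaxAtomCount and sc_checkpointMaxAtomCount:

    #include <cinttypes>
    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>

    // Made-up limits; the real constants come from the fileio headers.
    constexpr int64_t maxTrrAtoms        = 1000;
    constexpr int64_t maxCheckpointAtoms = 1000;

    static void checkAtomCount(int natoms)
    {
        // If the two limits ever diverge, compilation fails here and the
        // single runtime check below must be revisited.
        static_assert(maxTrrAtoms == maxCheckpointAtoms, "limits must match");
        if (natoms > maxCheckpointAtoms)
        {
            std::fprintf(stderr, "System has %d atoms, max is %" PRId64 "\n",
                         natoms, maxCheckpointAtoms);
            std::exit(EXIT_FAILURE);
        }
    }

    int main() { checkAtomCount(500); return 0; }
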
@@ -1078,11 +1092,12 @@ int Mdrunner::mdrunner()
// the task-deciding functions and will agree on the result
// without needing to communicate.
// The LBFGS minimizer, test-particle insertion, normal modes and shell dynamics don't support DD
+const bool hasCustomParallelization =
+        (EI_TPI(inputrec->eI) || inputrec->eI == IntegrationAlgorithm::NM);
const bool canUseDomainDecomposition =
-        !(inputrec->eI == IntegrationAlgorithm::LBFGS || EI_TPI(inputrec->eI)
-          || inputrec->eI == IntegrationAlgorithm::NM
-          || gmx_mtop_particletype_count(mtop)[ParticleType::Shell] > 0);
-GMX_RELEASE_ASSERT(!PAR(cr) || canUseDomainDecomposition,
+        (inputrec->eI != IntegrationAlgorithm::LBFGS && !hasCustomParallelization
+         && gmx_mtop_particletype_count(mtop)[ParticleType::Shell] == 0);
+GMX_RELEASE_ASSERT(!PAR(cr) || hasCustomParallelization || canUseDomainDecomposition,
"A parallel run should not arrive here without DD support");

int useDDWithSingleRank = -1;
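
For context: factoring out hasCustomParallelization lets the release assert accept parallel test-particle-insertion and normal-mode runs, which parallelize on their own rather than via domain decomposition. A simplified, self-contained sketch of the refactored predicates (hypothetical enum; the real checks use EI_TPI and IntegrationAlgorithm):

    enum class Integrator { MD, LBFGS, TPI, NM };

    static bool hasCustomParallelization(Integrator ei)
    {
        return ei == Integrator::TPI || ei == Integrator::NM;
    }

    static bool canUseDomainDecomposition(Integrator ei, int numShellParticles)
    {
        return ei != Integrator::LBFGS && !hasCustomParallelization(ei)
               && numShellParticles == 0;
    }

    // A parallel run is legal if DD works or the integrator brings its own scheme.
    static bool parallelRunSupported(Integrator ei, int numShellParticles)
    {
        return hasCustomParallelization(ei) || canUseDomainDecomposition(ei, numShellParticles);
    }

    int main()
    {
        return parallelRunSupported(Integrator::TPI, 0) ? 0 : 1; // TPI: now accepted
    }
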
@@ -1455,6 +1470,9 @@ int Mdrunner::mdrunner()
else
{
/* PME, if used, is done on all nodes with 1D decomposition */
+cr->mpi_comm_mygroup = cr->mpiDefaultCommunicator;
+cr->mpi_comm_mysim = cr->mpiDefaultCommunicator;
+
cr->nnodes = cr->sizeOfDefaultCommunicator;
cr->sim_nodeid = cr->rankInDefaultCommunicator;
cr->nodeid = cr->rankInDefaultCommunicator;
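
For context: in the branch without separate PME ranks, the group and simulation communicators are now explicitly aliased to the default communicator. A minimal MPI sketch of the idea, with hypothetical variable names standing in for the cr-> fields:

    #include <mpi.h>

    int main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);
        MPI_Comm defaultComm = MPI_COMM_WORLD; // stands in for cr->mpiDefaultCommunicator
        // With 1D decomposition and no PME-only group, both scopes are the
        // whole default communicator:
        MPI_Comm commMyGroup = defaultComm;    // cr->mpi_comm_mygroup
        MPI_Comm commMySim   = defaultComm;    // cr->mpi_comm_mysim
        int rankInSim = 0, rankInGroup = 0;
        MPI_Comm_rank(commMySim, &rankInSim);
        MPI_Comm_rank(commMyGroup, &rankInGroup); // identical here: same communicator
        MPI_Finalize();
        return 0;
    }
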
@@ -2109,7 +2127,7 @@ int Mdrunner::mdrunner()

/* Energy terms and groups */
gmx_enerdata_t enerd(mtop.groups.groups[SimulationAtomGroupType::EnergyOutput].size(),
-inputrec->fepvals->n_lambda);
+&inputrec->fepvals->all_lambda);

// cos acceleration is only supported by md, but older tpr
// files might still combine it with other integrators
@@ -72,6 +72,7 @@
#include "gromacs/fileio/gmxfio.h"
#include "gromacs/fileio/oenv.h"
#include "gromacs/fileio/tpxio.h"
#include "gromacs/fileio/trrio.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/gpu_utils/device_stream_manager.h"
@@ -927,6 +928,19 @@ int Mdrunner::mdrunner()
*/
applyGlobalSimulationState(
*inputHolder_.get(), partialDeserializedTpr.get(), globalState.get(), inputrec.get(), &mtop);

+static_assert(sc_trrMaxAtomCount == sc_checkpointMaxAtomCount);
+if (mtop.natoms > sc_checkpointMaxAtomCount)
+{
+    gmx_fatal(FARGS,
+              "System has %d atoms, which is more than can be stored in checkpoint and trr "
+              "files (max %" PRId64 ")",
+              mtop.natoms,
+              sc_checkpointMaxAtomCount);
+}
+
+// The XTC format has been updated to support up to 2^31-1 atoms, which is anyway the
+// largest supported by GROMACS, so no need for any particular check here.
}

/* Check and update the hardware options for internal consistency */
@@ -1068,11 +1082,12 @@ int Mdrunner::mdrunner()
// the task-deciding functions and will agree on the result
// without needing to communicate.
// The LBFGS minimizer, test-particle insertion, normal modes and shell dynamics don't support DD
+const bool hasCustomParallelization =
+        (EI_TPI(inputrec->eI) || inputrec->eI == IntegrationAlgorithm::NM);
const bool canUseDomainDecomposition =
-        !(inputrec->eI == IntegrationAlgorithm::LBFGS || EI_TPI(inputrec->eI)
-          || inputrec->eI == IntegrationAlgorithm::NM
-          || gmx_mtop_particletype_count(mtop)[ParticleType::Shell] > 0);
-GMX_RELEASE_ASSERT(!PAR(cr) || canUseDomainDecomposition,
+        (inputrec->eI != IntegrationAlgorithm::LBFGS && !hasCustomParallelization
+         && gmx_mtop_particletype_count(mtop)[ParticleType::Shell] == 0);
+GMX_RELEASE_ASSERT(!PAR(cr) || hasCustomParallelization || canUseDomainDecomposition,
"A parallel run should not arrive here without DD support");

int useDDWithSingleRank = -1;
@@ -1441,6 +1456,9 @@ int Mdrunner::mdrunner()
else
{
/* PME, if used, is done on all nodes with 1D decomposition */
+cr->mpi_comm_mygroup = cr->mpiDefaultCommunicator;
+cr->mpi_comm_mysim = cr->mpiDefaultCommunicator;
+
cr->nnodes = cr->sizeOfDefaultCommunicator;
cr->sim_nodeid = cr->rankInDefaultCommunicator;
cr->nodeid = cr->rankInDefaultCommunicator;
@@ -2095,7 +2113,7 @@ int Mdrunner::mdrunner()

/* Energy terms and groups */
gmx_enerdata_t enerd(mtop.groups.groups[SimulationAtomGroupType::EnergyOutput].size(),
-inputrec->fepvals->n_lambda);
+&inputrec->fepvals->all_lambda);

// cos acceleration is only supported by md, but older tpr
// files might still combine it with other integrators
@@ -625,6 +625,8 @@ bool decideWhetherToUseGpuForUpdate(const bool isDomainDecomposition,
const bool pmeSpreadGatherUsesCpu = (pmeRunMode == PmeRunMode::CPU);

std::string errorMessage;
+// Flag to set if we do not want to log the error with `-update auto` (e.g., for non-GPU build)
+bool silenceWarningMessageWithUpdateAuto = forceCpuUpdateDefault;

if (isDomainDecomposition)
{
@@ -664,14 +666,19 @@
{
errorMessage +=
"Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
+silenceWarningMessageWithUpdateAuto = true;
}
if (!gpusWereDetected)
{
errorMessage += "Compatible GPUs must have been found.\n";
+silenceWarningMessageWithUpdateAuto = true;
}
if (!(GMX_GPU_CUDA || GMX_GPU_SYCL))
{
errorMessage += "Only CUDA and SYCL builds are supported.\n";
+// Silence clang-analyzer deadcode.DeadStores warning about ignoring the previous assignments
+GMX_UNUSED_VALUE(silenceWarningMessageWithUpdateAuto);
+silenceWarningMessageWithUpdateAuto = true;
}
if (inputrec.eI != IntegrationAlgorithm::MD)
{
@@ -768,7 +775,7 @@ bool decideWhetherToUseGpuForUpdate(const bool isDomainDecomposition,

if (!errorMessage.empty())
{
-if (updateTarget == TaskTarget::Auto && !forceCpuUpdateDefault)
+if (updateTarget == TaskTarget::Auto && !silenceWarningMessageWithUpdateAuto)
{
GMX_LOG(mdlog.info)
.asParagraph()
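
For context: the new flag makes `-update auto` fall back to the CPU silently when the GPU path was never available (non-GPU build, no compatible GPUs, no GPU tasks), while still logging when an otherwise viable GPU update is blocked by the simulation setup; the GMX_UNUSED_VALUE call only quiets the analyzer about the deliberately overwritten store. A minimal self-contained sketch of the pattern, with stand-in names:

    #include <iostream>
    #include <string>

    #define UNUSED_VALUE(x) ((void)(x)) // stand-in for GMX_UNUSED_VALUE

    static bool decideGpuUpdate(bool autoTarget, bool gpuBuild, bool gpusDetected)
    {
        std::string errorMessage;
        bool silenceWarning = false;
        if (!gpusDetected)
        {
            errorMessage += "Compatible GPUs must have been found.\n";
            silenceWarning = true; // CPU fallback is expected, do not warn
        }
        if (!gpuBuild)
        {
            errorMessage += "Only CUDA and SYCL builds are supported.\n";
            UNUSED_VALUE(silenceWarning); // quiet dead-store analysis
            silenceWarning = true;
        }
        if (!errorMessage.empty())
        {
            if (autoTarget && !silenceWarning)
            {
                std::cerr << errorMessage; // user should learn why auto chose CPU
            }
            return false;
        }
        return true;
    }

    int main() { return decideGpuUpdate(true, true, false) ? 0 : 1; }
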
@@ -624,6 +624,8 @@ bool decideWhetherToUseGpuForUpdate(const bool isDomainDecomposition,
const bool pmeSpreadGatherUsesCpu = (pmeRunMode == PmeRunMode::CPU);

std::string errorMessage;
+// Flag to set if we do not want to log the error with `-update auto` (e.g., for non-GPU build)
+bool silenceWarningMessageWithUpdateAuto = forceCpuUpdateDefault;

if (isDomainDecomposition)
{
@@ -663,14 +665,19 @@
{
errorMessage +=
"Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
+silenceWarningMessageWithUpdateAuto = true;
}
if (!gpusWereDetected)
{
errorMessage += "Compatible GPUs must have been found.\n";
+silenceWarningMessageWithUpdateAuto = true;
}
if (!(GMX_GPU_CUDA || GMX_GPU_SYCL))
{
errorMessage += "Only CUDA and SYCL builds are supported.\n";
+// Silence clang-analyzer deadcode.DeadStores warning about ignoring the previous assignments
+GMX_UNUSED_VALUE(silenceWarningMessageWithUpdateAuto);
+silenceWarningMessageWithUpdateAuto = true;
}
if (inputrec.eI != IntegrationAlgorithm::MD)
{
@@ -762,7 +769,7 @@ bool decideWhetherToUseGpuForUpdate(const bool isDomainDecomposition,

if (!errorMessage.empty())
{
-if (updateTarget == TaskTarget::Auto && !forceCpuUpdateDefault)
+if (updateTarget == TaskTarget::Auto && !silenceWarningMessageWithUpdateAuto)
{
GMX_LOG(mdlog.info)
.asParagraph()