Patching with cmake #36

Open
wants to merge 10 commits into base: cmaketestSetUp

Merge the patch folder from the master branch
Iximiel committed Oct 12, 2023
commit 1fef0d725822dc6a323fd7369c1c181ebae4d974
File renamed without changes.
@@ -202,7 +202,7 @@ if(GMX_GPU_FFT_VKFFT)
target_link_libraries(libgromacs PRIVATE VkFFT)
endif()
if(GMX_GPU_FFT_ROCFFT)
-target_link_libraries(libgromacs PUBLIC roc::rocfft)
+target_link_libraries(libgromacs PRIVATE roc::rocfft)
endif()

target_link_libraries(libgromacs PRIVATE $<BUILD_INTERFACE:common>)
@@ -200,7 +200,7 @@ if(GMX_GPU_FFT_VKFFT)
target_link_libraries(libgromacs PRIVATE VkFFT)
endif()
if(GMX_GPU_FFT_ROCFFT)
-target_link_libraries(libgromacs PUBLIC roc::rocfft)
+target_link_libraries(libgromacs PRIVATE roc::rocfft)
endif()

target_link_libraries(libgromacs PRIVATE $<BUILD_INTERFACE:common>)
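
For context: the PUBLIC-to-PRIVATE switch above (applied identically in both copies of the build file) changes how the rocFFT dependency propagates. A minimal CMake sketch of the semantics, with hypothetical target names not taken from the GROMACS tree:

    # With PRIVATE, roc::rocfft is used when building libgromacs but is not
    # added to its INTERFACE_LINK_LIBRARIES, so rocFFT's usage requirements
    # no longer propagate to targets that link libgromacs.
    add_library(libgromacs gromacs.cpp)
    target_link_libraries(libgromacs PRIVATE roc::rocfft)

    add_executable(consumer main.cpp)                   # hypothetical consumer
    target_link_libraries(consumer PRIVATE libgromacs)  # no inherited rocFFT requirements
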
@@ -734,21 +734,23 @@ static void computeSpecialForces(FILE* fplog,
* \param[in] stepWork Step schedule flags
* \param[in] xReadyOnDevice Event synchronizer indicating that the coordinates are ready in the device memory.
* \param[in] lambdaQ The Coulomb lambda of the current state.
+ * \param[in] useMdGpuGraph Whether MD GPU Graph is in use.
* \param[in] wcycle The wallcycle structure
*/
static inline void launchPmeGpuSpread(gmx_pme_t* pmedata,
const matrix box,
const StepWorkload& stepWork,
GpuEventSynchronizer* xReadyOnDevice,
const real lambdaQ,
+bool useMdGpuGraph,
gmx_wallcycle* wcycle)
{
wallcycle_start(wcycle, WallCycleCounter::PmeGpuMesh);
pme_gpu_prepare_computation(pmedata, box, wcycle, stepWork);
bool useGpuDirectComm = false;
gmx::PmeCoordinateReceiverGpu* pmeCoordinateReceiverGpu = nullptr;
pme_gpu_launch_spread(
-pmedata, xReadyOnDevice, wcycle, lambdaQ, useGpuDirectComm, pmeCoordinateReceiverGpu);
+pmedata, xReadyOnDevice, wcycle, lambdaQ, useGpuDirectComm, pmeCoordinateReceiverGpu, useMdGpuGraph);
wallcycle_stop(wcycle, WallCycleCounter::PmeGpuMesh);
}

@@ -1574,6 +1576,7 @@ void do_force(FILE* fplog,
stepWork,
localXReadyOnDevice,
lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)],
+simulationWork.useMdGpuGraph,
wcycle);
}

@@ -728,21 +728,23 @@ static void computeSpecialForces(FILE* fplog,
* \param[in] stepWork Step schedule flags
* \param[in] xReadyOnDevice Event synchronizer indicating that the coordinates are ready in the device memory.
* \param[in] lambdaQ The Coulomb lambda of the current state.
+ * \param[in] useMdGpuGraph Whether MD GPU Graph is in use.
* \param[in] wcycle The wallcycle structure
*/
static inline void launchPmeGpuSpread(gmx_pme_t* pmedata,
const matrix box,
const StepWorkload& stepWork,
GpuEventSynchronizer* xReadyOnDevice,
const real lambdaQ,
+bool useMdGpuGraph,
gmx_wallcycle* wcycle)
{
wallcycle_start(wcycle, WallCycleCounter::PmeGpuMesh);
pme_gpu_prepare_computation(pmedata, box, wcycle, stepWork);
bool useGpuDirectComm = false;
gmx::PmeCoordinateReceiverGpu* pmeCoordinateReceiverGpu = nullptr;
pme_gpu_launch_spread(
-pmedata, xReadyOnDevice, wcycle, lambdaQ, useGpuDirectComm, pmeCoordinateReceiverGpu);
+pmedata, xReadyOnDevice, wcycle, lambdaQ, useGpuDirectComm, pmeCoordinateReceiverGpu, useMdGpuGraph);
wallcycle_stop(wcycle, WallCycleCounter::PmeGpuMesh);
}

@@ -1568,6 +1570,7 @@ void do_force(FILE* fplog,
stepWork,
localXReadyOnDevice,
lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)],
+simulationWork.useMdGpuGraph,
wcycle);
}

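
For context: both copies of this file receive the same change. do_force() now forwards simulationWork.useMdGpuGraph through the launchPmeGpuSpread() wrapper into pme_gpu_launch_spread(), whose extra trailing parameter is inferred here from the updated call site. A self-contained C++ sketch of the plumbing pattern, with stand-in names:

    #include <cstdio>

    // Stand-in for pme_gpu_launch_spread(); only the extra bool matters here.
    static void launchSpread(bool useMdGpuGraph)
    {
        std::printf("spread launched, MD GPU graph in use: %d\n", useMdGpuGraph);
    }

    // Stand-in for the launchPmeGpuSpread() wrapper: the flag is taken by
    // value and forwarded unchanged, keeping the wrapper stateless.
    static inline void launchPmeSpreadWrapper(bool useMdGpuGraph)
    {
        launchSpread(useMdGpuGraph);
    }

    int main()
    {
        const bool useMdGpuGraph = true; // simulationWork.useMdGpuGraph in the real code
        launchPmeSpreadWrapper(useMdGpuGraph);
        return 0;
    }
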
@@ -1140,7 +1140,7 @@ void gmx::LegacySimulator::do_md()
if (plumedswitch && bHREX) {
// gmx_enerdata_t *hrex_enerd;
int nlambda = enerd->foreignLambdaTerms.numLambdas();
-gmx_enerdata_t hrex_enerd(enerd->grpp.nener, nlambda == 0 ? 0 : nlambda - 1);
+gmx_enerdata_t hrex_enerd(enerd->grpp.nener, nlambda == 0 ? nullptr : &inputrec->fepvals->all_lambda);
int repl = -1;
int nrepl = -1;
if (MAIN(cr)){
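
For context: this hunk adapts the PLUMED HREX branch to the new gmx_enerdata_t constructor, which, judging from the call sites in this commit, now takes a pointer to the full lambda table instead of a lambda count; nullptr encodes "no foreign lambdas". A sketch of the pattern under that assumption, with a hypothetical stand-in type:

    #include <vector>

    // Hypothetical stand-in for gmx_enerdata_t's constructor shape:
    // an energy-group count plus an optional pointer to the lambda table.
    struct EnergyData
    {
        EnergyData(int /*nener*/, const std::vector<double>* /*allLambda*/) {}
    };

    int main()
    {
        std::vector<double> allLambda = { 0.0, 0.5, 1.0 };
        const int nlambda = static_cast<int>(allLambda.size());
        // Pass nullptr when there are no foreign lambdas, otherwise the table:
        EnergyData hrexEnerd(6, nlambda == 0 ? nullptr : &allLambda);
        return 0;
    }
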
@@ -72,6 +72,7 @@
#include "gromacs/fileio/gmxfio.h"
#include "gromacs/fileio/oenv.h"
#include "gromacs/fileio/tpxio.h"
#include "gromacs/fileio/trrio.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/gpu_utils/device_stream_manager.h"
@@ -937,6 +938,19 @@ int Mdrunner::mdrunner()
*/
applyGlobalSimulationState(
*inputHolder_.get(), partialDeserializedTpr.get(), globalState.get(), inputrec.get(), &mtop);

+static_assert(sc_trrMaxAtomCount == sc_checkpointMaxAtomCount);
+if (mtop.natoms > sc_checkpointMaxAtomCount)
+{
+    gmx_fatal(FARGS,
+              "System has %d atoms, which is more than can be stored in checkpoint and trr "
+              "files (max %" PRId64 ")",
+              mtop.natoms,
+              sc_checkpointMaxAtomCount);
+}
+
+// The XTC format has been updated to support up to 2^31-1 atoms, which is anyway the
+// largest supported by GROMACS, so no need for any particular check here.
}

/* Check and update the hardware options for internal consistency */
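
For context: the static_assert above ties the trr and checkpoint atom-count limits together at compile time, so a single runtime check provably covers both file formats. A self-contained sketch of the same guard pattern, with made-up limits in place of sc_trrMaxAtomCount and sc_checkpointMaxAtomCount:

    #include <cinttypes>
    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>

    // Made-up limits; the real constants come from the fileio headers.
    constexpr int64_t maxTrrAtoms        = 1000;
    constexpr int64_t maxCheckpointAtoms = 1000;

    static void checkAtomCount(int natoms)
    {
        // If the two limits ever diverge, compilation fails here and the
        // single runtime check below must be revisited.
        static_assert(maxTrrAtoms == maxCheckpointAtoms, "limits must match");
        if (natoms > maxCheckpointAtoms)
        {
            std::fprintf(stderr, "System has %d atoms, max is %" PRId64 "\n",
                         natoms, maxCheckpointAtoms);
            std::exit(EXIT_FAILURE);
        }
    }

    int main() { checkAtomCount(500); return 0; }
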
@@ -1078,11 +1092,12 @@ int Mdrunner::mdrunner()
// the task-deciding functions and will agree on the result
// without needing to communicate.
// The LBFGS minimizer, test-particle insertion, normal modes and shell dynamics don't support DD
+const bool hasCustomParallelization =
+        (EI_TPI(inputrec->eI) || inputrec->eI == IntegrationAlgorithm::NM);
const bool canUseDomainDecomposition =
-        !(inputrec->eI == IntegrationAlgorithm::LBFGS || EI_TPI(inputrec->eI)
-          || inputrec->eI == IntegrationAlgorithm::NM
-          || gmx_mtop_particletype_count(mtop)[ParticleType::Shell] > 0);
-GMX_RELEASE_ASSERT(!PAR(cr) || canUseDomainDecomposition,
+        (inputrec->eI != IntegrationAlgorithm::LBFGS && !hasCustomParallelization
+         && gmx_mtop_particletype_count(mtop)[ParticleType::Shell] == 0);
+GMX_RELEASE_ASSERT(!PAR(cr) || hasCustomParallelization || canUseDomainDecomposition,
"A parallel run should not arrive here without DD support");

int useDDWithSingleRank = -1;
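
For context: factoring out hasCustomParallelization lets the release assert accept parallel test-particle-insertion and normal-mode runs, which parallelize on their own rather than via domain decomposition. A simplified, self-contained sketch of the refactored predicates (hypothetical enum; the real checks use EI_TPI and IntegrationAlgorithm):

    enum class Integrator { MD, LBFGS, TPI, NM };

    static bool hasCustomParallelization(Integrator ei)
    {
        return ei == Integrator::TPI || ei == Integrator::NM;
    }

    static bool canUseDomainDecomposition(Integrator ei, int numShellParticles)
    {
        return ei != Integrator::LBFGS && !hasCustomParallelization(ei)
               && numShellParticles == 0;
    }

    // A parallel run is legal if DD works or the integrator brings its own scheme.
    static bool parallelRunSupported(Integrator ei, int numShellParticles)
    {
        return hasCustomParallelization(ei) || canUseDomainDecomposition(ei, numShellParticles);
    }

    int main()
    {
        return parallelRunSupported(Integrator::TPI, 0) ? 0 : 1; // TPI: now accepted
    }
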
@@ -1455,6 +1470,9 @@ int Mdrunner::mdrunner()
else
{
/* PME, if used, is done on all nodes with 1D decomposition */
+cr->mpi_comm_mygroup = cr->mpiDefaultCommunicator;
+cr->mpi_comm_mysim = cr->mpiDefaultCommunicator;
+
cr->nnodes = cr->sizeOfDefaultCommunicator;
cr->sim_nodeid = cr->rankInDefaultCommunicator;
cr->nodeid = cr->rankInDefaultCommunicator;
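
For context: in the branch without separate PME ranks, the group and simulation communicators are now explicitly aliased to the default communicator. A minimal MPI sketch of the idea, with hypothetical variable names standing in for the cr-> fields:

    #include <mpi.h>

    int main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);
        MPI_Comm defaultComm = MPI_COMM_WORLD; // stands in for cr->mpiDefaultCommunicator
        // With 1D decomposition and no PME-only group, both scopes are the
        // whole default communicator:
        MPI_Comm commMyGroup = defaultComm;    // cr->mpi_comm_mygroup
        MPI_Comm commMySim   = defaultComm;    // cr->mpi_comm_mysim
        int rankInSim = 0, rankInGroup = 0;
        MPI_Comm_rank(commMySim, &rankInSim);
        MPI_Comm_rank(commMyGroup, &rankInGroup); // identical here: same communicator
        MPI_Finalize();
        return 0;
    }
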
@@ -2109,7 +2127,7 @@ int Mdrunner::mdrunner()

/* Energy terms and groups */
gmx_enerdata_t enerd(mtop.groups.groups[SimulationAtomGroupType::EnergyOutput].size(),
-inputrec->fepvals->n_lambda);
+&inputrec->fepvals->all_lambda);

// cos acceleration is only supported by md, but older tpr
// files might still combine it with other integrators
@@ -72,6 +72,7 @@
#include "gromacs/fileio/gmxfio.h"
#include "gromacs/fileio/oenv.h"
#include "gromacs/fileio/tpxio.h"
#include "gromacs/fileio/trrio.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/gpu_utils/device_stream_manager.h"
@@ -927,6 +928,19 @@ int Mdrunner::mdrunner()
*/
applyGlobalSimulationState(
*inputHolder_.get(), partialDeserializedTpr.get(), globalState.get(), inputrec.get(), &mtop);

+static_assert(sc_trrMaxAtomCount == sc_checkpointMaxAtomCount);
+if (mtop.natoms > sc_checkpointMaxAtomCount)
+{
+    gmx_fatal(FARGS,
+              "System has %d atoms, which is more than can be stored in checkpoint and trr "
+              "files (max %" PRId64 ")",
+              mtop.natoms,
+              sc_checkpointMaxAtomCount);
+}
+
+// The XTC format has been updated to support up to 2^31-1 atoms, which is anyway the
+// largest supported by GROMACS, so no need for any particular check here.
}

/* Check and update the hardware options for internal consistency */
@@ -1068,11 +1082,12 @@ int Mdrunner::mdrunner()
// the task-deciding functions and will agree on the result
// without needing to communicate.
// The LBFGS minimizer, test-particle insertion, normal modes and shell dynamics don't support DD
+const bool hasCustomParallelization =
+        (EI_TPI(inputrec->eI) || inputrec->eI == IntegrationAlgorithm::NM);
const bool canUseDomainDecomposition =
-        !(inputrec->eI == IntegrationAlgorithm::LBFGS || EI_TPI(inputrec->eI)
-          || inputrec->eI == IntegrationAlgorithm::NM
-          || gmx_mtop_particletype_count(mtop)[ParticleType::Shell] > 0);
-GMX_RELEASE_ASSERT(!PAR(cr) || canUseDomainDecomposition,
+        (inputrec->eI != IntegrationAlgorithm::LBFGS && !hasCustomParallelization
+         && gmx_mtop_particletype_count(mtop)[ParticleType::Shell] == 0);
+GMX_RELEASE_ASSERT(!PAR(cr) || hasCustomParallelization || canUseDomainDecomposition,
"A parallel run should not arrive here without DD support");

int useDDWithSingleRank = -1;
@@ -1441,6 +1456,9 @@ int Mdrunner::mdrunner()
else
{
/* PME, if used, is done on all nodes with 1D decomposition */
+cr->mpi_comm_mygroup = cr->mpiDefaultCommunicator;
+cr->mpi_comm_mysim = cr->mpiDefaultCommunicator;
+
cr->nnodes = cr->sizeOfDefaultCommunicator;
cr->sim_nodeid = cr->rankInDefaultCommunicator;
cr->nodeid = cr->rankInDefaultCommunicator;
@@ -2095,7 +2113,7 @@ int Mdrunner::mdrunner()

/* Energy terms and groups */
gmx_enerdata_t enerd(mtop.groups.groups[SimulationAtomGroupType::EnergyOutput].size(),
-inputrec->fepvals->n_lambda);
+&inputrec->fepvals->all_lambda);

// cos acceleration is only supported by md, but older tpr
// files might still combine it with other integrators
@@ -625,6 +625,8 @@ bool decideWhetherToUseGpuForUpdate(const bool isDomainDecomposition,
const bool pmeSpreadGatherUsesCpu = (pmeRunMode == PmeRunMode::CPU);

std::string errorMessage;
+// Flag to set if we do not want to log the error with `-update auto` (e.g., for non-GPU build)
+bool silenceWarningMessageWithUpdateAuto = forceCpuUpdateDefault;

if (isDomainDecomposition)
{
@@ -664,14 +666,19 @@
{
errorMessage +=
"Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
+silenceWarningMessageWithUpdateAuto = true;
}
if (!gpusWereDetected)
{
errorMessage += "Compatible GPUs must have been found.\n";
+silenceWarningMessageWithUpdateAuto = true;
}
if (!(GMX_GPU_CUDA || GMX_GPU_SYCL))
{
errorMessage += "Only CUDA and SYCL builds are supported.\n";
+// Silence clang-analyzer deadcode.DeadStores warning about ignoring the previous assignments
+GMX_UNUSED_VALUE(silenceWarningMessageWithUpdateAuto);
+silenceWarningMessageWithUpdateAuto = true;
}
if (inputrec.eI != IntegrationAlgorithm::MD)
{
@@ -768,7 +775,7 @@ bool decideWhetherToUseGpuForUpdate(const bool isDomainDecomposition,

if (!errorMessage.empty())
{
-if (updateTarget == TaskTarget::Auto && !forceCpuUpdateDefault)
+if (updateTarget == TaskTarget::Auto && !silenceWarningMessageWithUpdateAuto)
{
GMX_LOG(mdlog.info)
.asParagraph()
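
For context: the new flag makes `-update auto` fall back to the CPU silently when the GPU path was never available (non-GPU build, no compatible GPUs, no GPU tasks), while still logging when an otherwise viable GPU update is blocked by the simulation setup; the GMX_UNUSED_VALUE call only quiets the analyzer about the deliberately overwritten store. A minimal self-contained sketch of the pattern, with stand-in names:

    #include <iostream>
    #include <string>

    #define UNUSED_VALUE(x) ((void)(x)) // stand-in for GMX_UNUSED_VALUE

    static bool decideGpuUpdate(bool autoTarget, bool gpuBuild, bool gpusDetected)
    {
        std::string errorMessage;
        bool silenceWarning = false;
        if (!gpusDetected)
        {
            errorMessage += "Compatible GPUs must have been found.\n";
            silenceWarning = true; // CPU fallback is expected, do not warn
        }
        if (!gpuBuild)
        {
            errorMessage += "Only CUDA and SYCL builds are supported.\n";
            UNUSED_VALUE(silenceWarning); // quiet dead-store analysis
            silenceWarning = true;
        }
        if (!errorMessage.empty())
        {
            if (autoTarget && !silenceWarning)
            {
                std::cerr << errorMessage; // user should learn why auto chose CPU
            }
            return false;
        }
        return true;
    }

    int main() { return decideGpuUpdate(true, true, false) ? 0 : 1; }
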
@@ -624,6 +624,8 @@ bool decideWhetherToUseGpuForUpdate(const bool isDomainDecomposition,
const bool pmeSpreadGatherUsesCpu = (pmeRunMode == PmeRunMode::CPU);

std::string errorMessage;
+// Flag to set if we do not want to log the error with `-update auto` (e.g., for non-GPU build)
+bool silenceWarningMessageWithUpdateAuto = forceCpuUpdateDefault;

if (isDomainDecomposition)
{
@@ -663,14 +665,19 @@
{
errorMessage +=
"Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
+silenceWarningMessageWithUpdateAuto = true;
}
if (!gpusWereDetected)
{
errorMessage += "Compatible GPUs must have been found.\n";
+silenceWarningMessageWithUpdateAuto = true;
}
if (!(GMX_GPU_CUDA || GMX_GPU_SYCL))
{
errorMessage += "Only CUDA and SYCL builds are supported.\n";
+// Silence clang-analyzer deadcode.DeadStores warning about ignoring the previous assignments
+GMX_UNUSED_VALUE(silenceWarningMessageWithUpdateAuto);
+silenceWarningMessageWithUpdateAuto = true;
}
if (inputrec.eI != IntegrationAlgorithm::MD)
{
@@ -762,7 +769,7 @@ bool decideWhetherToUseGpuForUpdate(const bool isDomainDecomposition,

if (!errorMessage.empty())
{
-if (updateTarget == TaskTarget::Auto && !forceCpuUpdateDefault)
+if (updateTarget == TaskTarget::Auto && !silenceWarningMessageWithUpdateAuto)
{
GMX_LOG(mdlog.info)
.asParagraph()