diff --git a/patches/gromacs-5.1.0.diff/src/gromacs/CMakeLists.txt b/patches/gromacs-5.1.2.diff/src/gromacs/CMakeLists.txt similarity index 100% rename from patches/gromacs-5.1.0.diff/src/gromacs/CMakeLists.txt rename to patches/gromacs-5.1.2.diff/src/gromacs/CMakeLists.txt diff --git a/patches/gromacs-5.1.0.diff/src/gromacs/CMakeLists.txt.preplumed b/patches/gromacs-5.1.2.diff/src/gromacs/CMakeLists.txt.preplumed similarity index 100% rename from patches/gromacs-5.1.0.diff/src/gromacs/CMakeLists.txt.preplumed rename to patches/gromacs-5.1.2.diff/src/gromacs/CMakeLists.txt.preplumed diff --git a/patches/gromacs-5.1.0.diff/src/gromacs/mdlib/force.cpp b/patches/gromacs-5.1.2.diff/src/gromacs/mdlib/force.cpp similarity index 100% rename from patches/gromacs-5.1.0.diff/src/gromacs/mdlib/force.cpp rename to patches/gromacs-5.1.2.diff/src/gromacs/mdlib/force.cpp diff --git a/patches/gromacs-5.1.0.diff/src/gromacs/mdlib/force.cpp.preplumed b/patches/gromacs-5.1.2.diff/src/gromacs/mdlib/force.cpp.preplumed similarity index 100% rename from patches/gromacs-5.1.0.diff/src/gromacs/mdlib/force.cpp.preplumed rename to patches/gromacs-5.1.2.diff/src/gromacs/mdlib/force.cpp.preplumed diff --git a/patches/gromacs-5.1.0.diff/src/gromacs/mdlib/minimize.cpp b/patches/gromacs-5.1.2.diff/src/gromacs/mdlib/minimize.cpp similarity index 100% rename from patches/gromacs-5.1.0.diff/src/gromacs/mdlib/minimize.cpp rename to patches/gromacs-5.1.2.diff/src/gromacs/mdlib/minimize.cpp diff --git a/patches/gromacs-5.1.0.diff/src/gromacs/mdlib/minimize.cpp.preplumed b/patches/gromacs-5.1.2.diff/src/gromacs/mdlib/minimize.cpp.preplumed similarity index 100% rename from patches/gromacs-5.1.0.diff/src/gromacs/mdlib/minimize.cpp.preplumed rename to patches/gromacs-5.1.2.diff/src/gromacs/mdlib/minimize.cpp.preplumed diff --git a/patches/gromacs-5.1.0.diff/src/programs/mdrun/md.cpp b/patches/gromacs-5.1.2.diff/src/programs/mdrun/md.cpp similarity index 98% rename from patches/gromacs-5.1.0.diff/src/programs/mdrun/md.cpp rename to patches/gromacs-5.1.2.diff/src/programs/mdrun/md.cpp index a914b2d7f14053caf34a72210ccf56e31dac925c..0007817df2c5c9c4f44428e8742ba03a31627d66 100644 --- a/patches/gromacs-5.1.0.diff/src/programs/mdrun/md.cpp +++ b/patches/gromacs-5.1.2.diff/src/programs/mdrun/md.cpp @@ -235,7 +235,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], simulation stops. If equal to zero, don't communicate any more between multisims.*/ /* PME load balancing data for GPU kernels */ - pme_load_balancing_t *pme_loadbal; + pme_load_balancing_t *pme_loadbal = NULL; gmx_bool bPMETune = FALSE; gmx_bool bPMETunePrinting = FALSE; @@ -1431,6 +1431,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], if (bResetCountersHalfMaxH && MASTER(cr) && elapsed_time > max_hours*60.0*60.0*0.495) { + /* Set flag that will communicate the signal to all ranks in the simulation */ gs.sig[eglsRESETCOUNTERS] = 1; } @@ -1845,10 +1846,28 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], step_rel++; } + /* TODO make a counter-reset module */ + /* If it is time to reset counters, set a flag that remains + true until counters actually get reset */ if (step_rel == wcycle_get_reset_counters(wcycle) || gs.set[eglsRESETCOUNTERS] != 0) { - /* Reset all the counters related to performance over the run */ + if (pme_loadbal_is_active(pme_loadbal)) + { + /* Do not permit counter reset while PME load + * balancing is active. The only purpose for resetting + * counters is to measure reliable performance data, + * and that can't be done before balancing + * completes. + * + * TODO consider fixing this by delaying the reset + * until after load balancing completes, + * e.g. https://gerrit.gromacs.org/#/c/4964/2 */ + gmx_fatal(FARGS, "PME tuning was still active when attempting to " + "reset mdrun counters at step " GMX_PRId64 ". Try " + "resetting counters later in the run, e.g. with gmx " + "mdrun -resetstep.", step); + } reset_all_counters(fplog, cr, step, &step_rel, ir, wcycle, nrnb, walltime_accounting, use_GPU(fr->nbv) ? fr->nbv : NULL); wcycle_set_reset_counters(wcycle, -1); @@ -1859,7 +1878,9 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], } /* Correct max_hours for the elapsed time */ max_hours -= elapsed_time/(60.0*60.0); - bResetCountersHalfMaxH = FALSE; + /* If mdrun -maxh -resethway was active, it can only trigger once */ + bResetCountersHalfMaxH = FALSE; /* TODO move this to where gs.sig[eglsRESETCOUNTERS] is set */ + /* Reset can only happen once, so clear the triggering flag. */ gs.set[eglsRESETCOUNTERS] = 0; } diff --git a/patches/gromacs-5.1.0.diff/src/programs/mdrun/md.cpp.preplumed b/patches/gromacs-5.1.2.diff/src/programs/mdrun/md.cpp.preplumed similarity index 98% rename from patches/gromacs-5.1.0.diff/src/programs/mdrun/md.cpp.preplumed rename to patches/gromacs-5.1.2.diff/src/programs/mdrun/md.cpp.preplumed index f1372731958a627d77d317fab8fa1976527874a6..4f33d897e950a2793d76d963e8451ed0d6c8dd96 100644 --- a/patches/gromacs-5.1.0.diff/src/programs/mdrun/md.cpp.preplumed +++ b/patches/gromacs-5.1.2.diff/src/programs/mdrun/md.cpp.preplumed @@ -229,7 +229,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], simulation stops. If equal to zero, don't communicate any more between multisims.*/ /* PME load balancing data for GPU kernels */ - pme_load_balancing_t *pme_loadbal; + pme_load_balancing_t *pme_loadbal = NULL; gmx_bool bPMETune = FALSE; gmx_bool bPMETunePrinting = FALSE; @@ -1333,6 +1333,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], if (bResetCountersHalfMaxH && MASTER(cr) && elapsed_time > max_hours*60.0*60.0*0.495) { + /* Set flag that will communicate the signal to all ranks in the simulation */ gs.sig[eglsRESETCOUNTERS] = 1; } @@ -1747,10 +1748,28 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], step_rel++; } + /* TODO make a counter-reset module */ + /* If it is time to reset counters, set a flag that remains + true until counters actually get reset */ if (step_rel == wcycle_get_reset_counters(wcycle) || gs.set[eglsRESETCOUNTERS] != 0) { - /* Reset all the counters related to performance over the run */ + if (pme_loadbal_is_active(pme_loadbal)) + { + /* Do not permit counter reset while PME load + * balancing is active. The only purpose for resetting + * counters is to measure reliable performance data, + * and that can't be done before balancing + * completes. + * + * TODO consider fixing this by delaying the reset + * until after load balancing completes, + * e.g. https://gerrit.gromacs.org/#/c/4964/2 */ + gmx_fatal(FARGS, "PME tuning was still active when attempting to " + "reset mdrun counters at step " GMX_PRId64 ". Try " + "resetting counters later in the run, e.g. with gmx " + "mdrun -resetstep.", step); + } reset_all_counters(fplog, cr, step, &step_rel, ir, wcycle, nrnb, walltime_accounting, use_GPU(fr->nbv) ? fr->nbv : NULL); wcycle_set_reset_counters(wcycle, -1); @@ -1761,7 +1780,9 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], } /* Correct max_hours for the elapsed time */ max_hours -= elapsed_time/(60.0*60.0); - bResetCountersHalfMaxH = FALSE; + /* If mdrun -maxh -resethway was active, it can only trigger once */ + bResetCountersHalfMaxH = FALSE; /* TODO move this to where gs.sig[eglsRESETCOUNTERS] is set */ + /* Reset can only happen once, so clear the triggering flag. */ gs.set[eglsRESETCOUNTERS] = 0; } diff --git a/patches/gromacs-5.1.0.diff/src/programs/mdrun/mdrun.cpp b/patches/gromacs-5.1.2.diff/src/programs/mdrun/mdrun.cpp similarity index 100% rename from patches/gromacs-5.1.0.diff/src/programs/mdrun/mdrun.cpp rename to patches/gromacs-5.1.2.diff/src/programs/mdrun/mdrun.cpp diff --git a/patches/gromacs-5.1.0.diff/src/programs/mdrun/mdrun.cpp.preplumed b/patches/gromacs-5.1.2.diff/src/programs/mdrun/mdrun.cpp.preplumed similarity index 100% rename from patches/gromacs-5.1.0.diff/src/programs/mdrun/mdrun.cpp.preplumed rename to patches/gromacs-5.1.2.diff/src/programs/mdrun/mdrun.cpp.preplumed diff --git a/patches/gromacs-5.1.0.diff/src/programs/mdrun/repl_ex.cpp b/patches/gromacs-5.1.2.diff/src/programs/mdrun/repl_ex.cpp similarity index 100% rename from patches/gromacs-5.1.0.diff/src/programs/mdrun/repl_ex.cpp rename to patches/gromacs-5.1.2.diff/src/programs/mdrun/repl_ex.cpp diff --git a/patches/gromacs-5.1.0.diff/src/programs/mdrun/repl_ex.cpp.preplumed b/patches/gromacs-5.1.2.diff/src/programs/mdrun/repl_ex.cpp.preplumed similarity index 100% rename from patches/gromacs-5.1.0.diff/src/programs/mdrun/repl_ex.cpp.preplumed rename to patches/gromacs-5.1.2.diff/src/programs/mdrun/repl_ex.cpp.preplumed