From 556252d1bca3e941ea6d483c1a38f497626f0965 Mon Sep 17 00:00:00 2001 From: Giovanni Bussi <giovanni.bussi@gmail.com> Date: Mon, 23 Mar 2015 16:03:10 +0100 Subject: [PATCH] Updated gmx 5.0.x to 5.0.4 --- .../src/gromacs/CMakeLists.txt | 14 +++---- .../src/gromacs/CMakeLists.txt.preplumed | 14 +++---- .../src/gromacs/mdlib/force.c | 17 ++++---- .../src/gromacs/mdlib/force.c.preplumed | 17 ++++---- .../src/gromacs/mdlib/minimize.c | 17 ++++---- .../src/gromacs/mdlib/minimize.c.preplumed | 17 ++++---- .../gromacs-5.0.diff/src/programs/mdrun/md.c | 42 +++++++++++++++++-- .../src/programs/mdrun/md.c.preplumed | 42 +++++++++++++++++-- 8 files changed, 128 insertions(+), 52 deletions(-) diff --git a/patches/gromacs-5.0.diff/src/gromacs/CMakeLists.txt b/patches/gromacs-5.0.diff/src/gromacs/CMakeLists.txt index 272bb55bb..cc97aa805 100644 --- a/patches/gromacs-5.0.diff/src/gromacs/CMakeLists.txt +++ b/patches/gromacs-5.0.diff/src/gromacs/CMakeLists.txt @@ -61,11 +61,6 @@ if(GMX_USE_TNG) include_directories(${TNG_IO_INCLUDE_DIRS}) endif() if(NOT GMX_EXTERNAL_TNG) - # TNG wants zlib if it is available - find_package(ZLIB QUIET) - include(gmxTestZLib) - gmx_test_zlib(HAVE_ZLIB) - include(${CMAKE_SOURCE_DIR}/src/external/tng_io/BuildTNG.cmake) tng_get_source_list(TNG_SOURCES TNG_IO_DEFINITIONS) list(APPEND LIBGROMACS_SOURCES ${TNG_SOURCES}) @@ -73,11 +68,12 @@ if(GMX_USE_TNG) if (HAVE_ZLIB) list(APPEND GMX_EXTRA_LIBRARIES ${ZLIB_LIBRARIES}) + include_directories(${ZLIB_INCLUDE_DIRS}) endif() endif() else() # We still need to get tng/tng_io_fwd.h from somewhere! - include_directories(${CMAKE_SOURCE_DIR}/src/external/tng_io/include) + include_directories(BEFORE ${CMAKE_SOURCE_DIR}/src/external/tng_io/include) endif() add_subdirectory(gmxlib) @@ -205,7 +201,11 @@ set_target_properties(libgromacs PROPERTIES # Only install the library in mdrun-only mode if it is actually necessary # for the binary if (NOT GMX_BUILD_MDRUN_ONLY OR BUILD_SHARED_LIBS) - install(TARGETS libgromacs DESTINATION ${LIB_INSTALL_DIR} COMPONENT libraries) + install(TARGETS libgromacs + LIBRARY DESTINATION ${LIB_INSTALL_DIR} + RUNTIME DESTINATION ${BIN_INSTALL_DIR} + ARCHIVE DESTINATION ${LIB_INSTALL_DIR} + COMPONENT libraries) endif() if (NOT GMX_BUILD_MDRUN_ONLY) diff --git a/patches/gromacs-5.0.diff/src/gromacs/CMakeLists.txt.preplumed b/patches/gromacs-5.0.diff/src/gromacs/CMakeLists.txt.preplumed index 2045cebcc..6db37e240 100644 --- a/patches/gromacs-5.0.diff/src/gromacs/CMakeLists.txt.preplumed +++ b/patches/gromacs-5.0.diff/src/gromacs/CMakeLists.txt.preplumed @@ -59,11 +59,6 @@ if(GMX_USE_TNG) include_directories(${TNG_IO_INCLUDE_DIRS}) endif() if(NOT GMX_EXTERNAL_TNG) - # TNG wants zlib if it is available - find_package(ZLIB QUIET) - include(gmxTestZLib) - gmx_test_zlib(HAVE_ZLIB) - include(${CMAKE_SOURCE_DIR}/src/external/tng_io/BuildTNG.cmake) tng_get_source_list(TNG_SOURCES TNG_IO_DEFINITIONS) list(APPEND LIBGROMACS_SOURCES ${TNG_SOURCES}) @@ -71,11 +66,12 @@ if(GMX_USE_TNG) if (HAVE_ZLIB) list(APPEND GMX_EXTRA_LIBRARIES ${ZLIB_LIBRARIES}) + include_directories(${ZLIB_INCLUDE_DIRS}) endif() endif() else() # We still need to get tng/tng_io_fwd.h from somewhere! - include_directories(${CMAKE_SOURCE_DIR}/src/external/tng_io/include) + include_directories(BEFORE ${CMAKE_SOURCE_DIR}/src/external/tng_io/include) endif() add_subdirectory(gmxlib) @@ -203,7 +199,11 @@ set_target_properties(libgromacs PROPERTIES # Only install the library in mdrun-only mode if it is actually necessary # for the binary if (NOT GMX_BUILD_MDRUN_ONLY OR BUILD_SHARED_LIBS) - install(TARGETS libgromacs DESTINATION ${LIB_INSTALL_DIR} COMPONENT libraries) + install(TARGETS libgromacs + LIBRARY DESTINATION ${LIB_INSTALL_DIR} + RUNTIME DESTINATION ${BIN_INSTALL_DIR} + ARCHIVE DESTINATION ${LIB_INSTALL_DIR} + COMPONENT libraries) endif() if (NOT GMX_BUILD_MDRUN_ONLY) diff --git a/patches/gromacs-5.0.diff/src/gromacs/mdlib/force.c b/patches/gromacs-5.0.diff/src/gromacs/mdlib/force.c index f4de01a74..8227d5b18 100644 --- a/patches/gromacs-5.0.diff/src/gromacs/mdlib/force.c +++ b/patches/gromacs-5.0.diff/src/gromacs/mdlib/force.c @@ -125,9 +125,11 @@ static void reduce_thread_forces(int n, rvec *f, int nthreads, f_thread_t *f_t) { int t, i; + int nthreads_loop gmx_unused; /* This reduction can run over any number of threads */ -#pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntBonded)) private(t) schedule(static) + nthreads_loop = gmx_omp_nthreads_get(emntBonded); +#pragma omp parallel for num_threads(nthreads_loop) private(t) schedule(static) for (i = 0; i < n; i++) { for (t = 1; t < nthreads; t++) @@ -547,14 +549,11 @@ void do_force_lowlevel(FILE *fplog, gmx_int64_t step, ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1], cr, t, fr, - md->chargeA, - md->nChargePerturbed ? md->chargeB : NULL, - md->sqrt_c6A, - md->nTypePerturbed ? md->sqrt_c6B : NULL, - md->sigmaA, - md->nTypePerturbed ? md->sigmaB : NULL, - md->sigma3A, - md->nTypePerturbed ? md->sigma3B : NULL, + md->chargeA, md->chargeB, + md->sqrt_c6A, md->sqrt_c6B, + md->sigmaA, md->sigmaB, + md->sigma3A, md->sigma3B, + md->nChargePerturbed || md->nTypePerturbed, ir->cutoff_scheme != ecutsVERLET, excl, x, bSB ? boxs : box, mu_tot, ir->ewald_geometry, diff --git a/patches/gromacs-5.0.diff/src/gromacs/mdlib/force.c.preplumed b/patches/gromacs-5.0.diff/src/gromacs/mdlib/force.c.preplumed index 632c2f3a4..5230983cb 100644 --- a/patches/gromacs-5.0.diff/src/gromacs/mdlib/force.c.preplumed +++ b/patches/gromacs-5.0.diff/src/gromacs/mdlib/force.c.preplumed @@ -117,9 +117,11 @@ static void reduce_thread_forces(int n, rvec *f, int nthreads, f_thread_t *f_t) { int t, i; + int nthreads_loop gmx_unused; /* This reduction can run over any number of threads */ -#pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntBonded)) private(t) schedule(static) + nthreads_loop = gmx_omp_nthreads_get(emntBonded); +#pragma omp parallel for num_threads(nthreads_loop) private(t) schedule(static) for (i = 0; i < n; i++) { for (t = 1; t < nthreads; t++) @@ -539,14 +541,11 @@ void do_force_lowlevel(FILE *fplog, gmx_int64_t step, ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1], cr, t, fr, - md->chargeA, - md->nChargePerturbed ? md->chargeB : NULL, - md->sqrt_c6A, - md->nTypePerturbed ? md->sqrt_c6B : NULL, - md->sigmaA, - md->nTypePerturbed ? md->sigmaB : NULL, - md->sigma3A, - md->nTypePerturbed ? md->sigma3B : NULL, + md->chargeA, md->chargeB, + md->sqrt_c6A, md->sqrt_c6B, + md->sigmaA, md->sigmaB, + md->sigma3A, md->sigma3B, + md->nChargePerturbed || md->nTypePerturbed, ir->cutoff_scheme != ecutsVERLET, excl, x, bSB ? boxs : box, mu_tot, ir->ewald_geometry, diff --git a/patches/gromacs-5.0.diff/src/gromacs/mdlib/minimize.c b/patches/gromacs-5.0.diff/src/gromacs/mdlib/minimize.c index 5b29bd701..1c1d0b00b 100644 --- a/patches/gromacs-5.0.diff/src/gromacs/mdlib/minimize.c +++ b/patches/gromacs-5.0.diff/src/gromacs/mdlib/minimize.c @@ -317,7 +317,8 @@ void init_em(FILE *fplog, const char *title, gmx_vsite_t *vsite, gmx_constr_t constr, int nfile, const t_filenm fnm[], gmx_mdoutf_t *outf, t_mdebin **mdebin, - int imdport, unsigned long gmx_unused Flags) + int imdport, unsigned long gmx_unused Flags, + gmx_wallcycle_t wcycle) { int i; real dvdl_constr; @@ -434,7 +435,7 @@ void init_em(FILE *fplog, const char *title, *gstat = global_stat_init(ir); } - *outf = init_mdoutf(fplog, nfile, fnm, 0, cr, ir, top_global, NULL); + *outf = init_mdoutf(fplog, nfile, fnm, 0, cr, ir, top_global, NULL, wcycle); snew(*enerd, 1); init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda, @@ -593,6 +594,7 @@ static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md, int start, end; rvec *x1, *x2; real dvdl_constr; + int nthreads gmx_unused; s1 = &ems1->s; s2 = &ems2->s; @@ -630,7 +632,8 @@ static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md, x1 = s1->x; x2 = s2->x; -#pragma omp parallel num_threads(gmx_omp_nthreads_get(emntUpdate)) + nthreads = gmx_omp_nthreads_get(emntUpdate); +#pragma omp parallel num_threads(nthreads) { int gf, i, m; @@ -1072,7 +1075,7 @@ double do_cg(FILE *fplog, t_commrec *cr, init_em(fplog, CG, cr, inputrec, state_global, top_global, s_min, &top, &f, &f_global, nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, - nfile, fnm, &outf, &mdebin, imdport, Flags); + nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle); /* Print to log file */ print_em_start(fplog, cr, walltime_accounting, wcycle, CG); @@ -1744,7 +1747,7 @@ double do_lbfgs(FILE *fplog, t_commrec *cr, init_em(fplog, LBFGS, cr, inputrec, state, top_global, &ems, &top, &f, &f_global, nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, - nfile, fnm, &outf, &mdebin, imdport, Flags); + nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle); /* Do_lbfgs is not completely updated like do_steep and do_cg, * so we free some memory again. */ @@ -2489,7 +2492,7 @@ double do_steep(FILE *fplog, t_commrec *cr, init_em(fplog, SD, cr, inputrec, state_global, top_global, s_try, &top, &f, &f_global, nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, - nfile, fnm, &outf, &mdebin, imdport, Flags); + nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle); /* Print to log file */ print_em_start(fplog, cr, walltime_accounting, wcycle, SD); @@ -2741,7 +2744,7 @@ double do_nm(FILE *fplog, t_commrec *cr, state_global, top_global, state_work, &top, &f, &f_global, nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, - nfile, fnm, &outf, NULL, imdport, Flags); + nfile, fnm, &outf, NULL, imdport, Flags, wcycle); natoms = top_global->natoms; snew(fneg, natoms); diff --git a/patches/gromacs-5.0.diff/src/gromacs/mdlib/minimize.c.preplumed b/patches/gromacs-5.0.diff/src/gromacs/mdlib/minimize.c.preplumed index ab48e44b4..69008f53f 100644 --- a/patches/gromacs-5.0.diff/src/gromacs/mdlib/minimize.c.preplumed +++ b/patches/gromacs-5.0.diff/src/gromacs/mdlib/minimize.c.preplumed @@ -310,7 +310,8 @@ void init_em(FILE *fplog, const char *title, gmx_vsite_t *vsite, gmx_constr_t constr, int nfile, const t_filenm fnm[], gmx_mdoutf_t *outf, t_mdebin **mdebin, - int imdport, unsigned long gmx_unused Flags) + int imdport, unsigned long gmx_unused Flags, + gmx_wallcycle_t wcycle) { int i; real dvdl_constr; @@ -427,7 +428,7 @@ void init_em(FILE *fplog, const char *title, *gstat = global_stat_init(ir); } - *outf = init_mdoutf(fplog, nfile, fnm, 0, cr, ir, top_global, NULL); + *outf = init_mdoutf(fplog, nfile, fnm, 0, cr, ir, top_global, NULL, wcycle); snew(*enerd, 1); init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda, @@ -549,6 +550,7 @@ static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md, int start, end; rvec *x1, *x2; real dvdl_constr; + int nthreads gmx_unused; s1 = &ems1->s; s2 = &ems2->s; @@ -586,7 +588,8 @@ static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md, x1 = s1->x; x2 = s2->x; -#pragma omp parallel num_threads(gmx_omp_nthreads_get(emntUpdate)) + nthreads = gmx_omp_nthreads_get(emntUpdate); +#pragma omp parallel num_threads(nthreads) { int gf, i, m; @@ -1000,7 +1003,7 @@ double do_cg(FILE *fplog, t_commrec *cr, init_em(fplog, CG, cr, inputrec, state_global, top_global, s_min, &top, &f, &f_global, nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, - nfile, fnm, &outf, &mdebin, imdport, Flags); + nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle); /* Print to log file */ print_em_start(fplog, cr, walltime_accounting, wcycle, CG); @@ -1672,7 +1675,7 @@ double do_lbfgs(FILE *fplog, t_commrec *cr, init_em(fplog, LBFGS, cr, inputrec, state, top_global, &ems, &top, &f, &f_global, nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, - nfile, fnm, &outf, &mdebin, imdport, Flags); + nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle); /* Do_lbfgs is not completely updated like do_steep and do_cg, * so we free some memory again. */ @@ -2417,7 +2420,7 @@ double do_steep(FILE *fplog, t_commrec *cr, init_em(fplog, SD, cr, inputrec, state_global, top_global, s_try, &top, &f, &f_global, nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, - nfile, fnm, &outf, &mdebin, imdport, Flags); + nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle); /* Print to log file */ print_em_start(fplog, cr, walltime_accounting, wcycle, SD); @@ -2669,7 +2672,7 @@ double do_nm(FILE *fplog, t_commrec *cr, state_global, top_global, state_work, &top, &f, &f_global, nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, - nfile, fnm, &outf, NULL, imdport, Flags); + nfile, fnm, &outf, NULL, imdport, Flags, wcycle); natoms = top_global->natoms; snew(fneg, natoms); diff --git a/patches/gromacs-5.0.diff/src/programs/mdrun/md.c b/patches/gromacs-5.0.diff/src/programs/mdrun/md.c index 8b54282d6..481aec4c6 100644 --- a/patches/gromacs-5.0.diff/src/programs/mdrun/md.c +++ b/patches/gromacs-5.0.diff/src/programs/mdrun/md.c @@ -309,7 +309,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], &(state_global->fep_state), lam0, nrnb, top_global, &upd, nfile, fnm, &outf, &mdebin, - force_vir, shake_vir, mu_tot, &bSimAnn, &vcm, Flags); + force_vir, shake_vir, mu_tot, &bSimAnn, &vcm, Flags, wcycle); clear_mat(total_vir); clear_mat(pres); @@ -1176,6 +1176,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], if (bVV && !bStartingFromCpt && !bRerunMD) /* ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */ { + wallcycle_start(wcycle, ewcUPDATE); if (ir->eI == eiVV && bInitStep) { /* if using velocity verlet with full time step Ekin, @@ -1250,11 +1251,13 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], bOK = TRUE; if (!bRerunMD || rerun_fr.bV || bForceUpdate) /* Why is rerun_fr.bV here? Unclear. */ { + wallcycle_stop(wcycle, ewcUPDATE); update_constraints(fplog, step, NULL, ir, ekind, mdatoms, state, fr->bMolPBC, graph, f, &top->idef, shake_vir, cr, nrnb, wcycle, upd, constr, TRUE, bCalcVir, vetanew); + wallcycle_start(wcycle, ewcUPDATE); if (bCalcVir && bUpdateDoLR && ir->nstcalclr > 1) { @@ -1294,6 +1297,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], So we need information from the last step in the first half of the integration */ if (bGStat || do_per_step(step-1, nstglobalcomm)) { + wallcycle_stop(wcycle, ewcUPDATE); compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, constr, NULL, FALSE, state->box, @@ -1314,6 +1318,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], time step kinetic energy for the pressure (always true now, since we want accurate statistics). b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in EkinAveVel because it's needed for the pressure */ + wallcycle_start(wcycle, ewcUPDATE); } /* temperature scaling and pressure scaling to produce the extended variables at t+dt */ if (!bInitStep) @@ -1327,7 +1332,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], { if (bExchanged) { - + wallcycle_stop(wcycle, ewcUPDATE); /* We need the kinetic energy at minus the half step for determining * the full step kinetic energy and possibly for T-coupling.*/ /* This may not be quite working correctly yet . . . . */ @@ -1336,6 +1341,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], constr, NULL, FALSE, state->box, top_global, &bSumEkinhOld, CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE); + wallcycle_start(wcycle, ewcUPDATE); } } } @@ -1365,6 +1371,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], { copy_rvecn(cbuf, state->v, 0, state->natoms); } + wallcycle_stop(wcycle, ewcUPDATE); } /* MRS -- now done iterating -- compute the conserved quantity */ @@ -1407,7 +1414,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t, ir, state, state_global, top_global, fr, outf, mdebin, ekind, f, f_global, - wcycle, &nchkpt, + &nchkpt, bCPT, bRerunMD, bLastStep, (Flags & MD_CONFOUT), bSumEkinhOld); /* Check if IMD step and do IMD communication, if bIMD is TRUE. */ @@ -1989,6 +1996,21 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], } dd_bcast(cr->dd, sizeof(gmx_bool), &bPMETuneRunning); + if (bPMETuneRunning && + fr->nbv->bUseGPU && DOMAINDECOMP(cr) && + !(cr->duty & DUTY_PME)) + { + /* Lock DLB=auto to off (does nothing when DLB=yes/no). + * With GPUs + separate PME ranks, we don't want DLB. + * This could happen when we scan coarse grids and + * it would then never be turned off again. + * This would hurt performance at the final, optimal + * grid spacing, where DLB almost never helps. + * Also, DLB can limit the cut-off for PME tuning. + */ + dd_dlb_set_lock(cr->dd, TRUE); + } + if (bPMETuneRunning || step_rel > ir->nstlist*50) { bPMETuneTry = FALSE; @@ -2019,6 +2041,16 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], { calc_enervirdiff(NULL, ir->eDispCorr, fr); } + + if (!bPMETuneRunning && + DOMAINDECOMP(cr) && + dd_dlb_is_locked(cr->dd)) + { + /* Unlock the DLB=auto, DLB is allowed to activate + * (but we don't expect it to activate in most cases). + */ + dd_dlb_set_lock(cr->dd, FALSE); + } } cycles_pmes = 0; } @@ -2049,6 +2081,10 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], /* End of main MD loop */ debug_gmx(); + /* Closing TNG files can include compressing data. Therefore it is good to do that + * before stopping the time measurements. */ + mdoutf_tng_close(outf); + /* Stop measuring walltime */ walltime_accounting_end(walltime_accounting); diff --git a/patches/gromacs-5.0.diff/src/programs/mdrun/md.c.preplumed b/patches/gromacs-5.0.diff/src/programs/mdrun/md.c.preplumed index 5fb9b7011..3d98d597c 100644 --- a/patches/gromacs-5.0.diff/src/programs/mdrun/md.c.preplumed +++ b/patches/gromacs-5.0.diff/src/programs/mdrun/md.c.preplumed @@ -298,7 +298,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], &(state_global->fep_state), lam0, nrnb, top_global, &upd, nfile, fnm, &outf, &mdebin, - force_vir, shake_vir, mu_tot, &bSimAnn, &vcm, Flags); + force_vir, shake_vir, mu_tot, &bSimAnn, &vcm, Flags, wcycle); clear_mat(total_vir); clear_mat(pres); @@ -1089,6 +1089,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], if (bVV && !bStartingFromCpt && !bRerunMD) /* ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */ { + wallcycle_start(wcycle, ewcUPDATE); if (ir->eI == eiVV && bInitStep) { /* if using velocity verlet with full time step Ekin, @@ -1163,11 +1164,13 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], bOK = TRUE; if (!bRerunMD || rerun_fr.bV || bForceUpdate) /* Why is rerun_fr.bV here? Unclear. */ { + wallcycle_stop(wcycle, ewcUPDATE); update_constraints(fplog, step, NULL, ir, ekind, mdatoms, state, fr->bMolPBC, graph, f, &top->idef, shake_vir, cr, nrnb, wcycle, upd, constr, TRUE, bCalcVir, vetanew); + wallcycle_start(wcycle, ewcUPDATE); if (bCalcVir && bUpdateDoLR && ir->nstcalclr > 1) { @@ -1207,6 +1210,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], So we need information from the last step in the first half of the integration */ if (bGStat || do_per_step(step-1, nstglobalcomm)) { + wallcycle_stop(wcycle, ewcUPDATE); compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, constr, NULL, FALSE, state->box, @@ -1227,6 +1231,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], time step kinetic energy for the pressure (always true now, since we want accurate statistics). b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in EkinAveVel because it's needed for the pressure */ + wallcycle_start(wcycle, ewcUPDATE); } /* temperature scaling and pressure scaling to produce the extended variables at t+dt */ if (!bInitStep) @@ -1240,7 +1245,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], { if (bExchanged) { - + wallcycle_stop(wcycle, ewcUPDATE); /* We need the kinetic energy at minus the half step for determining * the full step kinetic energy and possibly for T-coupling.*/ /* This may not be quite working correctly yet . . . . */ @@ -1249,6 +1254,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], constr, NULL, FALSE, state->box, top_global, &bSumEkinhOld, CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE); + wallcycle_start(wcycle, ewcUPDATE); } } } @@ -1278,6 +1284,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], { copy_rvecn(cbuf, state->v, 0, state->natoms); } + wallcycle_stop(wcycle, ewcUPDATE); } /* MRS -- now done iterating -- compute the conserved quantity */ @@ -1320,7 +1327,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t, ir, state, state_global, top_global, fr, outf, mdebin, ekind, f, f_global, - wcycle, &nchkpt, + &nchkpt, bCPT, bRerunMD, bLastStep, (Flags & MD_CONFOUT), bSumEkinhOld); /* Check if IMD step and do IMD communication, if bIMD is TRUE. */ @@ -1902,6 +1909,21 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], } dd_bcast(cr->dd, sizeof(gmx_bool), &bPMETuneRunning); + if (bPMETuneRunning && + fr->nbv->bUseGPU && DOMAINDECOMP(cr) && + !(cr->duty & DUTY_PME)) + { + /* Lock DLB=auto to off (does nothing when DLB=yes/no). + * With GPUs + separate PME ranks, we don't want DLB. + * This could happen when we scan coarse grids and + * it would then never be turned off again. + * This would hurt performance at the final, optimal + * grid spacing, where DLB almost never helps. + * Also, DLB can limit the cut-off for PME tuning. + */ + dd_dlb_set_lock(cr->dd, TRUE); + } + if (bPMETuneRunning || step_rel > ir->nstlist*50) { bPMETuneTry = FALSE; @@ -1932,6 +1954,16 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], { calc_enervirdiff(NULL, ir->eDispCorr, fr); } + + if (!bPMETuneRunning && + DOMAINDECOMP(cr) && + dd_dlb_is_locked(cr->dd)) + { + /* Unlock the DLB=auto, DLB is allowed to activate + * (but we don't expect it to activate in most cases). + */ + dd_dlb_set_lock(cr->dd, FALSE); + } } cycles_pmes = 0; } @@ -1962,6 +1994,10 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], /* End of main MD loop */ debug_gmx(); + /* Closing TNG files can include compressing data. Therefore it is good to do that + * before stopping the time measurements. */ + mdoutf_tng_close(outf); + /* Stop measuring walltime */ walltime_accounting_end(walltime_accounting); -- GitLab