diff --git a/CHANGES/v2.0.txt b/CHANGES/v2.0.txt index b89a018c9c86c259523f5cab31470a7784ba8e86..62c7b71e54308539729ac034f1770ab2e0cf61a3 100644 --- a/CHANGES/v2.0.txt +++ b/CHANGES/v2.0.txt @@ -73,6 +73,8 @@ For users: - Fixed bug with \ref READ and periodic variables. - Fixed bug with \ref HISTOGRAM (option USE_ALL_DATA was not working properly). - Gromacs patch updated to 4.6.5. +- Gromacs patch for 4.6 has been modified to allow for better load balancing when + using GPUs. - Added option 'plumed info --long-version'. Notice that --version and --long-version are automatically updated from git repositories. - Added full reference (page/number) to published paper in doc and log. diff --git a/patches/gromacs-4.6.5.diff/src/kernel/md.c b/patches/gromacs-4.6.5.diff/src/kernel/md.c index 1d6420b345b8efbf95de9f00008bfa8e797bbe9c..c896ffadd94a9bfac98caa32822d7f8482161473 100644 --- a/patches/gromacs-4.6.5.diff/src/kernel/md.c +++ b/patches/gromacs-4.6.5.diff/src/kernel/md.c @@ -1243,6 +1243,9 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], plumed_cmd(plumedmain,"setCharges",&mdatoms->chargeA[mdatoms->start]); plumed_cmd(plumedmain,"setBox",&state->box[0][0]); plumed_cmd(plumedmain,"prepareCalc",NULL); + plumed_cmd(plumedmain,"setStopFlag",&plumedWantsToStop); + plumed_cmd(plumedmain,"setForces",&f[mdatoms->start][0]); + plumed_cmd(plumedmain,"setVirial",&force_vir[0][0]); plumed_cmd(plumedmain,"isEnergyNeeded",&plumedNeedsEnergy); } /* END PLUMED */ @@ -1254,11 +1257,10 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], (plumedNeedsEnergy? GMX_FORCE_ENERGY : 0) |(bNS ? GMX_FORCE_NS : 0) | force_flags); /* PLUMED */ if(plumedswitch){ - if(plumedNeedsEnergy) plumed_cmd(plumedmain,"setEnergy",&enerd->term[F_EPOT]); - plumed_cmd(plumedmain,"setStopFlag",&plumedWantsToStop); - plumed_cmd(plumedmain,"setForces",&f[mdatoms->start][0]); - plumed_cmd(plumedmain,"setVirial",&force_vir[0][0]); - plumed_cmd(plumedmain,"performCalc",NULL); + if(plumedNeedsEnergy){ + plumed_cmd(plumedmain,"setEnergy",&enerd->term[F_EPOT]); + plumed_cmd(plumedmain,"performCalc",NULL); + } if ((repl_ex_nst > 0) && (step > 0) && !bLastStep && do_per_step(step,repl_ex_nst)) plumed_cmd(plumedmain,"GREX savePositions",NULL); if(plumedWantsToStop) ir->nsteps=step_rel+1; diff --git a/patches/gromacs-4.6.5.diff/src/kernel/mdrun.c b/patches/gromacs-4.6.5.diff/src/kernel/mdrun.c index e64c399efec380dc86c23f7a2d4e997233b2ef98..e57c646b6155a6cdfb11317d3d4a806658b1980a 100644 --- a/patches/gromacs-4.6.5.diff/src/kernel/mdrun.c +++ b/patches/gromacs-4.6.5.diff/src/kernel/mdrun.c @@ -60,8 +60,8 @@ /* PLUMED */ #include "../../Plumed.h" -int plumedswitch; -plumed plumedmain; +extern int plumedswitch; +extern plumed plumedmain; extern void(*plumedcmd)(plumed,const char*,const void*); /* END PLUMED */ int cmain(int argc, char *argv[]) @@ -753,7 +753,7 @@ int cmain(int argc, char *argv[]) /* PLUMED */ plumedswitch=0; if (opt2bSet("-plumed",NFILE,fnm)) plumedswitch=1; - if(plumedswitch){ + if(plumedswitch){ plumedcmd=plumed_cmd; int plumed_is_there=0; int real_precision=sizeof(real); real energyUnits=1.0; diff --git a/patches/gromacs-4.6.5.diff/src/mdlib/force.c b/patches/gromacs-4.6.5.diff/src/mdlib/force.c new file mode 100644 index 0000000000000000000000000000000000000000..a36cbd0e5873f4cd3d0f041b84d35fe46f5c2bdd --- /dev/null +++ b/patches/gromacs-4.6.5.diff/src/mdlib/force.c @@ -0,0 +1,991 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team, + * check out http://www.gromacs.org for more information. + * Copyright (c) 2012,2013, by the GROMACS development team, led by + * David van der Spoel, Berk Hess, Erik Lindahl, and including many + * others, as listed in the AUTHORS file in the top-level source + * directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <math.h> +#include <string.h> +#include <assert.h> +#include "sysstuff.h" +#include "typedefs.h" +#include "macros.h" +#include "smalloc.h" +#include "macros.h" +#include "physics.h" +#include "force.h" +#include "nonbonded.h" +#include "names.h" +#include "network.h" +#include "pbc.h" +#include "ns.h" +#include "nrnb.h" +#include "bondf.h" +#include "mshift.h" +#include "txtdump.h" +#include "coulomb.h" +#include "pme.h" +#include "mdrun.h" +#include "domdec.h" +#include "partdec.h" +#include "qmmm.h" +#include "mpelogging.h" +#include "gmx_omp_nthreads.h" + +/* PLUMED */ +#include "../../Plumed.h" +int plumedswitch=0; +plumed plumedmain; +void(*plumedcmd)(plumed,const char*,const void*)=NULL; +/* END PLUMED */ + + +void ns(FILE *fp, + t_forcerec *fr, + rvec x[], + matrix box, + gmx_groups_t *groups, + t_grpopts *opts, + gmx_localtop_t *top, + t_mdatoms *md, + t_commrec *cr, + t_nrnb *nrnb, + real *lambda, + real *dvdlambda, + gmx_grppairener_t *grppener, + gmx_bool bFillGrid, + gmx_bool bDoLongRangeNS) +{ + char *ptr; + int nsearch; + + GMX_MPE_LOG(ev_ns_start); + if (!fr->ns.nblist_initialized) + { + init_neighbor_list(fp, fr, md->homenr); + } + + if (fr->bTwinRange) + { + fr->nlr = 0; + } + + nsearch = search_neighbours(fp, fr, x, box, top, groups, cr, nrnb, md, + lambda, dvdlambda, grppener, + bFillGrid, bDoLongRangeNS, TRUE); + if (debug) + { + fprintf(debug, "nsearch = %d\n", nsearch); + } + + /* Check whether we have to do dynamic load balancing */ + /*if ((nsb->nstDlb > 0) && (mod(step,nsb->nstDlb) == 0)) + count_nb(cr,nsb,&(top->blocks[ebCGS]),nns,fr->nlr, + &(top->idef),opts->ngener); + */ + if (fr->ns.dump_nl > 0) + { + dump_nblist(fp, cr, fr, fr->ns.dump_nl); + } + + GMX_MPE_LOG(ev_ns_finish); +} + +static void reduce_thread_forces(int n, rvec *f, + tensor vir, + real *Vcorr, + int efpt_ind, real *dvdl, + int nthreads, f_thread_t *f_t) +{ + int t, i; + + /* This reduction can run over any number of threads */ +#pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntBonded)) private(t) schedule(static) + for (i = 0; i < n; i++) + { + for (t = 1; t < nthreads; t++) + { + rvec_inc(f[i], f_t[t].f[i]); + } + } + for (t = 1; t < nthreads; t++) + { + *Vcorr += f_t[t].Vcorr; + *dvdl += f_t[t].dvdl[efpt_ind]; + m_add(vir, f_t[t].vir, vir); + } +} + +void do_force_lowlevel(FILE *fplog, gmx_large_int_t step, + t_forcerec *fr, t_inputrec *ir, + t_idef *idef, t_commrec *cr, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + t_mdatoms *md, + t_grpopts *opts, + rvec x[], history_t *hist, + rvec f[], + rvec f_longrange[], + gmx_enerdata_t *enerd, + t_fcdata *fcd, + gmx_mtop_t *mtop, + gmx_localtop_t *top, + gmx_genborn_t *born, + t_atomtypes *atype, + gmx_bool bBornRadii, + matrix box, + t_lambda *fepvals, + real *lambda, + t_graph *graph, + t_blocka *excl, + rvec mu_tot[], + int flags, + float *cycles_pme) +{ + int i, j, status; + int donb_flags; + gmx_bool bDoEpot, bSepDVDL, bSB; + int pme_flags; + matrix boxs; + rvec box_size; + real Vsr, Vlr, Vcorr = 0; + t_pbc pbc; + real dvdgb; + char buf[22]; + double clam_i, vlam_i; + real dvdl_dum[efptNR], dvdl, dvdl_nb[efptNR], lam_i[efptNR]; + real dvdlsum; + +#ifdef GMX_MPI + double t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */ +#endif + +#define PRINT_SEPDVDL(s, v, dvdlambda) if (bSepDVDL) {fprintf(fplog, sepdvdlformat, s, v, dvdlambda); } + + GMX_MPE_LOG(ev_force_start); + set_pbc(&pbc, fr->ePBC, box); + + /* reset free energy components */ + for (i = 0; i < efptNR; i++) + { + dvdl_nb[i] = 0; + dvdl_dum[i] = 0; + } + + /* Reset box */ + for (i = 0; (i < DIM); i++) + { + box_size[i] = box[i][i]; + } + + bSepDVDL = (fr->bSepDVDL && do_per_step(step, ir->nstlog)); + debug_gmx(); + + /* do QMMM first if requested */ + if (fr->bQMMM) + { + enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr, md); + } + + if (bSepDVDL) + { + fprintf(fplog, "Step %s: non-bonded V and dVdl for node %d:\n", + gmx_step_str(step, buf), cr->nodeid); + } + + /* Call the short range functions all in one go. */ + GMX_MPE_LOG(ev_do_fnbf_start); + +#ifdef GMX_MPI + /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/ +#define TAKETIME FALSE + if (TAKETIME) + { + MPI_Barrier(cr->mpi_comm_mygroup); + t0 = MPI_Wtime(); + } +#endif + + if (ir->nwall) + { + /* foreign lambda component for walls */ + dvdl = do_walls(ir, fr, box, md, x, f, lambda[efptVDW], + enerd->grpp.ener[egLJSR], nrnb); + PRINT_SEPDVDL("Walls", 0.0, dvdl); + enerd->dvdl_lin[efptVDW] += dvdl; + } + + /* If doing GB, reset dvda and calculate the Born radii */ + if (ir->implicit_solvent) + { + wallcycle_sub_start(wcycle, ewcsNONBONDED); + + for (i = 0; i < born->nr; i++) + { + fr->dvda[i] = 0; + } + + if (bBornRadii) + { + calc_gb_rad(cr, fr, ir, top, atype, x, &(fr->gblist), born, md, nrnb); + } + + wallcycle_sub_stop(wcycle, ewcsNONBONDED); + } + + where(); + /* We only do non-bonded calculation with group scheme here, the verlet + * calls are done from do_force_cutsVERLET(). */ + if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED)) + { + donb_flags = 0; + /* Add short-range interactions */ + donb_flags |= GMX_NONBONDED_DO_SR; + + if (flags & GMX_FORCE_FORCES) + { + donb_flags |= GMX_NONBONDED_DO_FORCE; + } + if (flags & GMX_FORCE_ENERGY) + { + donb_flags |= GMX_NONBONDED_DO_POTENTIAL; + } + if (flags & GMX_FORCE_DO_LR) + { + donb_flags |= GMX_NONBONDED_DO_LR; + } + + wallcycle_sub_start(wcycle, ewcsNONBONDED); + do_nonbonded(cr, fr, x, f, f_longrange, md, excl, + &enerd->grpp, box_size, nrnb, + lambda, dvdl_nb, -1, -1, donb_flags); + + /* If we do foreign lambda and we have soft-core interactions + * we have to recalculate the (non-linear) energies contributions. + */ + if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0) + { + for (i = 0; i < enerd->n_lambda; i++) + { + for (j = 0; j < efptNR; j++) + { + lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]); + } + reset_foreign_enerdata(enerd); + do_nonbonded(cr, fr, x, f, f_longrange, md, excl, + &(enerd->foreign_grpp), box_size, nrnb, + lam_i, dvdl_dum, -1, -1, + (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA); + sum_epot(&ir->opts, &(enerd->foreign_grpp), enerd->foreign_term); + enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; + } + } + wallcycle_sub_stop(wcycle, ewcsNONBONDED); + where(); + } + + /* If we are doing GB, calculate bonded forces and apply corrections + * to the solvation forces */ + /* MRS: Eventually, many need to include free energy contribution here! */ + if (ir->implicit_solvent) + { + wallcycle_sub_start(wcycle, ewcsBONDED); + calc_gb_forces(cr, md, born, top, atype, x, f, fr, idef, + ir->gb_algorithm, ir->sa_algorithm, nrnb, bBornRadii, &pbc, graph, enerd); + wallcycle_sub_stop(wcycle, ewcsBONDED); + } + +#ifdef GMX_MPI + if (TAKETIME) + { + t1 = MPI_Wtime(); + fr->t_fnbf += t1-t0; + } +#endif + + if (fepvals->sc_alpha != 0) + { + enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW]; + } + else + { + enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW]; + } + + if (fepvals->sc_alpha != 0) + + /* even though coulomb part is linear, we already added it, beacuse we + need to go through the vdw calculation anyway */ + { + enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL]; + } + else + { + enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL]; + } + + Vsr = 0; + if (bSepDVDL) + { + for (i = 0; i < enerd->grpp.nener; i++) + { + Vsr += + (fr->bBHAM ? + enerd->grpp.ener[egBHAMSR][i] : + enerd->grpp.ener[egLJSR][i]) + + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i]; + } + dvdlsum = dvdl_nb[efptVDW] + dvdl_nb[efptCOUL]; + PRINT_SEPDVDL("VdW and Coulomb SR particle-p.", Vsr, dvdlsum); + } + debug_gmx(); + + GMX_MPE_LOG(ev_do_fnbf_finish); + + if (debug) + { + pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS); + } + + /* Shift the coordinates. Must be done before bonded forces and PPPM, + * but is also necessary for SHAKE and update, therefore it can NOT + * go when no bonded forces have to be evaluated. + */ + + /* Here sometimes we would not need to shift with NBFonly, + * but we do so anyhow for consistency of the returned coordinates. + */ + if (graph) + { + shift_self(graph, box, x); + if (TRICLINIC(box)) + { + inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes); + } + else + { + inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes); + } + } + /* Check whether we need to do bondeds or correct for exclusions */ + if (fr->bMolPBC && + ((flags & GMX_FORCE_BONDED) + || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype))) + { + /* Since all atoms are in the rectangular or triclinic unit-cell, + * only single box vector shifts (2 in x) are required. + */ + set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box); + } + debug_gmx(); + + if (flags & GMX_FORCE_BONDED) + { + GMX_MPE_LOG(ev_calc_bonds_start); + + wallcycle_sub_start(wcycle, ewcsBONDED); + calc_bonds(fplog, cr->ms, + idef, x, hist, f, fr, &pbc, graph, enerd, nrnb, lambda, md, fcd, + DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born, + flags, + fr->bSepDVDL && do_per_step(step, ir->nstlog), step); + + /* Check if we have to determine energy differences + * at foreign lambda's. + */ + if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && + idef->ilsort != ilsortNO_FE) + { + if (idef->ilsort != ilsortFE_SORTED) + { + gmx_incons("The bonded interactions are not sorted for free energy"); + } + for (i = 0; i < enerd->n_lambda; i++) + { + reset_foreign_enerdata(enerd); + for (j = 0; j < efptNR; j++) + { + lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]); + } + calc_bonds_lambda(fplog, idef, x, fr, &pbc, graph, &(enerd->foreign_grpp), enerd->foreign_term, nrnb, lam_i, md, + fcd, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL); + sum_epot(&ir->opts, &(enerd->foreign_grpp), enerd->foreign_term); + enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; + } + } + debug_gmx(); + GMX_MPE_LOG(ev_calc_bonds_finish); + wallcycle_sub_stop(wcycle, ewcsBONDED); + } + + where(); + + *cycles_pme = 0; + if (EEL_FULL(fr->eeltype)) + { + bSB = (ir->nwall == 2); + if (bSB) + { + copy_mat(box, boxs); + svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]); + box_size[ZZ] *= ir->wall_ewald_zfac; + } + + clear_mat(fr->vir_el_recip); + + if (fr->bEwald) + { + Vcorr = 0; + dvdl = 0; + + /* With the Verlet scheme exclusion forces are calculated + * in the non-bonded kernel. + */ + /* The TPI molecule does not have exclusions with the rest + * of the system and no intra-molecular PME grid contributions + * will be calculated in gmx_pme_calc_energy. + */ + if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) || + ir->ewald_geometry != eewg3D || + ir->epsilon_surface != 0) + { + int nthreads, t; + + wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION); + + if (fr->n_tpi > 0) + { + gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions"); + } + + nthreads = gmx_omp_nthreads_get(emntBonded); +#pragma omp parallel for num_threads(nthreads) schedule(static) + for (t = 0; t < nthreads; t++) + { + int s, e, i; + rvec *fnv; + tensor *vir; + real *Vcorrt, *dvdlt; + if (t == 0) + { + fnv = fr->f_novirsum; + vir = &fr->vir_el_recip; + Vcorrt = &Vcorr; + dvdlt = &dvdl; + } + else + { + fnv = fr->f_t[t].f; + vir = &fr->f_t[t].vir; + Vcorrt = &fr->f_t[t].Vcorr; + dvdlt = &fr->f_t[t].dvdl[efptCOUL]; + for (i = 0; i < fr->natoms_force; i++) + { + clear_rvec(fnv[i]); + } + clear_mat(*vir); + } + *dvdlt = 0; + *Vcorrt = + ewald_LRcorrection(fplog, + fr->excl_load[t], fr->excl_load[t+1], + cr, t, fr, + md->chargeA, + md->nChargePerturbed ? md->chargeB : NULL, + ir->cutoff_scheme != ecutsVERLET, + excl, x, bSB ? boxs : box, mu_tot, + ir->ewald_geometry, + ir->epsilon_surface, + fnv, *vir, + lambda[efptCOUL], dvdlt); + } + if (nthreads > 1) + { + reduce_thread_forces(fr->natoms_force, fr->f_novirsum, + fr->vir_el_recip, + &Vcorr, efptCOUL, &dvdl, + nthreads, fr->f_t); + } + + wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION); + } + + if (fr->n_tpi == 0) + { + Vcorr += ewald_charge_correction(cr, fr, lambda[efptCOUL], box, + &dvdl, fr->vir_el_recip); + } + + PRINT_SEPDVDL("Ewald excl./charge/dip. corr.", Vcorr, dvdl); + enerd->dvdl_lin[efptCOUL] += dvdl; + } + + status = 0; + Vlr = 0; + dvdl = 0; + switch (fr->eeltype) + { + case eelPME: + case eelPMESWITCH: + case eelPMEUSER: + case eelPMEUSERSWITCH: + case eelP3M_AD: + if (cr->duty & DUTY_PME) + { + assert(fr->n_tpi >= 0); + if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED)) + { + pme_flags = GMX_PME_SPREAD_Q | GMX_PME_SOLVE; + if (flags & GMX_FORCE_FORCES) + { + pme_flags |= GMX_PME_CALC_F; + } + if (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)) + { + pme_flags |= GMX_PME_CALC_ENER_VIR; + } + if (fr->n_tpi > 0) + { + /* We don't calculate f, but we do want the potential */ + pme_flags |= GMX_PME_CALC_POT; + } + wallcycle_start(wcycle, ewcPMEMESH); + status = gmx_pme_do(fr->pmedata, + md->start, md->homenr - fr->n_tpi, + x, fr->f_novirsum, + md->chargeA, md->chargeB, + bSB ? boxs : box, cr, + DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0, + DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0, + nrnb, wcycle, + fr->vir_el_recip, fr->ewaldcoeff, + &Vlr, lambda[efptCOUL], &dvdl, + pme_flags); + *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH); + + /* We should try to do as little computation after + * this as possible, because parallel PME synchronizes + * the nodes, so we want all load imbalance of the rest + * of the force calculation to be before the PME call. + * DD load balancing is done on the whole time of + * the force call (without PME). + */ + } + if (fr->n_tpi > 0) + { + /* Determine the PME grid energy of the test molecule + * with the PME grid potential of the other charges. + */ + gmx_pme_calc_energy(fr->pmedata, fr->n_tpi, + x + md->homenr - fr->n_tpi, + md->chargeA + md->homenr - fr->n_tpi, + &Vlr); + } + PRINT_SEPDVDL("PME mesh", Vlr, dvdl); + } + break; + case eelEWALD: + Vlr = do_ewald(fplog, FALSE, ir, x, fr->f_novirsum, + md->chargeA, md->chargeB, + box_size, cr, md->homenr, + fr->vir_el_recip, fr->ewaldcoeff, + lambda[efptCOUL], &dvdl, fr->ewald_table); + PRINT_SEPDVDL("Ewald long-range", Vlr, dvdl); + break; + default: + gmx_fatal(FARGS, "No such electrostatics method implemented %s", + eel_names[fr->eeltype]); + } + if (status != 0) + { + gmx_fatal(FARGS, "Error %d in long range electrostatics routine %s", + status, EELTYPE(fr->eeltype)); + } + /* Note that with separate PME nodes we get the real energies later */ + enerd->dvdl_lin[efptCOUL] += dvdl; + enerd->term[F_COUL_RECIP] = Vlr + Vcorr; + if (debug) + { + fprintf(debug, "Vlr = %g, Vcorr = %g, Vlr_corr = %g\n", + Vlr, Vcorr, enerd->term[F_COUL_RECIP]); + pr_rvecs(debug, 0, "vir_el_recip after corr", fr->vir_el_recip, DIM); + pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS); + } + } + else + { + if (EEL_RF(fr->eeltype)) + { + /* With the Verlet scheme exclusion forces are calculated + * in the non-bonded kernel. + */ + if (ir->cutoff_scheme != ecutsVERLET && fr->eeltype != eelRF_NEC) + { + dvdl = 0; + enerd->term[F_RF_EXCL] = + RF_excl_correction(fplog, fr, graph, md, excl, x, f, + fr->fshift, &pbc, lambda[efptCOUL], &dvdl); + } + + enerd->dvdl_lin[efptCOUL] += dvdl; + PRINT_SEPDVDL("RF exclusion correction", + enerd->term[F_RF_EXCL], dvdl); + } + } + where(); + debug_gmx(); + + if (debug) + { + print_nrnb(debug, nrnb); + } + debug_gmx(); + +#ifdef GMX_MPI + if (TAKETIME) + { + t2 = MPI_Wtime(); + MPI_Barrier(cr->mpi_comm_mygroup); + t3 = MPI_Wtime(); + fr->t_wait += t3-t2; + if (fr->timesteps == 11) + { + fprintf(stderr, "* PP load balancing info: node %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n", + cr->nodeid, gmx_step_str(fr->timesteps, buf), + 100*fr->t_wait/(fr->t_wait+fr->t_fnbf), + (fr->t_fnbf+fr->t_wait)/fr->t_fnbf); + } + fr->timesteps++; + } +#endif + + if (debug) + { + pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS); + } + + GMX_MPE_LOG(ev_force_finish); + + +/* PLUMED */ + if(plumedswitch){ + int plumedNeedsEnergy; + (*plumedcmd)(plumedmain,"isEnergyNeeded",&plumedNeedsEnergy); + if(!plumedNeedsEnergy) (*plumedcmd)(plumedmain,"performCalc",NULL); + } +/* END PLUMED */ + + +} + +void init_enerdata(int ngener, int n_lambda, gmx_enerdata_t *enerd) +{ + int i, n2; + + for (i = 0; i < F_NRE; i++) + { + enerd->term[i] = 0; + enerd->foreign_term[i] = 0; + } + + + for (i = 0; i < efptNR; i++) + { + enerd->dvdl_lin[i] = 0; + enerd->dvdl_nonlin[i] = 0; + } + + n2 = ngener*ngener; + if (debug) + { + fprintf(debug, "Creating %d sized group matrix for energies\n", n2); + } + enerd->grpp.nener = n2; + enerd->foreign_grpp.nener = n2; + for (i = 0; (i < egNR); i++) + { + snew(enerd->grpp.ener[i], n2); + snew(enerd->foreign_grpp.ener[i], n2); + } + + if (n_lambda) + { + enerd->n_lambda = 1 + n_lambda; + snew(enerd->enerpart_lambda, enerd->n_lambda); + } + else + { + enerd->n_lambda = 0; + } +} + +void destroy_enerdata(gmx_enerdata_t *enerd) +{ + int i; + + for (i = 0; (i < egNR); i++) + { + sfree(enerd->grpp.ener[i]); + } + + for (i = 0; (i < egNR); i++) + { + sfree(enerd->foreign_grpp.ener[i]); + } + + if (enerd->n_lambda) + { + sfree(enerd->enerpart_lambda); + } +} + +static real sum_v(int n, real v[]) +{ + real t; + int i; + + t = 0.0; + for (i = 0; (i < n); i++) + { + t = t + v[i]; + } + + return t; +} + +void sum_epot(t_grpopts *opts, gmx_grppairener_t *grpp, real *epot) +{ + int i; + + /* Accumulate energies */ + epot[F_COUL_SR] = sum_v(grpp->nener, grpp->ener[egCOULSR]); + epot[F_LJ] = sum_v(grpp->nener, grpp->ener[egLJSR]); + epot[F_LJ14] = sum_v(grpp->nener, grpp->ener[egLJ14]); + epot[F_COUL14] = sum_v(grpp->nener, grpp->ener[egCOUL14]); + epot[F_COUL_LR] = sum_v(grpp->nener, grpp->ener[egCOULLR]); + epot[F_LJ_LR] = sum_v(grpp->nener, grpp->ener[egLJLR]); + /* We have already added 1-2,1-3, and 1-4 terms to F_GBPOL */ + epot[F_GBPOL] += sum_v(grpp->nener, grpp->ener[egGB]); + +/* lattice part of LR doesnt belong to any group + * and has been added earlier + */ + epot[F_BHAM] = sum_v(grpp->nener, grpp->ener[egBHAMSR]); + epot[F_BHAM_LR] = sum_v(grpp->nener, grpp->ener[egBHAMLR]); + + epot[F_EPOT] = 0; + for (i = 0; (i < F_EPOT); i++) + { + if (i != F_DISRESVIOL && i != F_ORIRESDEV) + { + epot[F_EPOT] += epot[i]; + } + } +} + +void sum_dhdl(gmx_enerdata_t *enerd, real *lambda, t_lambda *fepvals) +{ + int i, j, index; + double dlam; + + enerd->dvdl_lin[efptVDW] += enerd->term[F_DVDL_VDW]; /* include dispersion correction */ + enerd->term[F_DVDL] = 0.0; + for (i = 0; i < efptNR; i++) + { + if (fepvals->separate_dvdl[i]) + { + /* could this be done more readably/compactly? */ + switch (i) + { + case (efptMASS): + index = F_DKDL; + break; + case (efptCOUL): + index = F_DVDL_COUL; + break; + case (efptVDW): + index = F_DVDL_VDW; + break; + case (efptBONDED): + index = F_DVDL_BONDED; + break; + case (efptRESTRAINT): + index = F_DVDL_RESTRAINT; + break; + default: + index = F_DVDL; + break; + } + enerd->term[index] = enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i]; + if (debug) + { + fprintf(debug, "dvdl-%s[%2d]: %f: non-linear %f + linear %f\n", + efpt_names[i], i, enerd->term[index], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]); + } + } + else + { + enerd->term[F_DVDL] += enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i]; + if (debug) + { + fprintf(debug, "dvd-%sl[%2d]: %f: non-linear %f + linear %f\n", + efpt_names[0], i, enerd->term[F_DVDL], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]); + } + } + } + + /* Notes on the foreign lambda free energy difference evaluation: + * Adding the potential and ekin terms that depend linearly on lambda + * as delta lam * dvdl to the energy differences is exact. + * For the constraints this is not exact, but we have no other option + * without literally changing the lengths and reevaluating the energies at each step. + * (try to remedy this post 4.6 - MRS) + * For the non-bonded LR term we assume that the soft-core (if present) + * no longer affects the energy beyond the short-range cut-off, + * which is a very good approximation (except for exotic settings). + * (investigate how to overcome this post 4.6 - MRS) + */ + if (fepvals->separate_dvdl[efptBONDED]) + { + enerd->term[F_DVDL_BONDED] += enerd->term[F_DVDL_CONSTR]; + } + else + { + enerd->term[F_DVDL] += enerd->term[F_DVDL_CONSTR]; + } + enerd->term[F_DVDL_CONSTR] = 0; + + for (i = 0; i < fepvals->n_lambda; i++) + { /* note we are iterating over fepvals here! + For the current lam, dlam = 0 automatically, + so we don't need to add anything to the + enerd->enerpart_lambda[0] */ + + /* we don't need to worry about dvdl_lin contributions to dE at + current lambda, because the contributions to the current + lambda are automatically zeroed */ + + for (j = 0; j < efptNR; j++) + { + /* Note that this loop is over all dhdl components, not just the separated ones */ + dlam = (fepvals->all_lambda[j][i]-lambda[j]); + enerd->enerpart_lambda[i+1] += dlam*enerd->dvdl_lin[j]; + if (debug) + { + fprintf(debug, "enerdiff lam %g: (%15s), non-linear %f linear %f*%f\n", + fepvals->all_lambda[j][i], efpt_names[j], + (enerd->enerpart_lambda[i+1] - enerd->enerpart_lambda[0]), + dlam, enerd->dvdl_lin[j]); + } + } + } +} + + +void reset_foreign_enerdata(gmx_enerdata_t *enerd) +{ + int i, j; + + /* First reset all foreign energy components. Foreign energies always called on + neighbor search steps */ + for (i = 0; (i < egNR); i++) + { + for (j = 0; (j < enerd->grpp.nener); j++) + { + enerd->foreign_grpp.ener[i][j] = 0.0; + } + } + + /* potential energy components */ + for (i = 0; (i <= F_EPOT); i++) + { + enerd->foreign_term[i] = 0.0; + } +} + +void reset_enerdata(t_grpopts *opts, + t_forcerec *fr, gmx_bool bNS, + gmx_enerdata_t *enerd, + gmx_bool bMaster) +{ + gmx_bool bKeepLR; + int i, j; + + /* First reset all energy components, except for the long range terms + * on the master at non neighbor search steps, since the long range + * terms have already been summed at the last neighbor search step. + */ + bKeepLR = (fr->bTwinRange && !bNS); + for (i = 0; (i < egNR); i++) + { + if (!(bKeepLR && bMaster && (i == egCOULLR || i == egLJLR))) + { + for (j = 0; (j < enerd->grpp.nener); j++) + { + enerd->grpp.ener[i][j] = 0.0; + } + } + } + for (i = 0; i < efptNR; i++) + { + enerd->dvdl_lin[i] = 0.0; + enerd->dvdl_nonlin[i] = 0.0; + } + + /* Normal potential energy components */ + for (i = 0; (i <= F_EPOT); i++) + { + enerd->term[i] = 0.0; + } + /* Initialize the dVdlambda term with the long range contribution */ + /* Initialize the dvdl term with the long range contribution */ + enerd->term[F_DVDL] = 0.0; + enerd->term[F_DVDL_COUL] = 0.0; + enerd->term[F_DVDL_VDW] = 0.0; + enerd->term[F_DVDL_BONDED] = 0.0; + enerd->term[F_DVDL_RESTRAINT] = 0.0; + enerd->term[F_DKDL] = 0.0; + if (enerd->n_lambda > 0) + { + for (i = 0; i < enerd->n_lambda; i++) + { + enerd->enerpart_lambda[i] = 0.0; + } + } + /* reset foreign energy data - separate function since we also call it elsewhere */ + reset_foreign_enerdata(enerd); +} diff --git a/patches/gromacs-4.6.5.diff/src/mdlib/force.c.preplumed b/patches/gromacs-4.6.5.diff/src/mdlib/force.c.preplumed new file mode 100644 index 0000000000000000000000000000000000000000..75da6bd137d3f9847956ba7763602eb5b0395975 --- /dev/null +++ b/patches/gromacs-4.6.5.diff/src/mdlib/force.c.preplumed @@ -0,0 +1,973 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team, + * check out http://www.gromacs.org for more information. + * Copyright (c) 2012,2013, by the GROMACS development team, led by + * David van der Spoel, Berk Hess, Erik Lindahl, and including many + * others, as listed in the AUTHORS file in the top-level source + * directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <math.h> +#include <string.h> +#include <assert.h> +#include "sysstuff.h" +#include "typedefs.h" +#include "macros.h" +#include "smalloc.h" +#include "macros.h" +#include "physics.h" +#include "force.h" +#include "nonbonded.h" +#include "names.h" +#include "network.h" +#include "pbc.h" +#include "ns.h" +#include "nrnb.h" +#include "bondf.h" +#include "mshift.h" +#include "txtdump.h" +#include "coulomb.h" +#include "pme.h" +#include "mdrun.h" +#include "domdec.h" +#include "partdec.h" +#include "qmmm.h" +#include "mpelogging.h" +#include "gmx_omp_nthreads.h" + +void ns(FILE *fp, + t_forcerec *fr, + rvec x[], + matrix box, + gmx_groups_t *groups, + t_grpopts *opts, + gmx_localtop_t *top, + t_mdatoms *md, + t_commrec *cr, + t_nrnb *nrnb, + real *lambda, + real *dvdlambda, + gmx_grppairener_t *grppener, + gmx_bool bFillGrid, + gmx_bool bDoLongRangeNS) +{ + char *ptr; + int nsearch; + + GMX_MPE_LOG(ev_ns_start); + if (!fr->ns.nblist_initialized) + { + init_neighbor_list(fp, fr, md->homenr); + } + + if (fr->bTwinRange) + { + fr->nlr = 0; + } + + nsearch = search_neighbours(fp, fr, x, box, top, groups, cr, nrnb, md, + lambda, dvdlambda, grppener, + bFillGrid, bDoLongRangeNS, TRUE); + if (debug) + { + fprintf(debug, "nsearch = %d\n", nsearch); + } + + /* Check whether we have to do dynamic load balancing */ + /*if ((nsb->nstDlb > 0) && (mod(step,nsb->nstDlb) == 0)) + count_nb(cr,nsb,&(top->blocks[ebCGS]),nns,fr->nlr, + &(top->idef),opts->ngener); + */ + if (fr->ns.dump_nl > 0) + { + dump_nblist(fp, cr, fr, fr->ns.dump_nl); + } + + GMX_MPE_LOG(ev_ns_finish); +} + +static void reduce_thread_forces(int n, rvec *f, + tensor vir, + real *Vcorr, + int efpt_ind, real *dvdl, + int nthreads, f_thread_t *f_t) +{ + int t, i; + + /* This reduction can run over any number of threads */ +#pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntBonded)) private(t) schedule(static) + for (i = 0; i < n; i++) + { + for (t = 1; t < nthreads; t++) + { + rvec_inc(f[i], f_t[t].f[i]); + } + } + for (t = 1; t < nthreads; t++) + { + *Vcorr += f_t[t].Vcorr; + *dvdl += f_t[t].dvdl[efpt_ind]; + m_add(vir, f_t[t].vir, vir); + } +} + +void do_force_lowlevel(FILE *fplog, gmx_large_int_t step, + t_forcerec *fr, t_inputrec *ir, + t_idef *idef, t_commrec *cr, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + t_mdatoms *md, + t_grpopts *opts, + rvec x[], history_t *hist, + rvec f[], + rvec f_longrange[], + gmx_enerdata_t *enerd, + t_fcdata *fcd, + gmx_mtop_t *mtop, + gmx_localtop_t *top, + gmx_genborn_t *born, + t_atomtypes *atype, + gmx_bool bBornRadii, + matrix box, + t_lambda *fepvals, + real *lambda, + t_graph *graph, + t_blocka *excl, + rvec mu_tot[], + int flags, + float *cycles_pme) +{ + int i, j, status; + int donb_flags; + gmx_bool bDoEpot, bSepDVDL, bSB; + int pme_flags; + matrix boxs; + rvec box_size; + real Vsr, Vlr, Vcorr = 0; + t_pbc pbc; + real dvdgb; + char buf[22]; + double clam_i, vlam_i; + real dvdl_dum[efptNR], dvdl, dvdl_nb[efptNR], lam_i[efptNR]; + real dvdlsum; + +#ifdef GMX_MPI + double t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */ +#endif + +#define PRINT_SEPDVDL(s, v, dvdlambda) if (bSepDVDL) {fprintf(fplog, sepdvdlformat, s, v, dvdlambda); } + + GMX_MPE_LOG(ev_force_start); + set_pbc(&pbc, fr->ePBC, box); + + /* reset free energy components */ + for (i = 0; i < efptNR; i++) + { + dvdl_nb[i] = 0; + dvdl_dum[i] = 0; + } + + /* Reset box */ + for (i = 0; (i < DIM); i++) + { + box_size[i] = box[i][i]; + } + + bSepDVDL = (fr->bSepDVDL && do_per_step(step, ir->nstlog)); + debug_gmx(); + + /* do QMMM first if requested */ + if (fr->bQMMM) + { + enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr, md); + } + + if (bSepDVDL) + { + fprintf(fplog, "Step %s: non-bonded V and dVdl for node %d:\n", + gmx_step_str(step, buf), cr->nodeid); + } + + /* Call the short range functions all in one go. */ + GMX_MPE_LOG(ev_do_fnbf_start); + +#ifdef GMX_MPI + /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/ +#define TAKETIME FALSE + if (TAKETIME) + { + MPI_Barrier(cr->mpi_comm_mygroup); + t0 = MPI_Wtime(); + } +#endif + + if (ir->nwall) + { + /* foreign lambda component for walls */ + dvdl = do_walls(ir, fr, box, md, x, f, lambda[efptVDW], + enerd->grpp.ener[egLJSR], nrnb); + PRINT_SEPDVDL("Walls", 0.0, dvdl); + enerd->dvdl_lin[efptVDW] += dvdl; + } + + /* If doing GB, reset dvda and calculate the Born radii */ + if (ir->implicit_solvent) + { + wallcycle_sub_start(wcycle, ewcsNONBONDED); + + for (i = 0; i < born->nr; i++) + { + fr->dvda[i] = 0; + } + + if (bBornRadii) + { + calc_gb_rad(cr, fr, ir, top, atype, x, &(fr->gblist), born, md, nrnb); + } + + wallcycle_sub_stop(wcycle, ewcsNONBONDED); + } + + where(); + /* We only do non-bonded calculation with group scheme here, the verlet + * calls are done from do_force_cutsVERLET(). */ + if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED)) + { + donb_flags = 0; + /* Add short-range interactions */ + donb_flags |= GMX_NONBONDED_DO_SR; + + if (flags & GMX_FORCE_FORCES) + { + donb_flags |= GMX_NONBONDED_DO_FORCE; + } + if (flags & GMX_FORCE_ENERGY) + { + donb_flags |= GMX_NONBONDED_DO_POTENTIAL; + } + if (flags & GMX_FORCE_DO_LR) + { + donb_flags |= GMX_NONBONDED_DO_LR; + } + + wallcycle_sub_start(wcycle, ewcsNONBONDED); + do_nonbonded(cr, fr, x, f, f_longrange, md, excl, + &enerd->grpp, box_size, nrnb, + lambda, dvdl_nb, -1, -1, donb_flags); + + /* If we do foreign lambda and we have soft-core interactions + * we have to recalculate the (non-linear) energies contributions. + */ + if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0) + { + for (i = 0; i < enerd->n_lambda; i++) + { + for (j = 0; j < efptNR; j++) + { + lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]); + } + reset_foreign_enerdata(enerd); + do_nonbonded(cr, fr, x, f, f_longrange, md, excl, + &(enerd->foreign_grpp), box_size, nrnb, + lam_i, dvdl_dum, -1, -1, + (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA); + sum_epot(&ir->opts, &(enerd->foreign_grpp), enerd->foreign_term); + enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; + } + } + wallcycle_sub_stop(wcycle, ewcsNONBONDED); + where(); + } + + /* If we are doing GB, calculate bonded forces and apply corrections + * to the solvation forces */ + /* MRS: Eventually, many need to include free energy contribution here! */ + if (ir->implicit_solvent) + { + wallcycle_sub_start(wcycle, ewcsBONDED); + calc_gb_forces(cr, md, born, top, atype, x, f, fr, idef, + ir->gb_algorithm, ir->sa_algorithm, nrnb, bBornRadii, &pbc, graph, enerd); + wallcycle_sub_stop(wcycle, ewcsBONDED); + } + +#ifdef GMX_MPI + if (TAKETIME) + { + t1 = MPI_Wtime(); + fr->t_fnbf += t1-t0; + } +#endif + + if (fepvals->sc_alpha != 0) + { + enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW]; + } + else + { + enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW]; + } + + if (fepvals->sc_alpha != 0) + + /* even though coulomb part is linear, we already added it, beacuse we + need to go through the vdw calculation anyway */ + { + enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL]; + } + else + { + enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL]; + } + + Vsr = 0; + if (bSepDVDL) + { + for (i = 0; i < enerd->grpp.nener; i++) + { + Vsr += + (fr->bBHAM ? + enerd->grpp.ener[egBHAMSR][i] : + enerd->grpp.ener[egLJSR][i]) + + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i]; + } + dvdlsum = dvdl_nb[efptVDW] + dvdl_nb[efptCOUL]; + PRINT_SEPDVDL("VdW and Coulomb SR particle-p.", Vsr, dvdlsum); + } + debug_gmx(); + + GMX_MPE_LOG(ev_do_fnbf_finish); + + if (debug) + { + pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS); + } + + /* Shift the coordinates. Must be done before bonded forces and PPPM, + * but is also necessary for SHAKE and update, therefore it can NOT + * go when no bonded forces have to be evaluated. + */ + + /* Here sometimes we would not need to shift with NBFonly, + * but we do so anyhow for consistency of the returned coordinates. + */ + if (graph) + { + shift_self(graph, box, x); + if (TRICLINIC(box)) + { + inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes); + } + else + { + inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes); + } + } + /* Check whether we need to do bondeds or correct for exclusions */ + if (fr->bMolPBC && + ((flags & GMX_FORCE_BONDED) + || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype))) + { + /* Since all atoms are in the rectangular or triclinic unit-cell, + * only single box vector shifts (2 in x) are required. + */ + set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box); + } + debug_gmx(); + + if (flags & GMX_FORCE_BONDED) + { + GMX_MPE_LOG(ev_calc_bonds_start); + + wallcycle_sub_start(wcycle, ewcsBONDED); + calc_bonds(fplog, cr->ms, + idef, x, hist, f, fr, &pbc, graph, enerd, nrnb, lambda, md, fcd, + DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born, + flags, + fr->bSepDVDL && do_per_step(step, ir->nstlog), step); + + /* Check if we have to determine energy differences + * at foreign lambda's. + */ + if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && + idef->ilsort != ilsortNO_FE) + { + if (idef->ilsort != ilsortFE_SORTED) + { + gmx_incons("The bonded interactions are not sorted for free energy"); + } + for (i = 0; i < enerd->n_lambda; i++) + { + reset_foreign_enerdata(enerd); + for (j = 0; j < efptNR; j++) + { + lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]); + } + calc_bonds_lambda(fplog, idef, x, fr, &pbc, graph, &(enerd->foreign_grpp), enerd->foreign_term, nrnb, lam_i, md, + fcd, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL); + sum_epot(&ir->opts, &(enerd->foreign_grpp), enerd->foreign_term); + enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; + } + } + debug_gmx(); + GMX_MPE_LOG(ev_calc_bonds_finish); + wallcycle_sub_stop(wcycle, ewcsBONDED); + } + + where(); + + *cycles_pme = 0; + if (EEL_FULL(fr->eeltype)) + { + bSB = (ir->nwall == 2); + if (bSB) + { + copy_mat(box, boxs); + svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]); + box_size[ZZ] *= ir->wall_ewald_zfac; + } + + clear_mat(fr->vir_el_recip); + + if (fr->bEwald) + { + Vcorr = 0; + dvdl = 0; + + /* With the Verlet scheme exclusion forces are calculated + * in the non-bonded kernel. + */ + /* The TPI molecule does not have exclusions with the rest + * of the system and no intra-molecular PME grid contributions + * will be calculated in gmx_pme_calc_energy. + */ + if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) || + ir->ewald_geometry != eewg3D || + ir->epsilon_surface != 0) + { + int nthreads, t; + + wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION); + + if (fr->n_tpi > 0) + { + gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions"); + } + + nthreads = gmx_omp_nthreads_get(emntBonded); +#pragma omp parallel for num_threads(nthreads) schedule(static) + for (t = 0; t < nthreads; t++) + { + int s, e, i; + rvec *fnv; + tensor *vir; + real *Vcorrt, *dvdlt; + if (t == 0) + { + fnv = fr->f_novirsum; + vir = &fr->vir_el_recip; + Vcorrt = &Vcorr; + dvdlt = &dvdl; + } + else + { + fnv = fr->f_t[t].f; + vir = &fr->f_t[t].vir; + Vcorrt = &fr->f_t[t].Vcorr; + dvdlt = &fr->f_t[t].dvdl[efptCOUL]; + for (i = 0; i < fr->natoms_force; i++) + { + clear_rvec(fnv[i]); + } + clear_mat(*vir); + } + *dvdlt = 0; + *Vcorrt = + ewald_LRcorrection(fplog, + fr->excl_load[t], fr->excl_load[t+1], + cr, t, fr, + md->chargeA, + md->nChargePerturbed ? md->chargeB : NULL, + ir->cutoff_scheme != ecutsVERLET, + excl, x, bSB ? boxs : box, mu_tot, + ir->ewald_geometry, + ir->epsilon_surface, + fnv, *vir, + lambda[efptCOUL], dvdlt); + } + if (nthreads > 1) + { + reduce_thread_forces(fr->natoms_force, fr->f_novirsum, + fr->vir_el_recip, + &Vcorr, efptCOUL, &dvdl, + nthreads, fr->f_t); + } + + wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION); + } + + if (fr->n_tpi == 0) + { + Vcorr += ewald_charge_correction(cr, fr, lambda[efptCOUL], box, + &dvdl, fr->vir_el_recip); + } + + PRINT_SEPDVDL("Ewald excl./charge/dip. corr.", Vcorr, dvdl); + enerd->dvdl_lin[efptCOUL] += dvdl; + } + + status = 0; + Vlr = 0; + dvdl = 0; + switch (fr->eeltype) + { + case eelPME: + case eelPMESWITCH: + case eelPMEUSER: + case eelPMEUSERSWITCH: + case eelP3M_AD: + if (cr->duty & DUTY_PME) + { + assert(fr->n_tpi >= 0); + if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED)) + { + pme_flags = GMX_PME_SPREAD_Q | GMX_PME_SOLVE; + if (flags & GMX_FORCE_FORCES) + { + pme_flags |= GMX_PME_CALC_F; + } + if (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)) + { + pme_flags |= GMX_PME_CALC_ENER_VIR; + } + if (fr->n_tpi > 0) + { + /* We don't calculate f, but we do want the potential */ + pme_flags |= GMX_PME_CALC_POT; + } + wallcycle_start(wcycle, ewcPMEMESH); + status = gmx_pme_do(fr->pmedata, + md->start, md->homenr - fr->n_tpi, + x, fr->f_novirsum, + md->chargeA, md->chargeB, + bSB ? boxs : box, cr, + DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0, + DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0, + nrnb, wcycle, + fr->vir_el_recip, fr->ewaldcoeff, + &Vlr, lambda[efptCOUL], &dvdl, + pme_flags); + *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH); + + /* We should try to do as little computation after + * this as possible, because parallel PME synchronizes + * the nodes, so we want all load imbalance of the rest + * of the force calculation to be before the PME call. + * DD load balancing is done on the whole time of + * the force call (without PME). + */ + } + if (fr->n_tpi > 0) + { + /* Determine the PME grid energy of the test molecule + * with the PME grid potential of the other charges. + */ + gmx_pme_calc_energy(fr->pmedata, fr->n_tpi, + x + md->homenr - fr->n_tpi, + md->chargeA + md->homenr - fr->n_tpi, + &Vlr); + } + PRINT_SEPDVDL("PME mesh", Vlr, dvdl); + } + break; + case eelEWALD: + Vlr = do_ewald(fplog, FALSE, ir, x, fr->f_novirsum, + md->chargeA, md->chargeB, + box_size, cr, md->homenr, + fr->vir_el_recip, fr->ewaldcoeff, + lambda[efptCOUL], &dvdl, fr->ewald_table); + PRINT_SEPDVDL("Ewald long-range", Vlr, dvdl); + break; + default: + gmx_fatal(FARGS, "No such electrostatics method implemented %s", + eel_names[fr->eeltype]); + } + if (status != 0) + { + gmx_fatal(FARGS, "Error %d in long range electrostatics routine %s", + status, EELTYPE(fr->eeltype)); + } + /* Note that with separate PME nodes we get the real energies later */ + enerd->dvdl_lin[efptCOUL] += dvdl; + enerd->term[F_COUL_RECIP] = Vlr + Vcorr; + if (debug) + { + fprintf(debug, "Vlr = %g, Vcorr = %g, Vlr_corr = %g\n", + Vlr, Vcorr, enerd->term[F_COUL_RECIP]); + pr_rvecs(debug, 0, "vir_el_recip after corr", fr->vir_el_recip, DIM); + pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS); + } + } + else + { + if (EEL_RF(fr->eeltype)) + { + /* With the Verlet scheme exclusion forces are calculated + * in the non-bonded kernel. + */ + if (ir->cutoff_scheme != ecutsVERLET && fr->eeltype != eelRF_NEC) + { + dvdl = 0; + enerd->term[F_RF_EXCL] = + RF_excl_correction(fplog, fr, graph, md, excl, x, f, + fr->fshift, &pbc, lambda[efptCOUL], &dvdl); + } + + enerd->dvdl_lin[efptCOUL] += dvdl; + PRINT_SEPDVDL("RF exclusion correction", + enerd->term[F_RF_EXCL], dvdl); + } + } + where(); + debug_gmx(); + + if (debug) + { + print_nrnb(debug, nrnb); + } + debug_gmx(); + +#ifdef GMX_MPI + if (TAKETIME) + { + t2 = MPI_Wtime(); + MPI_Barrier(cr->mpi_comm_mygroup); + t3 = MPI_Wtime(); + fr->t_wait += t3-t2; + if (fr->timesteps == 11) + { + fprintf(stderr, "* PP load balancing info: node %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n", + cr->nodeid, gmx_step_str(fr->timesteps, buf), + 100*fr->t_wait/(fr->t_wait+fr->t_fnbf), + (fr->t_fnbf+fr->t_wait)/fr->t_fnbf); + } + fr->timesteps++; + } +#endif + + if (debug) + { + pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS); + } + + GMX_MPE_LOG(ev_force_finish); + +} + +void init_enerdata(int ngener, int n_lambda, gmx_enerdata_t *enerd) +{ + int i, n2; + + for (i = 0; i < F_NRE; i++) + { + enerd->term[i] = 0; + enerd->foreign_term[i] = 0; + } + + + for (i = 0; i < efptNR; i++) + { + enerd->dvdl_lin[i] = 0; + enerd->dvdl_nonlin[i] = 0; + } + + n2 = ngener*ngener; + if (debug) + { + fprintf(debug, "Creating %d sized group matrix for energies\n", n2); + } + enerd->grpp.nener = n2; + enerd->foreign_grpp.nener = n2; + for (i = 0; (i < egNR); i++) + { + snew(enerd->grpp.ener[i], n2); + snew(enerd->foreign_grpp.ener[i], n2); + } + + if (n_lambda) + { + enerd->n_lambda = 1 + n_lambda; + snew(enerd->enerpart_lambda, enerd->n_lambda); + } + else + { + enerd->n_lambda = 0; + } +} + +void destroy_enerdata(gmx_enerdata_t *enerd) +{ + int i; + + for (i = 0; (i < egNR); i++) + { + sfree(enerd->grpp.ener[i]); + } + + for (i = 0; (i < egNR); i++) + { + sfree(enerd->foreign_grpp.ener[i]); + } + + if (enerd->n_lambda) + { + sfree(enerd->enerpart_lambda); + } +} + +static real sum_v(int n, real v[]) +{ + real t; + int i; + + t = 0.0; + for (i = 0; (i < n); i++) + { + t = t + v[i]; + } + + return t; +} + +void sum_epot(t_grpopts *opts, gmx_grppairener_t *grpp, real *epot) +{ + int i; + + /* Accumulate energies */ + epot[F_COUL_SR] = sum_v(grpp->nener, grpp->ener[egCOULSR]); + epot[F_LJ] = sum_v(grpp->nener, grpp->ener[egLJSR]); + epot[F_LJ14] = sum_v(grpp->nener, grpp->ener[egLJ14]); + epot[F_COUL14] = sum_v(grpp->nener, grpp->ener[egCOUL14]); + epot[F_COUL_LR] = sum_v(grpp->nener, grpp->ener[egCOULLR]); + epot[F_LJ_LR] = sum_v(grpp->nener, grpp->ener[egLJLR]); + /* We have already added 1-2,1-3, and 1-4 terms to F_GBPOL */ + epot[F_GBPOL] += sum_v(grpp->nener, grpp->ener[egGB]); + +/* lattice part of LR doesnt belong to any group + * and has been added earlier + */ + epot[F_BHAM] = sum_v(grpp->nener, grpp->ener[egBHAMSR]); + epot[F_BHAM_LR] = sum_v(grpp->nener, grpp->ener[egBHAMLR]); + + epot[F_EPOT] = 0; + for (i = 0; (i < F_EPOT); i++) + { + if (i != F_DISRESVIOL && i != F_ORIRESDEV) + { + epot[F_EPOT] += epot[i]; + } + } +} + +void sum_dhdl(gmx_enerdata_t *enerd, real *lambda, t_lambda *fepvals) +{ + int i, j, index; + double dlam; + + enerd->dvdl_lin[efptVDW] += enerd->term[F_DVDL_VDW]; /* include dispersion correction */ + enerd->term[F_DVDL] = 0.0; + for (i = 0; i < efptNR; i++) + { + if (fepvals->separate_dvdl[i]) + { + /* could this be done more readably/compactly? */ + switch (i) + { + case (efptMASS): + index = F_DKDL; + break; + case (efptCOUL): + index = F_DVDL_COUL; + break; + case (efptVDW): + index = F_DVDL_VDW; + break; + case (efptBONDED): + index = F_DVDL_BONDED; + break; + case (efptRESTRAINT): + index = F_DVDL_RESTRAINT; + break; + default: + index = F_DVDL; + break; + } + enerd->term[index] = enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i]; + if (debug) + { + fprintf(debug, "dvdl-%s[%2d]: %f: non-linear %f + linear %f\n", + efpt_names[i], i, enerd->term[index], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]); + } + } + else + { + enerd->term[F_DVDL] += enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i]; + if (debug) + { + fprintf(debug, "dvd-%sl[%2d]: %f: non-linear %f + linear %f\n", + efpt_names[0], i, enerd->term[F_DVDL], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]); + } + } + } + + /* Notes on the foreign lambda free energy difference evaluation: + * Adding the potential and ekin terms that depend linearly on lambda + * as delta lam * dvdl to the energy differences is exact. + * For the constraints this is not exact, but we have no other option + * without literally changing the lengths and reevaluating the energies at each step. + * (try to remedy this post 4.6 - MRS) + * For the non-bonded LR term we assume that the soft-core (if present) + * no longer affects the energy beyond the short-range cut-off, + * which is a very good approximation (except for exotic settings). + * (investigate how to overcome this post 4.6 - MRS) + */ + if (fepvals->separate_dvdl[efptBONDED]) + { + enerd->term[F_DVDL_BONDED] += enerd->term[F_DVDL_CONSTR]; + } + else + { + enerd->term[F_DVDL] += enerd->term[F_DVDL_CONSTR]; + } + enerd->term[F_DVDL_CONSTR] = 0; + + for (i = 0; i < fepvals->n_lambda; i++) + { /* note we are iterating over fepvals here! + For the current lam, dlam = 0 automatically, + so we don't need to add anything to the + enerd->enerpart_lambda[0] */ + + /* we don't need to worry about dvdl_lin contributions to dE at + current lambda, because the contributions to the current + lambda are automatically zeroed */ + + for (j = 0; j < efptNR; j++) + { + /* Note that this loop is over all dhdl components, not just the separated ones */ + dlam = (fepvals->all_lambda[j][i]-lambda[j]); + enerd->enerpart_lambda[i+1] += dlam*enerd->dvdl_lin[j]; + if (debug) + { + fprintf(debug, "enerdiff lam %g: (%15s), non-linear %f linear %f*%f\n", + fepvals->all_lambda[j][i], efpt_names[j], + (enerd->enerpart_lambda[i+1] - enerd->enerpart_lambda[0]), + dlam, enerd->dvdl_lin[j]); + } + } + } +} + + +void reset_foreign_enerdata(gmx_enerdata_t *enerd) +{ + int i, j; + + /* First reset all foreign energy components. Foreign energies always called on + neighbor search steps */ + for (i = 0; (i < egNR); i++) + { + for (j = 0; (j < enerd->grpp.nener); j++) + { + enerd->foreign_grpp.ener[i][j] = 0.0; + } + } + + /* potential energy components */ + for (i = 0; (i <= F_EPOT); i++) + { + enerd->foreign_term[i] = 0.0; + } +} + +void reset_enerdata(t_grpopts *opts, + t_forcerec *fr, gmx_bool bNS, + gmx_enerdata_t *enerd, + gmx_bool bMaster) +{ + gmx_bool bKeepLR; + int i, j; + + /* First reset all energy components, except for the long range terms + * on the master at non neighbor search steps, since the long range + * terms have already been summed at the last neighbor search step. + */ + bKeepLR = (fr->bTwinRange && !bNS); + for (i = 0; (i < egNR); i++) + { + if (!(bKeepLR && bMaster && (i == egCOULLR || i == egLJLR))) + { + for (j = 0; (j < enerd->grpp.nener); j++) + { + enerd->grpp.ener[i][j] = 0.0; + } + } + } + for (i = 0; i < efptNR; i++) + { + enerd->dvdl_lin[i] = 0.0; + enerd->dvdl_nonlin[i] = 0.0; + } + + /* Normal potential energy components */ + for (i = 0; (i <= F_EPOT); i++) + { + enerd->term[i] = 0.0; + } + /* Initialize the dVdlambda term with the long range contribution */ + /* Initialize the dvdl term with the long range contribution */ + enerd->term[F_DVDL] = 0.0; + enerd->term[F_DVDL_COUL] = 0.0; + enerd->term[F_DVDL_VDW] = 0.0; + enerd->term[F_DVDL_BONDED] = 0.0; + enerd->term[F_DVDL_RESTRAINT] = 0.0; + enerd->term[F_DKDL] = 0.0; + if (enerd->n_lambda > 0) + { + for (i = 0; i < enerd->n_lambda; i++) + { + enerd->enerpart_lambda[i] = 0.0; + } + } + /* reset foreign energy data - separate function since we also call it elsewhere */ + reset_foreign_enerdata(enerd); +}