/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team.
 * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief This file defines integrators for energy minimization
 *
 * \author Berk Hess <hess@kth.se>
 * \author Erik Lindahl <erik@kth.se>
 * \ingroup module_mdlib
 */
#include "gmxpre.h"

#include "minimize.h"

#include "config.h"

#include <cmath>
#include <cstring>
#include <ctime>

#include <algorithm>
#include <vector>

#include "gromacs/commandline/filenm.h"
#include "gromacs/domdec/domdec.h"
#include "gromacs/domdec/domdec_struct.h"
#include "gromacs/ewald/pme.h"
#include "gromacs/fileio/confio.h"
#include "gromacs/fileio/mtxio.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/imd/imd.h"
#include "gromacs/linearalgebra/sparsematrix.h"
#include "gromacs/listed-forces/manage-threading.h"
#include "gromacs/math/functions.h"
#include "gromacs/math/vec.h"
#include "gromacs/mdlib/constr.h"
#include "gromacs/mdlib/force.h"
#include "gromacs/mdlib/forcerec.h"
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdlib/md_support.h"
#include "gromacs/mdlib/mdatoms.h"
#include "gromacs/mdlib/mdebin.h"
#include "gromacs/mdlib/mdrun.h"
#include "gromacs/mdlib/mdsetup.h"
#include "gromacs/mdlib/ns.h"
#include "gromacs/mdlib/shellfc.h"
#include "gromacs/mdlib/sim_util.h"
#include "gromacs/mdlib/tgroup.h"
#include "gromacs/mdlib/trajectory_writing.h"
#include "gromacs/mdlib/update.h"
#include "gromacs/mdlib/vsite.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/mdtypes/state.h"
#include "gromacs/pbcutil/mshift.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/timing/wallcycle.h"
#include "gromacs/timing/walltime_accounting.h"
#include "gromacs/topology/mtop_util.h"
#include "gromacs/topology/topology.h"
#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/logger.h"
#include "gromacs/utility/smalloc.h"

/* PLUMED */
#include "../../../Plumed.h"
/* Flag tested before PLUMED commands are issued (see the PLUMED section of
 * init_em); the variable itself is defined in another translation unit.
 */
extern int    plumedswitch;
/* PLUMED handle that is passed as first argument to every plumedcmd call;
 * defined in another translation unit.
 */
extern plumed plumedmain;
/* Function pointer through which PLUMED commands are issued, called as
 * (handle, command string, pointer to the command's value or NULL).
 */
extern void(*plumedcmd)(plumed,const char*,const void*);
/* END PLUMED */

//! Utility structure for manipulating states during EM
typedef struct {
    /*! \brief State of this minimization point
     *
     * init_em fills it with a copy of the global state when running without
     * domain decomposition, and with the rank-local state otherwise.
     */
    t_state          s;
    //! Force array, padded (see init_em / do_em_step, which size it with paddedRVecVectorSize)
    PaddedRVecVector f;
    //! Potential energy
    real             epot;
    //! Norm of the force (square root of the sum of squared forces, see get_f_norm_max)
    real             fnorm;
    //! Maximum force on a single atom
    real             fmax;
    //! Zero-based index of the atom with the maximum force (global index with domain decomposition), or -1 when none was found
    int              a_fmax;
} em_state_t;

/*! \brief Print the EM starting conditions
 *
 * Starts the wall-time accounting and the ewcRUN cycle counter before
 * delegating the actual printing to print_start().
 *
 * \param[in] fplog                Log file handed to print_start()
 * \param[in] cr                   Communication record handed to print_start()
 * \param[in] walltime_accounting  Wall-time accounting object that is started here
 * \param[in] wcycle               Cycle counter on which ewcRUN is started
 * \param[in] name                 Name of the minimizer, handed to print_start()
 */
static void print_em_start(FILE                     *fplog,
                           t_commrec                *cr,
                           gmx_walltime_accounting_t walltime_accounting,
                           gmx_wallcycle_t           wcycle,
                           const char               *name)
{
    walltime_accounting_start(walltime_accounting);
    wallcycle_start(wcycle, ewcRUN);
    print_start(fplog, cr, walltime_accounting, name);
}

/*! \brief Stop counting time for EM
 *
 * Counterpart of print_em_start(): stops the ewcRUN cycle counter first and
 * then ends the wall-time accounting.
 *
 * \param[in] walltime_accounting  Wall-time accounting object to end
 * \param[in] wcycle               Cycle counter on which ewcRUN is stopped
 */
static void em_time_end(gmx_walltime_accounting_t walltime_accounting,
                        gmx_wallcycle_t           wcycle)
{
    wallcycle_stop(wcycle, ewcRUN);

    walltime_accounting_end(walltime_accounting);
}

/*! \brief Print a short header for the log file or the console
 *
 * Writes an empty line, the minimizer name followed by a colon, the force
 * tolerance and the maximum number of steps.
 *
 * \param[in] out        Stream to write to
 * \param[in] minimizer  Name of the minimization algorithm
 * \param[in] ftol       Requested tolerance on the maximum force
 * \param[in] nsteps     Number of steps that will be printed
 */
static void sp_header(FILE *out, const char *minimizer, real ftol, int nsteps)
{
    /* A single call with one concatenated format produces the same four lines */
    fprintf(out,
            "\n"
            "%s:\n"
            "   Tolerance (Fmax)   = %12.5e\n"
            "   Number of steps    = %12d\n",
            minimizer, ftol, nsteps);
}

//! Print warning message
static void warn_step(FILE *fp, real ftol, gmx_bool bLastStep, gmx_bool bConstrain)
{
    char buffer[2048];
    if (bLastStep)
    {
        sprintf(buffer,
                "\nEnergy minimization reached the maximum number "
                "of steps before the forces reached the requested "
                "precision Fmax < %g.\n", ftol);
    }
    else
    {
        sprintf(buffer,
                "\nEnergy minimization has stopped, but the forces have "
                "not converged to the requested precision Fmax < %g (which "
                "may not be possible for your system). It stopped "
                "because the algorithm tried to make a new step whose size "
                "was too small, or there was no change in the energy since "
                "last step. Either way, we regard the minimization as "
                "converged to within the available machine precision, "
                "given your starting configuration and EM parameters.\n%s%s",
                ftol,
                sizeof(real) < sizeof(double) ?
                "\nDouble precision normally gives you higher accuracy, but "
                "this is often not needed for preparing to run molecular "
                "dynamics.\n" :
                "",
                bConstrain ?
                "You might need to increase your constraint accuracy, or turn\n"
                "off constraints altogether (set constraints = none in mdp file)\n" :
                "");
    }
    fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
}

/*! \brief Print message about convergence of the EM
 *
 * Prints one of three status lines followed by the potential energy, the
 * maximum force with the one-based index of the atom it acts on, and the
 * force norm divided by \p sqrtNumAtoms.
 *
 * \param[in] fp            Stream to write to
 * \param[in] alg           Name of the minimization algorithm
 * \param[in] ftol          Requested tolerance on the maximum force
 * \param[in] count         Number of steps that were performed
 * \param[in] bDone         TRUE when the requested tolerance was reached
 * \param[in] nsteps        Maximum number of steps; when \p bDone is FALSE and
 *                          \p count is smaller than this, the run is reported
 *                          as converged to machine precision
 * \param[in] ems           State whose energy and force data are printed
 * \param[in] sqrtNumAtoms  Divisor applied to the force norm before printing
 */
static void print_converged(FILE *fp, const char *alg, real ftol,
                            gmx_int64_t count, gmx_bool bDone, gmx_int64_t nsteps,
                            const em_state_t *ems, double sqrtNumAtoms)
{
    char buf[STEPSTRSIZE];

    if (bDone)
    {
        fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n",
                alg, ftol, gmx_step_str(count, buf));
    }
    else if (count < nsteps)
    {
        fprintf(fp, "\n%s converged to machine precision in %s steps,\n"
                "but did not reach the requested Fmax < %g.\n",
                alg, gmx_step_str(count, buf), ftol);
    }
    else
    {
        fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n",
                alg, ftol, gmx_step_str(count, buf));
    }

    /* More digits are printed in double precision builds */
#if GMX_DOUBLE
    fprintf(fp, "Potential Energy  = %21.14e\n", ems->epot);
    fprintf(fp, "Maximum force     = %21.14e on atom %d\n", ems->fmax, ems->a_fmax + 1);
    fprintf(fp, "Norm of force     = %21.14e\n", ems->fnorm/sqrtNumAtoms);
#else
    fprintf(fp, "Potential Energy  = %14.7e\n", ems->epot);
    fprintf(fp, "Maximum force     = %14.7e on atom %d\n", ems->fmax, ems->a_fmax + 1);
    fprintf(fp, "Norm of force     = %14.7e\n", ems->fnorm/sqrtNumAtoms);
#endif
}

/*! \brief Compute the norm and max of the force array in parallel
 *
 * Loops over the home atoms and accumulates the sum of squared forces and
 * the largest squared force on a single atom. When freeze groups are present,
 * frozen dimensions are left out of both. With more than one rank the sum is
 * reduced over all ranks and the largest per-rank maximum is selected.
 *
 * \param[in]  cr       Communication record
 * \param[in]  opts     Group options, provides the nFreeze flags
 * \param[in]  mdatoms  Atom data, provides homenr and the freeze group indices
 * \param[in]  f        Forces on the home atoms
 * \param[out] fnorm    Square root of the summed squared forces, may be nullptr
 * \param[out] fmax     Largest force on a single atom, may be nullptr
 * \param[out] a_fmax   Index of the atom with the largest force (translated
 *                      through the DD global atom index with domain
 *                      decomposition), -1 when no atom has a non-zero force;
 *                      may be nullptr
 */
static void get_f_norm_max(t_commrec *cr,
                           t_grpopts *opts, t_mdatoms *mdatoms, const rvec *f,
                           real *fnorm, real *fmax, int *a_fmax)
{
    double    sumOfSquares  = 0;
    real      largestSquare = 0;
    int       localArgMax   = -1;
    const int numHomeAtoms  = mdatoms->homenr;

    if (mdatoms->cFREEZE)
    {
        /* Only the dimensions that are not frozen contribute */
        for (int atom = 0; atom < numHomeAtoms; atom++)
        {
            const int freezeGroup = mdatoms->cFREEZE[atom];
            real      atomSquare  = 0;
            for (int d = 0; d < DIM; d++)
            {
                if (opts->nFreeze[freezeGroup][d])
                {
                    continue;
                }
                atomSquare += gmx::square(f[atom][d]);
            }
            sumOfSquares += atomSquare;
            if (atomSquare > largestSquare)
            {
                largestSquare = atomSquare;
                localArgMax   = atom;
            }
        }
    }
    else
    {
        for (int atom = 0; atom < numHomeAtoms; atom++)
        {
            const real atomSquare = norm2(f[atom]);
            sumOfSquares += atomSquare;
            if (atomSquare > largestSquare)
            {
                largestSquare = atomSquare;
                localArgMax   = atom;
            }
        }
    }

    /* Translate the local index to a global one when running with DD */
    int argMax = localArgMax;
    if (localArgMax >= 0 && DOMAINDECOMP(cr))
    {
        argMax = cr->dd->gatindex[localArgMax];
    }

    if (PAR(cr))
    {
        /* Layout: a (max, atom) pair per rank followed by the norm.
         * Every rank only fills its own pair, so after summing each slot
         * holds the value of the rank that owns it.
         */
        const int normSlot = 2*cr->nnodes;
        double   *sum;
        snew(sum, 2*cr->nnodes+1);
        sum[2*cr->nodeid]   = largestSquare;
        sum[2*cr->nodeid+1] = argMax;
        sum[normSlot]       = sumOfSquares;
        gmx_sumd(2*cr->nnodes+1, sum, cr);
        sumOfSquares = sum[normSlot];
        /* Determine the global maximum */
        for (int rank = 0; rank < cr->nnodes; rank++)
        {
            if (sum[2*rank] > largestSquare)
            {
                largestSquare = sum[2*rank];
                argMax        = static_cast<int>(sum[2*rank+1] + 0.5);
            }
        }
        sfree(sum);
    }

    /* Each output is optional */
    if (fnorm)
    {
        *fnorm = sqrt(sumOfSquares);
    }
    if (fmax)
    {
        *fmax  = sqrt(largestSquare);
    }
    if (a_fmax)
    {
        *a_fmax = argMax;
    }
}

/*! \brief Compute the norm of the force
 *
 * Convenience wrapper around get_f_norm_max() that reads the forces from
 * \p ems and stores the force norm, the maximum force and the index of the
 * atom with the maximum force back into \p ems.
 *
 * \param[in]     cr       Communication record
 * \param[in]     opts     Group options, provides the freeze flags
 * \param[in]     mdatoms  Atom data
 * \param[in,out] ems      EM state: f is read; fnorm, fmax and a_fmax are written
 */
static void get_state_f_norm_max(t_commrec *cr,
                                 t_grpopts *opts, t_mdatoms *mdatoms,
                                 em_state_t *ems)
{
    get_f_norm_max(cr, opts, mdatoms, as_rvec_array(ems->f.data()),
                   &ems->fnorm, &ems->fmax, &ems->a_fmax);
}

/*! \brief Initialize the energy minimization
 *
 * In order, this routine: logs the title; on the master rank resets ngtc and
 * initializes the lambdas in the global state; resets the flop counters; sets
 * up IMD; sets up (eiNM) or disables (all other integrators) shell/flexible
 * constraint handling; creates the local topology and the EM state \p ems
 * (through DD partitioning, or by copying the global state); updates the atom
 * data for the mass lambda; sets up constraints and, unless this is a
 * continuation, constrains the starting coordinates; creates the global
 * statistics object, the output files, the energy data and optionally the
 * energy bin; clears \p mu_tot and computes the shift vectors; and finally
 * initializes PLUMED when it is switched on.
 *
 * Output parameters written here: \p ems, \p top, \p enerd, \p graph (only
 * set to nullptr directly in the DD branch), \p gstat, \p shellfc (when not
 * nullptr), \p outf, \p mdebin (when not nullptr) and \p mu_tot.
 */
static void init_em(FILE *fplog, const char *title,
                    t_commrec *cr, gmx::IMDOutputProvider *outputProvider,
                    t_inputrec *ir,
                    const MdrunOptions &mdrunOptions,
                    t_state *state_global, gmx_mtop_t *top_global,
                    em_state_t *ems, gmx_localtop_t **top,
                    t_nrnb *nrnb, rvec mu_tot,
                    t_forcerec *fr, gmx_enerdata_t **enerd,
                    t_graph **graph, gmx::MDAtoms *mdAtoms, gmx_global_stat_t *gstat,
                    gmx_vsite_t *vsite, gmx_constr_t constr, gmx_shellfc_t **shellfc,
                    int nfile, const t_filenm fnm[],
                    gmx_mdoutf_t *outf, t_mdebin **mdebin,
                    gmx_wallcycle_t wcycle)
{
    real dvdl_constr;

    if (fplog)
    {
        fprintf(fplog, "Initiating %s\n", title);
    }

    /* The global state is only touched on the master rank */
    if (MASTER(cr))
    {
        state_global->ngtc = 0;

        /* Initialize lambda variables */
        initialize_lambdas(fplog, ir, &(state_global->fep_state), state_global->lambda, nullptr);
    }

    init_nrnb(nrnb);

    /* Interactive molecular dynamics */
    init_IMD(ir, cr, top_global, fplog, 1,
             MASTER(cr) ? as_rvec_array(state_global->x.data()) : nullptr,
             nfile, fnm, nullptr, mdrunOptions);

    if (ir->eI == eiNM)
    {
        GMX_ASSERT(shellfc != nullptr, "With NM we always support shells");

        *shellfc = init_shell_flexcon(stdout,
                                      top_global,
                                      n_flexible_constraints(constr),
                                      ir->nstcalcenergy,
                                      DOMAINDECOMP(cr));
    }
    else
    {
        GMX_ASSERT(EI_ENERGY_MINIMIZATION(ir->eI), "This else currently only handles energy minimizers, consider if your algorithm needs shell/flexible-constraint support");

        /* With energy minimization, shells and flexible constraints are
         * automatically minimized when treated like normal DOFS.
         */
        if (shellfc != nullptr)
        {
            *shellfc = nullptr;
        }
    }

    auto mdatoms = mdAtoms->mdatoms();
    if (DOMAINDECOMP(cr))
    {
        *top = dd_init_local_top(top_global);

        dd_init_local_state(cr->dd, state_global, &ems->s);

        /* Distribute the charge groups over the nodes from the master node */
        dd_partition_system(fplog, ir->init_step, cr, TRUE, 1,
                            state_global, top_global, ir,
                            &ems->s, &ems->f, mdAtoms, *top,
                            fr, vsite, constr,
                            nrnb, nullptr, FALSE);
        dd_store_state(cr->dd, &ems->s);

        *graph = nullptr;
    }
    else
    {
        state_change_natoms(state_global, state_global->natoms);
        /* Just copy the state */
        ems->s = *state_global;
        state_change_natoms(&ems->s, ems->s.natoms);
        /* We need to allocate one element extra, since we might use
         * (unaligned) 4-wide SIMD loads to access rvec entries.
         */
        ems->f.resize(gmx::paddedRVecVectorSize(ems->s.natoms));

        snew(*top, 1);
        mdAlgorithmsSetupAtomData(cr, ir, top_global, *top, fr,
                                  graph, mdAtoms,
                                  vsite, shellfc ? *shellfc : nullptr);

        if (vsite)
        {
            set_vsite_top(vsite, *top, mdatoms);
        }
    }

    update_mdatoms(mdAtoms->mdatoms(), ems->s.lambda[efptMASS]);

    if (constr)
    {
        /* SHAKE is rejected when the topology actually contains constraints */
        if (ir->eConstrAlg == econtSHAKE &&
            gmx_mtop_ftype_count(top_global, F_CONSTR) > 0)
        {
            gmx_fatal(FARGS, "Can not do energy minimization with %s, use %s\n",
                      econstr_names[econtSHAKE], econstr_names[econtLINCS]);
        }

        /* Only done here without DD
         * NOTE(review): presumably the DD partitioning above already set up
         * the constraints in the DD case - confirm against dd_partition_system.
         */
        if (!DOMAINDECOMP(cr))
        {
            set_constraints(constr, *top, ir, mdatoms, cr);
        }

        if (!ir->bContinuation)
        {
            /* Constrain the starting coordinates */
            dvdl_constr = 0;
            constrain(PAR(cr) ? nullptr : fplog, TRUE, TRUE, constr, &(*top)->idef,
                      ir, cr, -1, 0, 1.0, mdatoms,
                      as_rvec_array(ems->s.x.data()),
                      as_rvec_array(ems->s.x.data()),
                      nullptr,
                      fr->bMolPBC, ems->s.box,
                      ems->s.lambda[efptFEP], &dvdl_constr,
                      nullptr, nullptr, nrnb, econqCoord);
        }
    }

    /* Global statistics are only needed with more than one rank */
    if (PAR(cr))
    {
        *gstat = global_stat_init(ir);
    }
    else
    {
        *gstat = nullptr;
    }

    *outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, ir, top_global, nullptr, wcycle);

    snew(*enerd, 1);
    init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda,
                  *enerd);

    if (mdebin != nullptr)
    {
        /* Init bin for energy stuff */
        *mdebin = init_mdebin(mdoutf_get_fp_ene(*outf), top_global, ir, nullptr);
    }

    clear_rvec(mu_tot);
    calc_shifts(ems->s.box, fr->shift_vec);

    /* PLUMED */
    if(plumedswitch){
      /* Multi-simulation setup: hand the inter- and intra-simulation
       * communicators to PLUMED before "GREX init"
       */
      if(cr->ms && cr->ms->nsim>1) {
        if(MASTER(cr)) (*plumedcmd) (plumedmain,"GREX setMPIIntercomm",&cr->ms->mpi_comm_masters);
        if(PAR(cr)){
          if(DOMAINDECOMP(cr)) {
            (*plumedcmd) (plumedmain,"GREX setMPIIntracomm",&cr->dd->mpi_comm_all);
          }else{
            (*plumedcmd) (plumedmain,"GREX setMPIIntracomm",&cr->mpi_comm_mysim);
          }
        }
        (*plumedcmd) (plumedmain,"GREX init",NULL);
      }
      /* Communicator for this simulation, only set for parallel runs */
      if(PAR(cr)){
        if(DOMAINDECOMP(cr)) {
          (*plumedcmd) (plumedmain,"setMPIComm",&cr->dd->mpi_comm_all);
        }else{
          (*plumedcmd) (plumedmain,"setMPIComm",&cr->mpi_comm_mysim);
        }
      }
      (*plumedcmd) (plumedmain,"setNatoms",&top_global->natoms);
      (*plumedcmd) (plumedmain,"setMDEngine","gromacs");
      (*plumedcmd) (plumedmain,"setLog",fplog);
      /* The time step is copied into a local variable of type real before
       * its address is passed to PLUMED
       */
      real real_delta_t;
      real_delta_t=ir->delta_t;
      (*plumedcmd) (plumedmain,"setTimestep",&real_delta_t);
      (*plumedcmd) (plumedmain,"init",NULL);

      /* With DD, pass the number of home atoms and the global atom indices */
      if(PAR(cr)){
        if(DOMAINDECOMP(cr)) {
          (*plumedcmd) (plumedmain,"setAtomsNlocal",&cr->dd->nat_home);
          (*plumedcmd) (plumedmain,"setAtomsGatindex",cr->dd->gatindex);
        }
      }
    }
    /* END PLUMED */
}

/*! \brief Finalize the minimization
 *
 * Sends the PME finish message when this rank has no PME duty, closes the
 * output files and stops the timers through em_time_end().
 *
 * \param[in] cr                   Communication record
 * \param[in] outf                 Output file object to close
 * \param[in] walltime_accounting  Wall-time accounting object to end
 * \param[in] wcycle               Cycle counter to stop
 */
static void finish_em(t_commrec *cr, gmx_mdoutf_t outf,
                      gmx_walltime_accounting_t walltime_accounting,
                      gmx_wallcycle_t wcycle)
{
    if (!thisRankHasDuty(cr, DUTY_PME))
    {
        /* Tell the PME only node to finish */
        gmx_pme_send_finish(cr);
    }

    done_mdoutf(outf);

    em_time_end(walltime_accounting, wcycle);
}

/*! \brief Swap two different EM states during minimization
 *
 * Only the two pointers are exchanged; the states they point to are not
 * copied or modified.
 *
 * \param[in,out] ems1  On return points to what \p ems2 pointed to
 * \param[in,out] ems2  On return points to what \p ems1 pointed to
 */
static void swap_em_state(em_state_t **ems1, em_state_t **ems2)
{
    em_state_t *const previousFirst = *ems1;

    *ems1 = *ems2;
    *ems2 = previousFirst;
}

//! Save the EM trajectory
static void write_em_traj(FILE *fplog, t_commrec *cr,
                          gmx_mdoutf_t outf,
                          gmx_bool bX, gmx_bool bF, const char *confout,
                          gmx_mtop_t *top_global,
                          t_inputrec *ir, gmx_int64_t step,
                          em_state_t *state,
                          t_state *state_global,
                          ObservablesHistory *observablesHistory)
{
    int mdof_flags = 0;

    if (bX)
    {
        mdof_flags |= MDOF_X;
    }
    if (bF)
    {
        mdof_flags |= MDOF_F;
    }

    /* If we want IMD output, set appropriate MDOF flag */
    if (ir->bIMD)
    {
        mdof_flags |= MDOF_IMD;
    }

    mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags,
                                     top_global, step, (double)step,
                                     &state->s, state_global, observablesHistory,
                                     state->f);

    if (confout != nullptr)
carlocamilloni's avatar
carlocamilloni committed
    {
        if (DOMAINDECOMP(cr))
carlocamilloni's avatar
carlocamilloni committed
        {
            /* If bX=true, x was collected to state_global in the call above */
            if (!bX)
            {
                gmx::ArrayRef<gmx::RVec> globalXRef = MASTER(cr) ? gmx::makeArrayRef(state_global->x) : gmx::EmptyArrayRef();
                dd_collect_vec(cr->dd, &state->s, state->s.x, globalXRef);
            }
        }
        else
        {
            /* Copy the local state pointer */
carlocamilloni's avatar
carlocamilloni committed
            state_global = &state->s;
        }

        if (MASTER(cr))
carlocamilloni's avatar
carlocamilloni committed
        {
            if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
            {
                /* Make molecules whole only for confout writing */
                do_pbc_mtop(fplog, ir->ePBC, state->s.box, top_global,
                            as_rvec_array(state_global->x.data()));
            }
carlocamilloni's avatar
carlocamilloni committed

            write_sto_conf_mtop(confout,
                                *top_global->name, top_global,
                                as_rvec_array(state_global->x.data()), nullptr, ir->ePBC, state->s.box);
        }
carlocamilloni's avatar
carlocamilloni committed
    }
}

//! \brief Do one minimization step
//
// Builds the trial state \p ems2 from \p ems1: every non-frozen coordinate
// component becomes x1 + a*force, frozen components are copied unchanged.
// The box, the lambdas, the flags, the CG p vector (when flagged in the
// state) and, with domain decomposition, the DD bookkeeping are copied from
// \p ems1. When \p constr is set the new coordinates are constrained with
// the old ones as reference.
//
// A constraint failure is fatal for every minimizer except steepest descent.
//
// \returns true when the step succeeded, false when a constraint error occurred
static bool do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md,
                       gmx_bool bMolPBC,
                       em_state_t *ems1, real a, const PaddedRVecVector *force,
                       em_state_t *ems2,
                       gmx_constr_t constr, gmx_localtop_t *top,
                       t_nrnb *nrnb, gmx_wallcycle_t wcycle,
                       gmx_int64_t count)

{
    t_state *s1, *s2;
    int      start, end;
    real     dvdl_constr;
    int      nthreads gmx_unused;

    bool     validStep = true;

    s1 = &ems1->s;
    s2 = &ems2->s;

    if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
    {
        gmx_incons("state mismatch in do_em_step");
    }

    s2->flags = s1->flags;

    if (s2->natoms != s1->natoms)
    {
        state_change_natoms(s2, s1->natoms);
        /* We need to allocate one element extra, since we might use
         * (unaligned) 4-wide SIMD loads to access rvec entries.
         */
        ems2->f.resize(gmx::paddedRVecVectorSize(s2->natoms));
    }
    if (DOMAINDECOMP(cr))
    {
        if (s2->cg_gl.size() != s1->cg_gl.size())
        {
            s2->cg_gl.resize(s1->cg_gl.size());
        }
        /* Copy the scalar DD bookkeeping once, on a single thread. Writing
         * these shared fields from every thread inside the parallel region
         * below would be a data race.
         */
        s2->ddp_count       = s1->ddp_count;
        s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
    }

    /* Copy free energy state */
    s2->lambda = s1->lambda;
    copy_mat(s1->box, s2->box);

    start = 0;
    end   = md->homenr;

    // cppcheck-suppress unreadVariable
    nthreads = gmx_omp_nthreads_get(emntUpdate);
#pragma omp parallel num_threads(nthreads)
    {
        const rvec *x1 = as_rvec_array(s1->x.data());
        rvec       *x2 = as_rvec_array(s2->x.data());
        const rvec *f  = as_rvec_array(force->data());

        /* Declared inside the parallel region, so every thread has its own
         * copy; stays 0 when there are no freeze groups.
         */
        int         gf = 0;
#pragma omp for schedule(static) nowait
        for (int i = start; i < end; i++)
        {
            try
            {
                if (md->cFREEZE)
                {
                    gf = md->cFREEZE[i];
                }
                for (int m = 0; m < DIM; m++)
                {
                    if (ir->opts.nFreeze[gf][m])
                    {
                        x2[i][m] = x1[i][m];
                    }
                    else
                    {
                        x2[i][m] = x1[i][m] + a*f[i][m];
                    }
                }
            }
            GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
        }

        if (s2->flags & (1<<estCGP))
        {
            /* Copy the CG p vector */
            const rvec *p1 = as_rvec_array(s1->cg_p.data());
            rvec       *p2 = as_rvec_array(s2->cg_p.data());
#pragma omp for schedule(static) nowait
            for (int i = start; i < end; i++)
            {
                // Trivial OpenMP block that does not throw
                copy_rvec(p1[i], p2[i]);
            }
        }

        if (DOMAINDECOMP(cr))
        {
            /* OpenMP does not support unsigned loop variables */
#pragma omp for schedule(static) nowait
            for (int i = 0; i < static_cast<int>(s2->cg_gl.size()); i++)
            {
                s2->cg_gl[i] = s1->cg_gl[i];
            }
        }
    }

    if (constr)
    {
        wallcycle_start(wcycle, ewcCONSTR);
        dvdl_constr = 0;
        validStep   =
            constrain(nullptr, TRUE, TRUE, constr, &top->idef,
                      ir, cr, count, 0, 1.0, md,
                      as_rvec_array(s1->x.data()), as_rvec_array(s2->x.data()),
                      nullptr, bMolPBC, s2->box,
                      s2->lambda[efptBONDED], &dvdl_constr,
                      nullptr, nullptr, nrnb, econqCoord);
        wallcycle_stop(wcycle, ewcCONSTR);

        if (cr->nnodes > 1)
        {
            /* This global reduction will affect performance at high
             * parallelization, but we can not really avoid it.
             * But usually EM is not run at high parallelization.
             */
            int reductionBuffer = !validStep;
            gmx_sumi(1, &reductionBuffer, cr);
            /* The step is only valid when it succeeded on all ranks */
            validStep           = (reductionBuffer == 0);
        }

        // We should move this check to the different minimizers
        if (!validStep && ir->eI != eiSteep)
        {
            gmx_fatal(FARGS, "The coordinates could not be constrained. Minimizer '%s' can not handle constraint failures, use minimizer '%s' before using '%s'.",
                      EI(ir->eI), EI(eiSteep), EI(ir->eI));
        }
    }

    return validStep;
}

/*! \brief Prepare EM for using domain decomposition parallelization
 *
 * Repartitions the system for the state in \p ems through
 * dd_partition_system() and then stores that state with dd_store_state().
 *
 * NOTE(review): \p step is an int here while evaluate_energy() passes a
 * gmx_int64_t step count, so the value is narrowed on the way in - confirm
 * this is acceptable for the step counts used with EM.
 *
 * \param[in]     fplog       Log file
 * \param[in]     step        Step number passed on to the partitioning
 * \param[in]     cr          Communication record
 * \param[in]     top_global  Global topology
 * \param[in]     ir          Input record
 * \param[in,out] ems         EM state whose state and force array are repartitioned
 * \param[in,out] top         Local topology
 * \param[in]     mdAtoms     Atom data manager
 * \param[in]     fr          Force record
 * \param[in]     vsite       Virtual site data
 * \param[in]     constr      Constraint data
 * \param[in]     nrnb        Flop counters
 * \param[in]     wcycle      Cycle counters
 */
static void em_dd_partition_system(FILE *fplog, int step, t_commrec *cr,
                                   gmx_mtop_t *top_global, t_inputrec *ir,
                                   em_state_t *ems, gmx_localtop_t *top,
                                   gmx::MDAtoms *mdAtoms, t_forcerec *fr,
                                   gmx_vsite_t *vsite, gmx_constr_t constr,
                                   t_nrnb *nrnb, gmx_wallcycle_t wcycle)
{
    /* Repartition the domain decomposition */
    dd_partition_system(fplog, step, cr, FALSE, 1,
                        nullptr, top_global, ir,
                        &ems->s, &ems->f,
                        mdAtoms, top, fr, vsite, constr,
                        nrnb, wcycle, FALSE);
    dd_store_state(cr->dd, &ems->s);
}

//! Do one energy evaluation
static void evaluate_energy(FILE *fplog, t_commrec *cr,
                            gmx_mtop_t *top_global,
                            em_state_t *ems, gmx_localtop_t *top,
                            t_inputrec *inputrec,
                            t_nrnb *nrnb, gmx_wallcycle_t wcycle,
                            gmx_global_stat_t gstat,
                            gmx_vsite_t *vsite, gmx_constr_t constr,
                            t_fcdata *fcd,
                            t_graph *graph, gmx::MDAtoms *mdAtoms,
                            t_forcerec *fr, rvec mu_tot,
                            gmx_enerdata_t *enerd, tensor vir, tensor pres,
                            gmx_int64_t count, gmx_bool bFirst)
{
    real     t;
    gmx_bool bNS;
    tensor   force_vir, shake_vir, ekin;
    real     dvdl_constr, prescorr, enercorr, dvdlcorr;
    real     terminate = 0;

    /* Set the time to the initial time, the time does not change during EM */
    t = inputrec->init_t;

    if (bFirst ||
        (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count))
    {
        /* This is the first state or an old state used before the last ns */
        bNS = TRUE;
    }
    else
    {
        bNS = FALSE;
        if (inputrec->nstlist > 0)
        {
            bNS = TRUE;
        }
    }

    if (vsite)
    {
        construct_vsites(vsite, as_rvec_array(ems->s.x.data()), 1, nullptr,
                         top->idef.iparams, top->idef.il,
                         fr->ePBC, fr->bMolPBC, cr, ems->s.box);
    }

    if (DOMAINDECOMP(cr) && bNS)
    {
        /* Repartition the domain decomposition */
        em_dd_partition_system(fplog, count, cr, top_global, inputrec,
                               ems, top, mdAtoms, fr, vsite, constr,
                               nrnb, wcycle);
    }

    /* Calc force & energy on new trial position  */
    /* do_force always puts the charge groups in the box and shifts again
     * We do not unshift, so molecules are always whole in congrad.c
     */
    /* PLUMED */
    int plumedNeedsEnergy=0;
    matrix plumed_vir;
    if(plumedswitch){
      long int lstep=count; (*plumedcmd)(plumedmain,"setStepLong",&lstep);
      (*plumedcmd) (plumedmain,"setPositions",&ems->s.x[0][0]);
      (*plumedcmd) (plumedmain,"setMasses",&mdAtoms->mdatoms()->massT[0]);
      (*plumedcmd) (plumedmain,"setCharges",&mdAtoms->mdatoms()->chargeA[0]);
      (*plumedcmd) (plumedmain,"setBox",&ems->s.box[0][0]);
      (*plumedcmd) (plumedmain,"prepareCalc",NULL);
      (*plumedcmd) (plumedmain,"setForces",&ems->f[0][0]);
      (*plumedcmd) (plumedmain,"isEnergyNeeded",&plumedNeedsEnergy);
      clear_mat(plumed_vir);
      (*plumedcmd) (plumedmain,"setVirial",&plumed_vir[0][0]);
    }
    /* END PLUMED */

    do_force(fplog, cr, inputrec,
             count, nrnb, wcycle, top, &top_global->groups,
             ems->s.box, ems->s.x, &ems->s.hist,
             ems->f, force_vir, mdAtoms->mdatoms(), enerd, fcd,
             ems->s.lambda, graph, fr, vsite, mu_tot, t, nullptr, TRUE,
             GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
             GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
             (bNS ? GMX_FORCE_NS : 0),
             DOMAINDECOMP(cr) ?
             DdOpenBalanceRegionBeforeForceComputation::yes :
             DdOpenBalanceRegionBeforeForceComputation::no,
             DOMAINDECOMP(cr) ?
             DdCloseBalanceRegionAfterForceComputation::yes :
             DdCloseBalanceRegionAfterForceComputation::no);
    /* PLUMED */
    if(plumedswitch){
      if(plumedNeedsEnergy) {
        msmul(force_vir,2.0,plumed_vir);
        (*plumedcmd) (plumedmain,"setEnergy",&enerd->term[F_EPOT]);
        (*plumedcmd) (plumedmain,"performCalc",NULL);
        msmul(plumed_vir,0.5,force_vir);
      } else {
        msmul(plumed_vir,0.5,plumed_vir);
        m_add(force_vir,plumed_vir,force_vir);
      }
    }
    /* END PLUMED */

    /* Clear the unused shake virial and pressure */
    clear_mat(shake_vir);
    clear_mat(pres);

    /* Communicate stuff when parallel */
    if (PAR(cr) && inputrec->eI != eiNM)
    {
        wallcycle_start(wcycle, ewcMoveE);

        global_stat(gstat, cr, enerd, force_vir, shake_vir, mu_tot,
                    inputrec, nullptr, nullptr, nullptr, 1, &terminate,
                    nullptr, FALSE,
                    CGLO_ENERGY |
                    CGLO_PRESSURE |
                    CGLO_CONSTRAINT);

        wallcycle_stop(wcycle, ewcMoveE);
    }

    /* Calculate long range corrections to pressure and energy */
    calc_dispcorr(inputrec, fr, ems->s.box, ems->s.lambda[efptVDW],
                  pres, force_vir, &prescorr, &enercorr, &dvdlcorr);
    enerd->term[F_DISPCORR] = enercorr;
    enerd->term[F_EPOT]    += enercorr;
    enerd->term[F_PRES]    += prescorr;
    enerd->term[F_DVDL]    += dvdlcorr;

    ems->epot = enerd->term[F_EPOT];

    if (constr)
    {
        /* Project out the constraint components of the force */
        wallcycle_start(wcycle, ewcCONSTR);
        dvdl_constr = 0;
        rvec *f_rvec = as_rvec_array(ems->f.data());
        constrain(nullptr, FALSE, FALSE, constr, &top->idef,
                  inputrec, cr, count, 0, 1.0, mdAtoms->mdatoms(),
                  as_rvec_array(ems->s.x.data()), f_rvec, f_rvec,
                  fr->bMolPBC, ems->s.box,
                  ems->s.lambda[efptBONDED], &dvdl_constr,
                  nullptr, &shake_vir, nrnb, econqForceDispl);
        enerd->term[F_DVDL_CONSTR] += dvdl_constr;
        m_add(force_vir, shake_vir, vir);
        wallcycle_stop(wcycle, ewcCONSTR);
    }
    else
    {
        copy_mat(force_vir, vir);
    }

    clear_mat(ekin);
    enerd->term[F_PRES] =
        calc_pres(fr->ePBC, inputrec->nwall, ems->s.box, ekin, vir, pres);

    sum_dhdl(enerd, ems->s.lambda, inputrec->fepvals);

    if (EI_ENERGY_MINIMIZATION(inputrec->eI))
    {
        get_state_f_norm_max(cr, &(inputrec->opts), mdAtoms->mdatoms(), ems);
    }
}

/*! \brief Parallel utility computing this rank's part of the Polak-Ribiere sum
 * when the atom ordering of \p s_min and \p s_b differs.
 *
 * The forces of \p s_min are first scattered into a global array (indexed
 * by global atom number) and summed over all ranks. Then, for every atom
 * in the charge groups listed in \p s_b, the term (fb - fm)*fb is
 * accumulated for each dimension that is not frozen.
 *
 * \param[in] cr          Communication record; its DD setup provides the global charge groups
 * \param[in] opts        Group options; nFreeze selects the frozen dimensions
 * \param[in] mdatoms     Only used to check whether freeze groups are in use
 * \param[in] top_global  Global topology; supplies natoms and the freeze-group table
 * \param[in] s_min       State providing the forces fm
 * \param[in] s_b         State providing the forces fb and the atoms to sum over
 * \returns The partial sum for this rank; the caller sums over ranks.
 */
static double reorder_partsum(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
                              gmx_mtop_t *top_global,
                              em_state_t *s_min, em_state_t *s_b)
{
    t_block       *cgs_gl;
    int            ncg, *cg_gl, *index, c, cg, i, a0, a1, a, gf, m;
    double         partsum;
    unsigned char *grpnrFREEZE;

    if (debug)
    {
        fprintf(debug, "Doing reorder_partsum\n");
    }

    const rvec *fm = as_rvec_array(s_min->f.data());
    const rvec *fb = as_rvec_array(s_b->f.data());

    cgs_gl = dd_charge_groups_global(cr->dd);
    index  = cgs_gl->index;

    /* Collect fm in a global vector fmg.
     * This conflicts with the spirit of domain decomposition,
     * but to fully optimize this a much more complicated algorithm is required.
     */
    rvec *fmg;
    snew(fmg, top_global->natoms);

    /* i runs over the atoms in the order of s_min, a is the global atom index */
    ncg   = s_min->s.cg_gl.size();
    cg_gl = s_min->s.cg_gl.data();
    i     = 0;
    for (c = 0; c < ncg; c++)
    {
        cg = cg_gl[c];
        a0 = index[cg];
        a1 = index[cg+1];
        for (a = a0; a < a1; a++)
        {
            copy_rvec(fm[i], fmg[a]);
            i++;
        }
    }
    gmx_sum(top_global->natoms*3, fmg[0], cr);

    /* Now we will determine the part of the sum for the cgs in state s_b */
    ncg         = s_b->s.cg_gl.size();
    cg_gl       = s_b->s.cg_gl.data();
    partsum     = 0;
    i           = 0;
    gf          = 0;
    grpnrFREEZE = top_global->groups.grpnr[egcFREEZE];
    for (c = 0; c < ncg; c++)
    {
        cg = cg_gl[c];
        a0 = index[cg];
        a1 = index[cg+1];
        for (a = a0; a < a1; a++)
        {
            if (mdatoms->cFREEZE && grpnrFREEZE)
            {
                /* grpnrFREEZE is taken from the global topology, so it is
                 * looked up with the global atom index a (like fmg), not
                 * with the local counter i that indexes fb.
                 */
                gf = grpnrFREEZE[a];
            }
            for (m = 0; m < DIM; m++)
            {
                if (!opts->nFreeze[gf][m])
                {
                    partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
                }
            }
            i++;
        }
    }

    sfree(fmg);

    return partsum;
}

/*! \brief Compute the Polak-Ribiere beta from the forces of two states
 *
 * Accumulates (fb - fm)*fb over all non-frozen dimensions of the home
 * atoms, where fb and fm are the forces of \p s_b and \p s_min, sums the
 * result over ranks in parallel runs and divides by the squared force
 * norm of \p s_min. Nothing is printed.
 *
 * \returns The value sum/fnorm^2 described above.
 */
static real pr_beta(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
                    gmx_mtop_t *top_global,
                    em_state_t *s_min, em_state_t *s_b)
{
    /* This is just the classical Polak-Ribiere calculation of beta;
     * it looks a bit complicated since we take freeze groups into account,
     * and might have to sum it in parallel runs.
     */
    double     sum = 0;

    /* The plain loop below is only used without DD, or when both states
     * carry the current DD partitioning count; otherwise the atom
     * ordering in s_b and s_min might differ.
     */
    const bool useDirectLoop = (!DOMAINDECOMP(cr) ||
                                (s_min->s.ddp_count == cr->dd->ddp_count &&
                                 s_b->s.ddp_count   == cr->dd->ddp_count));

    if (useDirectLoop)
    {
        const rvec *forceMin    = as_rvec_array(s_min->f.data());
        const rvec *forceB      = as_rvec_array(s_b->f.data());
        int         freezeGroup = 0;

        for (int atom = 0; atom < mdatoms->homenr; atom++)
        {
            if (mdatoms->cFREEZE)
            {
                freezeGroup = mdatoms->cFREEZE[atom];
            }
            for (int d = 0; d < DIM; d++)
            {
                if (opts->nFreeze[freezeGroup][d])
                {
                    /* Frozen dimensions do not contribute */
                    continue;
                }
                sum += (forceB[atom][d] - forceMin[atom][d])*forceB[atom][d];
            }
        }
    }
    else
    {
        /* We need to reorder cgs while summing */
        sum = reorder_partsum(cr, opts, mdatoms, top_global, s_min, s_b);
    }

    if (PAR(cr))
    {
        gmx_sumd(1, &sum, cr);
    }

    return sum/gmx::square(s_min->fnorm);
}

namespace gmx
{

/*! \brief Do conjugate gradients minimization
    \copydoc integrator_t(FILE *fplog, t_commrec *cr, const gmx::MDLogger &mdlog,
                           int nfile, const t_filenm fnm[],
                           const gmx_output_env_t *oenv,
                           const MdrunOptions &mdrunOptions,
                           gmx_vsite_t *vsite, gmx_constr_t constr,
                           gmx::IMDOutputProvider *outputProvider,
                           t_inputrec *inputrec,
                           gmx_mtop_t *top_global, t_fcdata *fcd,
                           t_state *state_global,
                           gmx::MDAtoms *mdAtoms,
                           t_nrnb *nrnb, gmx_wallcycle_t wcycle,
                           gmx_edsam_t ed,
                           t_forcerec *fr,
                           const ReplicaExchangeParameters &replExParams,
                           gmx_membed_t gmx_unused *membed,
                           gmx_walltime_accounting_t walltime_accounting)
 */
double do_cg(FILE *fplog, t_commrec *cr, const gmx::MDLogger gmx_unused &mdlog,
             int nfile, const t_filenm fnm[],
             const gmx_output_env_t gmx_unused *oenv,
             const MdrunOptions &mdrunOptions,
             gmx_vsite_t *vsite, gmx_constr_t constr,
             gmx::IMDOutputProvider *outputProvider,
             t_inputrec *inputrec,
             gmx_mtop_t *top_global, t_fcdata *fcd,
             t_state *state_global,
             ObservablesHistory *observablesHistory,
             gmx::MDAtoms *mdAtoms,
             t_nrnb *nrnb, gmx_wallcycle_t wcycle,
             t_forcerec *fr,
             const ReplicaExchangeParameters gmx_unused &replExParams,
             gmx_membed_t gmx_unused *membed,
             gmx_walltime_accounting_t walltime_accounting)
{
    const char       *CG = "Polak-Ribiere Conjugate Gradients";

    gmx_localtop_t   *top;
    gmx_enerdata_t   *enerd;
    gmx_global_stat_t gstat;
    t_graph          *graph;
    double            tmp, minstep;
    real              stepsize;
    real              a, b, c, beta = 0.0;
    real              epot_repl = 0;
    real              pnorm;
    t_mdebin         *mdebin;
    gmx_bool          converged, foundlower;
    rvec              mu_tot;
    gmx_bool          do_log = FALSE, do_ene = FALSE, do_x, do_f;
    tensor            vir, pres;
    int               number_steps, neval = 0, nstcg = inputrec->nstcgsteep;
    gmx_mdoutf_t      outf;
    int               m, step, nminstep;
    auto              mdatoms = mdAtoms->mdatoms();

    step = 0;

    if (MASTER(cr))
    {
        // In CG, the state is extended with a search direction
        state_global->flags |= (1<<estCGP);

        // Ensure the extra per-atom state array gets allocated
        state_change_natoms(state_global, state_global->natoms);

        // Initialize the search direction to zero
        for (RVec &cg_p : state_global->cg_p)
        {
            cg_p = { 0, 0, 0 };
        }
    }
carlocamilloni's avatar
carlocamilloni committed

    /* Create 4 states on the stack and extract pointers that we will swap */
    em_state_t  s0 {}, s1 {}, s2 {}, s3 {};
    em_state_t *s_min = &s0;
    em_state_t *s_a   = &s1;
    em_state_t *s_b   = &s2;
    em_state_t *s_c   = &s3;

    /* Init em and store the local state in s_min */
    init_em(fplog, CG, cr, outputProvider, inputrec, mdrunOptions,
            state_global, top_global, s_min, &top,
            nrnb, mu_tot, fr, &enerd, &graph, mdAtoms, &gstat,
            vsite, constr, nullptr,
            nfile, fnm, &outf, &mdebin, wcycle);

    /* Print to log file */
    print_em_start(fplog, cr, walltime_accounting, wcycle, CG);

    /* Max number of steps */
    number_steps = inputrec->nsteps;

    if (MASTER(cr))
    {
        sp_header(stderr, CG, inputrec->em_tol, number_steps);
    }
    if (fplog)
    {
        sp_header(fplog, CG, inputrec->em_tol, number_steps);
    }

    /* Call the force routine and some auxiliary (neighboursearching etc.) */
    /* do_force always puts the charge groups in the box and shifts again
     * We do not unshift, so molecules are always whole in congrad.c
     */
    evaluate_energy(fplog, cr,
                    top_global, s_min, top,
                    inputrec, nrnb, wcycle, gstat,
                    vsite, constr, fcd, graph, mdAtoms, fr,
                    mu_tot, enerd, vir, pres, -1, TRUE);
    where();

    if (MASTER(cr))
    {
        /* Copy stuff to the energy bin for easy printing etc. */
        upd_mdebin(mdebin, FALSE, FALSE, (double)step,
                   mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box,
                   nullptr, nullptr, vir, pres, nullptr, mu_tot, constr);

        print_ebin_header(fplog, step, step);
        print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL,
                   mdebin, fcd, &(top_global->groups), &(inputrec->opts), nullptr);
    }
    where();

    /* Estimate/guess the initial stepsize */
    stepsize = inputrec->em_stepsize/s_min->fnorm;

    if (MASTER(cr))
    {
        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
        fprintf(stderr, "   F-max             = %12.5e on atom %d\n",
                s_min->fmax, s_min->a_fmax+1);
        fprintf(stderr, "   F-Norm            = %12.5e\n",
                s_min->fnorm/sqrtNumAtoms);
        fprintf(stderr, "\n");
        /* and copy to the log file too... */
        fprintf(fplog, "   F-max             = %12.5e on atom %d\n",
                s_min->fmax, s_min->a_fmax+1);
        fprintf(fplog, "   F-Norm            = %12.5e\n",
                s_min->fnorm/sqrtNumAtoms);
        fprintf(fplog, "\n");
    }
    /* Start the loop over CG steps.
     * Each successful step is counted, and we continue until
     * we either converge or reach the max number of steps.
     */
    converged = FALSE;
    for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++)
    {

        /* start taking steps in a new direction
         * First time we enter the routine, beta=0, and the direction is
         * simply the negative gradient.
         */

        /* Calculate the new direction in p, and the gradient in this direction, gpa */
        rvec       *pm  = as_rvec_array(s_min->s.cg_p.data());
        const rvec *sfm = as_rvec_array(s_min->f.data());
        double      gpa = 0;
        int         gf  = 0;
        for (int i = 0; i < mdatoms->homenr; i++)
        {
            if (mdatoms->cFREEZE)
            {
                gf = mdatoms->cFREEZE[i];
            }
            for (m = 0; m < DIM; m++)
            {
                if (!inputrec->opts.nFreeze[gf][m])
                {
                    pm[i][m] = sfm[i][m] + beta*pm[i][m];
                    gpa     -= pm[i][m]*sfm[i][m];
                    /* f is negative gradient, thus the sign */
                }
                else
                {
                    pm[i][m] = 0;
                }
            }
        }

        /* Sum the gradient along the line across CPUs */
        if (PAR(cr))
        {
            gmx_sumd(1, &gpa, cr);
        }

        /* Calculate the norm of the search vector */
        get_f_norm_max(cr, &(inputrec->opts), mdatoms, pm, &pnorm, nullptr, nullptr);

        /* Just in case stepsize reaches zero due to numerical precision... */
        if (stepsize <= 0)
        {
            stepsize = inputrec->em_stepsize/pnorm;
        }

        /*
         * Double check the value of the derivative in the search direction.
         * If it is positive it must be due to the old information in the
         * CG formula, so just remove that and start over with beta=0.
         * This corresponds to a steepest descent step.
         */
        if (gpa > 0)
        {
            beta = 0;
            step--;   /* Don't count this step since we are restarting */
            continue; /* Go back to the beginning of the big for-loop */
        }

        /* Calculate minimum allowed stepsize, before the average (norm)
         * relative change in coordinate is smaller than precision
         */
        minstep = 0;
        for (int i = 0; i < mdatoms->homenr; i++)
        {
            for (m = 0; m < DIM; m++)
            {
                tmp = fabs(s_min->s.x[i][m]);
                if (tmp < 1.0)
                {
                    tmp = 1.0;
                }
                tmp      = pm[i][m]/tmp;
                minstep += tmp*tmp;
            }
        }
        /* Add up from all CPUs */
        if (PAR(cr))
        {
            gmx_sumd(1, &minstep, cr);
        }

        minstep = GMX_REAL_EPS/sqrt(minstep/(3*top_global->natoms));
carlocamilloni's avatar
carlocamilloni committed
1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637

        if (stepsize < minstep)
        {
            converged = TRUE;
            break;
        }

        /* Write coordinates if necessary */
        do_x = do_per_step(step, inputrec->nstxout);
        do_f = do_per_step(step, inputrec->nstfout);

        write_em_traj(fplog, cr, outf, do_x, do_f, nullptr,
                      top_global, inputrec, step,
                      s_min, state_global, observablesHistory);

        /* Take a step downhill.
         * In theory, we should minimize the function along this direction.
         * That is quite possible, but it turns out to take 5-10 function evaluations
         * for each line. However, we don't really need to find the exact minimum -
         * it is much better to start a new CG step in a modified direction as soon
         * as we are close to it. This will save a lot of energy evaluations.
         *
         * In practice, we just try to take a single step.
         * If it worked (i.e. lowered the energy), we increase the stepsize but
         * then continue straight to the next CG step without trying to find any minimum.
         * If it didn't work (higher energy), there must be a minimum somewhere between
         * the old position and the new one.
         *
         * Due to the finite numerical accuracy, it turns out that it is a good idea
         * to even accept a SMALL increase in energy, if the derivative is still downhill.
         * This leads to lower final energies in the tests I've done. / Erik
         */
        s_a->epot = s_min->epot;
        a         = 0.0;
        c         = a + stepsize; /* reference position along line is zero */

        if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count)
        {
            em_dd_partition_system(fplog, step, cr, top_global, inputrec,
                                   s_min, top, mdAtoms, fr, vsite, constr,
                                   nrnb, wcycle);
        }

        /* Take a trial step (new coords in s_c) */
        do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, c, &s_min->s.cg_p, s_c,
                   constr, top, nrnb, wcycle, -1);

        neval++;
        /* Calculate energy for the trial step */
        evaluate_energy(fplog, cr,
                        top_global, s_c, top,
                        inputrec, nrnb, wcycle, gstat,
                        vsite, constr, fcd, graph, mdAtoms, fr,
                        mu_tot, enerd, vir, pres, -1, FALSE);

        /* Calc derivative along line */
        const rvec *pc  = as_rvec_array(s_c->s.cg_p.data());
        const rvec *sfc = as_rvec_array(s_c->f.data());
        double      gpc = 0;
        for (int i = 0; i < mdatoms->homenr; i++)
        {
            for (m = 0; m < DIM; m++)
            {
                gpc -= pc[i][m]*sfc[i][m]; /* f is negative gradient, thus the sign */
            }
        }
        /* Sum the gradient along the line across CPUs */
        if (PAR(cr))
        {
            gmx_sumd(1, &gpc, cr);
        }

        /* This is the max amount of increase in energy we tolerate */
        tmp = sqrt(GMX_REAL_EPS)*fabs(s_a->epot);

        /* Accept the step if the energy is lower, or if it is not significantly higher
         * and the line derivative is still negative.
         */
        if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp)))
        {
            foundlower = TRUE;
            /* Great, we found a better energy. Increase step for next iteration
             * if we are still going down, decrease it otherwise
             */
            if (gpc < 0)
            {
                stepsize *= 1.618034; /* The golden section */
            }
            else
            {
                stepsize *= 0.618034; /* 1/golden section */
            }
        }
        else
        {
            /* New energy is the same or higher. We will have to do some work
             * to find a smaller value in the interval. Take smaller step next time!
             */
            foundlower = FALSE;
            stepsize  *= 0.618034;
        }




        /* OK, if we didn't find a lower value we will have to locate one now - there must
         * be one in the interval [a=0,c].
         * The same thing is valid here, though: Don't spend dozens of iterations to find
         * the line minimum. We try to interpolate based on the derivative at the endpoints,
         * and only continue until we find a lower value. In most cases this means 1-2 iterations.
         *
         * I also have a safeguard for potentially really pathological functions so we never
         * take more than 20 steps before we give up ...
         *
         * If we already found a lower value we just skip this step and continue to the update.
         */
        double gpb;
        if (!foundlower)
        {
            nminstep = 0;

            do
            {
                /* Select a new trial point.
                 * If the derivatives at points a & c have different sign we interpolate to zero,
                 * otherwise just do a bisection.
                 */
                if (gpa < 0 && gpc > 0)
                {
                    b = a + gpa*(a-c)/(gpc-gpa);
                }
                else
                {
                    b = 0.5*(a+c);
                }

                /* safeguard if interpolation close to machine accuracy causes errors:
                 * never go outside the interval
                 */
                if (b <= a || b >= c)
                {
                    b = 0.5*(a+c);
                }

                if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
                {
                    /* Reload the old state */
                    em_dd_partition_system(fplog, -1, cr, top_global, inputrec,
                                           s_min, top, mdAtoms, fr, vsite, constr,
                                           nrnb, wcycle);
                }

                /* Take a trial step to this new point - new coords in s_b */
                do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, b, &s_min->s.cg_p, s_b,
                           constr, top, nrnb, wcycle, -1);

                neval++;
                /* Calculate energy for the trial step */
                evaluate_energy(fplog, cr,
                                top_global, s_b, top,
                                inputrec, nrnb, wcycle, gstat,
                                vsite, constr, fcd, graph, mdAtoms, fr,
                                mu_tot, enerd, vir, pres, -1, FALSE);

                /* p does not change within a step, but since the domain decomposition
                 * might change, we have to use cg_p of s_b here.
                 */
                const rvec *pb  = as_rvec_array(s_b->s.cg_p.data());
                const rvec *sfb = as_rvec_array(s_b->f.data());
                gpb             = 0;
                for (int i = 0; i < mdatoms->homenr; i++)
                {
                    for (m = 0; m < DIM; m++)
                    {
                        gpb -= pb[i][m]*sfb[i][m]; /* f is negative gradient, thus the sign */
                    }
                }
                /* Sum the gradient along the line across CPUs */
                if (PAR(cr))
                {
                    gmx_sumd(1, &gpb, cr);
                }

                if (debug)
                {
                    fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
                            s_a->epot, s_b->epot, s_c->epot, gpb);
                }

                epot_repl = s_b->epot;

                /* Keep one of the intervals based on the value of the derivative at the new point */
                if (gpb > 0)
                {
                    /* Replace c endpoint with b */
                    swap_em_state(&s_b, &s_c);
                    c   = b;
                    gpc = gpb;
                }
                else
                {
                    /* Replace a endpoint with b */
                    swap_em_state(&s_b, &s_a);
                    a   = b;
                    gpa = gpb;
                }

                /*
                 * Stop search as soon as we find a value smaller than the endpoints.
                 * Never run more than 20 steps, no matter what.
                 */
                nminstep++;
            }
            while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
                   (nminstep < 20));

            if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
                nminstep >= 20)
            {
                /* OK. We couldn't find a significantly lower energy.
                 * If beta==0 this was steepest descent, and then we give up.
                 * If not, set beta=0 and restart with steepest descent before quitting.
                 */
                if (beta == 0.0)
                {
                    /* Converged */
                    converged = TRUE;
                    break;
                }
                else
                {
                    /* Reset memory before giving up */
                    beta = 0.0;
                    continue;
                }
            }

            /* Select min energy state of A & C, put the best in B.
             */
            if (s_c->epot < s_a->epot)
            {
                if (debug)
                {
                    fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n",
                            s_c->epot, s_a->epot);
                }
                swap_em_state(&s_b, &s_c);
                gpb = gpc;
            }
            else
            {
                if (debug)
                {
                    fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n",
                            s_a->epot, s_c->epot);
                }
                swap_em_state(&s_b, &s_a);
                gpb = gpa;
            }

        }
        else
        {
            if (debug)
            {
                fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n",
                        s_c->epot);
            }
            swap_em_state(&s_b, &s_c);
            gpb = gpc;
        }

        /* new search direction */
        /* beta = 0 means forget all memory and restart with steepest descents. */
        if (nstcg && ((step % nstcg) == 0))
        {
            beta = 0.0;
        }
        else
        {
            /* s_min->fnorm cannot be zero, because then we would have converged
             * and broken out.
             */

            /* Polak-Ribiere update.
             * Change to fnorm2/fnorm2_old for Fletcher-Reeves
             */
            beta = pr_beta(cr, &inputrec->opts, mdatoms, top_global, s_min, s_b);
        }
        /* Limit beta to prevent oscillations */
        if (fabs(beta) > 5.0)
        {
            beta = 0.0;
        }


        /* update positions */
        swap_em_state(&s_min, &s_b);
        gpa = gpb;

        /* Print it if necessary */
        if (MASTER(cr))
        {
            if (mdrunOptions.verbose)
            {
                double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
                fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
                        step, s_min->epot, s_min->fnorm/sqrtNumAtoms,
                        s_min->fmax, s_min->a_fmax+1);
                fflush(stderr);
            }
            /* Store the new (lower) energies */
            upd_mdebin(mdebin, FALSE, FALSE, (double)step,
                       mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box,
                       nullptr, nullptr, vir, pres, nullptr, mu_tot, constr);

            do_log = do_per_step(step, inputrec->nstlog);
            do_ene = do_per_step(step, inputrec->nstenergy);

            /* Prepare IMD energy record, if bIMD is TRUE. */
            IMD_fill_energy_record(inputrec->bIMD, inputrec->imd, enerd, step, TRUE);

            if (do_log)
            {
                print_ebin_header(fplog, step, step);
            }
            print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE,
                       do_log ? fplog : nullptr, step, step, eprNORMAL,
                       mdebin, fcd, &(top_global->groups), &(inputrec->opts), nullptr);
        }

        /* Send energies and positions to the IMD client if bIMD is TRUE. */
        if (MASTER(cr) && do_IMD(inputrec->bIMD, step, cr, TRUE, state_global->box, as_rvec_array(state_global->x.data()), inputrec, 0, wcycle))
        {
            IMD_send_positions(inputrec->imd);
        }

        /* Stop when the maximum force lies below tolerance.
         * If we have reached machine precision, converged is already set to true.
         */
        converged = converged || (s_min->fmax < inputrec->em_tol);

    }   /* End of the loop */

    /* IMD cleanup, if bIMD is TRUE. */
    IMD_finalize(inputrec->bIMD, inputrec->imd);

    if (converged)
    {
        step--; /* we never took that last step in this case */

    }
    if (s_min->fmax > inputrec->em_tol)
    {
        if (MASTER(cr))
        {
            warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE);
            warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE);
        }
        converged = FALSE;
    }

    if (MASTER(cr))
    {
        /* If we printed energy and/or logfile last step (which was the last step)
         * we don't have to do it again, but otherwise print the final values.
         */
        if (!do_log)
        {
            /* Write final value to log since we didn't do anything the last step */
            print_ebin_header(fplog, step, step);
        }
        if (!do_ene || !do_log)
        {
            /* Write final energy file entries */
            print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE,
                       !do_log ? fplog : nullptr, step, step, eprNORMAL,
                       mdebin, fcd, &(top_global->groups), &(inputrec->opts), nullptr);
        }
    }

    /* Print some stuff... */
    if (MASTER(cr))
    {
        fprintf(stderr, "\nwriting lowest energy coordinates.\n");
    }

    /* IMPORTANT!
     * For accurate normal mode calculation it is imperative that we
     * store the last conformation into the full precision binary trajectory.
     *
     * However, we should only do it if we did NOT already write this step
     * above (which we did if do_x or do_f was true).
     */
    /* Note that with 0 < nstfout != nstxout we can end up with two frames
     * in the trajectory with the same step number.
     */
    do_x = !do_per_step(step, inputrec->nstxout);
    do_f = (inputrec->nstfout > 0 && !do_per_step(step, inputrec->nstfout));

    write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm),
                  top_global, inputrec, step,
                  s_min, state_global, observablesHistory);


    if (MASTER(cr))
    {
        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
        print_converged(stderr, CG, inputrec->em_tol, step, converged, number_steps,
                        s_min, sqrtNumAtoms);
        print_converged(fplog, CG, inputrec->em_tol, step, converged, number_steps,
                        s_min, sqrtNumAtoms);

        fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
    }

    finish_em(cr, outf, walltime_accounting, wcycle);

    /* To print the actual number of steps we needed somewhere */
    walltime_accounting_set_nsteps_done(walltime_accounting, step);

    return 0;
}   /* That's all folks */


/*! \brief Do L-BFGS (limited-memory quasi-Newton) minimization
    \copydoc integrator_t(FILE *fplog, t_commrec *cr, const gmx::MDLogger &mdlog,
                          int nfile, const t_filenm fnm[],
                          const gmx_output_env_t *oenv,
                          const MdrunOptions &mdrunOptions,
                          gmx_vsite_t *vsite, gmx_constr_t constr,
                          gmx::IMDOutputProvider *outputProvider,
                          t_inputrec *inputrec,
                          gmx_mtop_t *top_global, t_fcdata *fcd,
                          t_state *state_global,
                          ObservablesHistory *observablesHistory,
                          gmx::MDAtoms *mdAtoms,
                          t_nrnb *nrnb, gmx_wallcycle_t wcycle,
                          t_forcerec *fr,
                          const ReplicaExchangeParameters &replExParams,
                          gmx_membed_t gmx_unused *membed,
                          gmx_walltime_accounting_t walltime_accounting)
 */
double do_lbfgs(FILE *fplog, t_commrec *cr, const gmx::MDLogger gmx_unused &mdlog,
                int nfile, const t_filenm fnm[],
                const gmx_output_env_t gmx_unused *oenv,
                const MdrunOptions &mdrunOptions,
                gmx_vsite_t *vsite, gmx_constr_t constr,
                gmx::IMDOutputProvider *outputProvider,
                t_inputrec *inputrec,
                gmx_mtop_t *top_global, t_fcdata *fcd,
                t_state *state_global,
                ObservablesHistory *observablesHistory,
                gmx::MDAtoms *mdAtoms,
                t_nrnb *nrnb, gmx_wallcycle_t wcycle,
                t_forcerec *fr,
                const ReplicaExchangeParameters gmx_unused &replExParams,
                gmx_membed_t gmx_unused *membed,
                gmx_walltime_accounting_t walltime_accounting)
{
    static const char *LBFGS = "Low-Memory BFGS Minimizer";
    em_state_t         ems;
    gmx_localtop_t    *top;
    gmx_enerdata_t    *enerd;
    gmx_global_stat_t  gstat;
    t_graph           *graph;
    int                ncorr, nmaxcorr, point, cp, neval, nminstep;
    double             stepsize, step_taken, gpa, gpb, gpc, tmp, minstep;
    real              *rho, *alpha, *p, *s, **dx, **dg;
    real               a, b, c, maxdelta, delta;
    real               diag, Epot0;
    real               dgdx, dgdg, sq, yr, beta;
    t_mdebin          *mdebin;
    gmx_bool           converged;
    rvec               mu_tot;
    gmx_bool           do_log, do_ene, do_x, do_f, foundlower, *frozen;
    tensor             vir, pres;
    int                start, end, number_steps;
    gmx_mdoutf_t       outf;
    int                i, k, m, n, gf, step;
    int                mdof_flags;
    auto               mdatoms = mdAtoms->mdatoms();

    if (PAR(cr))
    {
        gmx_fatal(FARGS, "Cannot do parallel L-BFGS Minimization - yet.\n");
    }

    if (nullptr != constr)
    {
        gmx_fatal(FARGS, "The combination of constraints and L-BFGS minimization is not implemented. Either do not use constraints, or use another minimizer (e.g. steepest descent).");
    }

    n        = 3*state_global->natoms;
    nmaxcorr = inputrec->nbfgscorr;

    snew(frozen, n);

    snew(p, n);
    snew(rho, nmaxcorr);
    snew(alpha, nmaxcorr);

    snew(dx, nmaxcorr);
    for (i = 0; i < nmaxcorr; i++)
    {
        snew(dx[i], n);
    }

    snew(dg, nmaxcorr);
    for (i = 0; i < nmaxcorr; i++)
    {
        snew(dg[i], n);
    }

    step  = 0;
    neval = 0;

    /* Init em */
    init_em(fplog, LBFGS, cr, outputProvider, inputrec, mdrunOptions,
            state_global, top_global, &ems, &top,
            nrnb, mu_tot, fr, &enerd, &graph, mdAtoms, &gstat,
            vsite, constr, nullptr,
            nfile, fnm, &outf, &mdebin, wcycle);

    start = 0;
    end   = mdatoms->homenr;

    /* We need 4 working states */
    em_state_t  s0 {}, s1 {}, s2 {}, s3 {};
    em_state_t *sa   = &s0;
    em_state_t *sb   = &s1;
    em_state_t *sc   = &s2;
    em_state_t *last = &s3;
    /* Initialize by copying the state from ems (we could skip x and f here) */
    *sa              = ems;
    *sb              = ems;
    *sc              = ems;

    /* Print to log file */
    print_em_start(fplog, cr, walltime_accounting, wcycle, LBFGS);

    do_log = do_ene = do_x = do_f = TRUE;

    /* Max number of steps */
    number_steps = inputrec->nsteps;

    /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
    gf = 0;
    for (i = start; i < end; i++)
    {
        if (mdatoms->cFREEZE)
        {
            gf = mdatoms->cFREEZE[i];
        }
        for (m = 0; m < DIM; m++)
        {
            frozen[3*i+m] = inputrec->opts.nFreeze[gf][m];
        }
    }
    if (MASTER(cr))
    {
        sp_header(stderr, LBFGS, inputrec->em_tol, number_steps);
    }
    if (fplog)
    {
        sp_header(fplog, LBFGS, inputrec->em_tol, number_steps);
    }

    if (vsite)
    {
        construct_vsites(vsite, as_rvec_array(state_global->x.data()), 1, nullptr,
                         top->idef.iparams, top->idef.il,
                         fr->ePBC, fr->bMolPBC, cr, state_global->box);
    }

    /* Call the force routine and some auxiliary (neighboursearching etc.) */
    /* do_force always puts the charge groups in the box and shifts again
     * We do not unshift, so molecules are always whole
     */
    neval++;
    evaluate_energy(fplog, cr,
                    top_global, &ems, top,
                    inputrec, nrnb, wcycle, gstat,
                    vsite, constr, fcd, graph, mdAtoms, fr,
                    mu_tot, enerd, vir, pres, -1, TRUE);
    where();

    if (MASTER(cr))
    {
        /* Copy stuff to the energy bin for easy printing etc. */
        upd_mdebin(mdebin, FALSE, FALSE, (double)step,
                   mdatoms->tmass, enerd, state_global, inputrec->fepvals, inputrec->expandedvals, state_global->box,
                   nullptr, nullptr, vir, pres, nullptr, mu_tot, constr);

        print_ebin_header(fplog, step, step);
        print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL,
                   mdebin, fcd, &(top_global->groups), &(inputrec->opts), nullptr);
    }
    where();

    /* Set the initial step.
     * since it will be multiplied by the non-normalized search direction
     * vector (force vector the first time), we scale it by the
     * norm of the force.
     */

    if (MASTER(cr))
    {
        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
        fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr);
        fprintf(stderr, "   F-max             = %12.5e on atom %d\n", ems.fmax, ems.a_fmax + 1);
        fprintf(stderr, "   F-Norm            = %12.5e\n", ems.fnorm/sqrtNumAtoms);
        fprintf(stderr, "\n");
        /* and copy to the log file too... */
        fprintf(fplog, "Using %d BFGS correction steps.\n\n", nmaxcorr);
        fprintf(fplog, "   F-max             = %12.5e on atom %d\n", ems.fmax, ems.a_fmax + 1);
        fprintf(fplog, "   F-Norm            = %12.5e\n", ems.fnorm/sqrtNumAtoms);
        fprintf(fplog, "\n");
    }

    // Point is an index to the memory of search directions, where 0 is the first one.
    point = 0;

    // Set initial search direction to the force (-gradient), or 0 for frozen particles.
    real *fInit = static_cast<real *>(as_rvec_array(ems.f.data())[0]);
    for (i = 0; i < n; i++)
    {
        if (!frozen[i])
        {
            dx[point][i] = fInit[i]; /* Initial search direction */
        }
        else
        {
            dx[point][i] = 0;
        }
    }

    // Stepsize will be modified during the search, and actually it is not critical
    // (the main efficiency in the algorithm comes from changing directions), but
    // we still need an initial value, so estimate it as the inverse of the norm
    // so we take small steps where the potential fluctuates a lot.
    stepsize  = 1.0/ems.fnorm;

    /* Start the loop over BFGS steps.
     * Each successful step is counted, and we continue until
     * we either converge or reach the max number of steps.
     */

    ncorr = 0;

    /* Set the gradient from the force */
    converged = FALSE;
    for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++)
    {

        /* Write coordinates if necessary */
        do_x = do_per_step(step, inputrec->nstxout);
        do_f = do_per_step(step, inputrec->nstfout);

        mdof_flags = 0;
        if (do_x)
        {
            mdof_flags |= MDOF_X;
        }

        if (do_f)
        {
            mdof_flags |= MDOF_F;
        }

        if (inputrec->bIMD)
        {
            mdof_flags |= MDOF_IMD;
        }

        mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags,
                                         top_global, step, (real)step, &ems.s, state_global, observablesHistory, ems.f);

        /* Do the linesearching in the direction dx[point][0..(n-1)] */

        /* make s a pointer to current search direction - point=0 first time we get here */
        s = dx[point];

        real *xx = static_cast<real *>(as_rvec_array(ems.s.x.data())[0]);
        real *ff = static_cast<real *>(as_rvec_array(ems.f.data())[0]);

        // calculate line gradient in position A
        for (gpa = 0, i = 0; i < n; i++)
        {
            gpa -= s[i]*ff[i];
        }

        /* Calculate minimum allowed stepsize along the line, before the average (norm)
         * relative change in coordinate is smaller than precision
         */
        for (minstep = 0, i = 0; i < n; i++)
        {
            tmp = fabs(xx[i]);
            if (tmp < 1.0)
            {
                tmp = 1.0;
            }
            tmp      = s[i]/tmp;
            minstep += tmp*tmp;
        }
        minstep = GMX_REAL_EPS/sqrt(minstep/n);

        if (stepsize < minstep)
        {
            converged = TRUE;
            break;
        }

        // Before taking any steps along the line, store the old position
        *last       = ems;
        real *lastx = static_cast<real *>(as_rvec_array(last->s.x.data())[0]);
        real *lastf = static_cast<real *>(as_rvec_array(last->f.data())[0]);
        Epot0       = ems.epot;

        *sa         = ems;

        /* Take a step downhill.
         * In theory, we should find the actual minimum of the function in this
         * direction, somewhere along the line.
         * That is quite possible, but it turns out to take 5-10 function evaluations
         * for each line. However, we don't really need to find the exact minimum -
         * it is much better to start a new BFGS step in a modified direction as soon
         * as we are close to it. This will save a lot of energy evaluations.
         *
         * In practice, we just try to take a single step.
         * If it worked (i.e. lowered the energy), we increase the stepsize but
         * continue straight to the next BFGS step without trying to find any minimum,
         * i.e. we change the search direction too. If the line was smooth, it is
         * likely we are in a smooth region, and then it makes sense to take longer
         * steps in the modified search direction too.
         *
         * If it didn't work (higher energy), there must be a minimum somewhere between
         * the old position and the new one. Then we need to start by finding a lower
         * value before we change search direction. Since the energy was apparently
         * quite rough, we need to decrease the step size.
         *
         * Due to the finite numerical accuracy, it turns out that it is a good idea
         * to accept a SMALL increase in energy, if the derivative is still downhill.
         * This leads to lower final energies in the tests I've done. / Erik
         */

        // State "A" is the first position along the line.
        // reference position along line is initially zero
        a          = 0.0;

        // Check stepsize first. We do not allow displacements
        // larger than emstep.
        //
        do
        {
            // Pick a new position C by adding stepsize to A.
            c        = a + stepsize;

            // Calculate what the largest change in any individual coordinate
            // would be (translation along line * gradient along line)
            maxdelta = 0;
            for (i = 0; i < n; i++)
            {
                delta = c*s[i];
                if (delta > maxdelta)
                {
                    maxdelta = delta;
                }
            }
            // If any displacement is larger than the stepsize limit, reduce the step
            if (maxdelta > inputrec->em_stepsize)
            {
                stepsize *= 0.1;
            }
        }
        while (maxdelta > inputrec->em_stepsize);

        // Take a trial step and move the coordinate array xc[] to position C
        real *xc = static_cast<real *>(as_rvec_array(sc->s.x.data())[0]);
        for (i = 0; i < n; i++)
        {
            xc[i] = lastx[i] + c*s[i];
        }

        neval++;
        // Calculate energy for the trial step in position C
        evaluate_energy(fplog, cr,
                        top_global, sc, top,
                        inputrec, nrnb, wcycle, gstat,
                        vsite, constr, fcd, graph, mdAtoms, fr,
                        mu_tot, enerd, vir, pres, step, FALSE);

        // Calc line gradient in position C
        real *fc = static_cast<real *>(as_rvec_array(sc->f.data())[0]);
        for (gpc = 0, i = 0; i < n; i++)
        {
            gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */
        }
        /* Sum the gradient along the line across CPUs */
        if (PAR(cr))
        {
            gmx_sumd(1, &gpc, cr);
        }

        // This is the max amount of increase in energy we tolerate.
        // By allowing VERY small changes (close to numerical precision) we
        // frequently find even better (lower) final energies.
        tmp = sqrt(GMX_REAL_EPS)*fabs(sa->epot);

        // Accept the step if the energy is lower in the new position C (compared to A),
        // or if it is not significantly higher and the line derivative is still negative.
        if (sc->epot < sa->epot || (gpc < 0 && sc->epot < (sa->epot + tmp)))
        {
            // Great, we found a better energy. We no longer try to alter the
            // stepsize, but simply accept this new better position. Then we select a new
            // search direction instead, which will be much more efficient than continuing
            // to take smaller steps along a line. Set fnorm based on the new C position,
            // which will be used to update the stepsize to 1/fnorm further down.
            foundlower = TRUE;
        }
        else
        {
            // If we got here, the energy is NOT lower in point C, i.e. it will be the same
            // or higher than in point A. In this case it is pointless to move to point C,
            // so we will have to do more iterations along the same line to find a smaller
            // value in the interval [A=0.0,C].
            // Here, A is still 0.0, but that will change when we do a search in the interval
            // [0.0,C] below. That search we will do by interpolation or bisection rather
            // than with the stepsize, so no need to modify it. For the next search direction
            // it will be reset to 1/fnorm anyway.
            foundlower = FALSE;
        }

        if (!foundlower)
        {
            // OK, if we didn't find a lower value we will have to locate one now - there must
            // be one in the interval [a,c].
            // The same thing is valid here, though: Don't spend dozens of iterations to find
            // the line minimum. We try to interpolate based on the derivative at the endpoints,
            // and only continue until we find a lower value. In most cases this means 1-2 iterations.
            // I also have a safeguard for potentially really pathological functions so we never
            // take more than 20 steps before we give up.
            // If we already found a lower value we just skip this step and continue to the update.
            real fnorm = 0;
            nminstep   = 0;
            do
            {
                // Select a new trial point B in the interval [A,C].
                // If the derivatives at points a & c have different sign we interpolate to zero,
                // otherwise just do a bisection since there might be multiple minima/maxima
                // inside the interval.
                if (gpa < 0 && gpc > 0)
                {
                    b = a + gpa*(a-c)/(gpc-gpa);
                }
                else
                {
                    b = 0.5*(a+c);
                }

                /* safeguard if interpolation close to machine accuracy causes errors:
                 * never go outside the interval
                 */
                if (b <= a || b >= c)
                {
                    b = 0.5*(a+c);
                }

                // Take a trial step to point B
                real *xb = static_cast<real *>(as_rvec_array(sb->s.x.data())[0]);
                for (i = 0; i < n; i++)
                {
                    xb[i] = lastx[i] + b*s[i];
                }

                neval++;
                // Calculate energy for the trial step in point B
                evaluate_energy(fplog, cr,
                                top_global, sb, top,
                                inputrec, nrnb, wcycle, gstat,
                                vsite, constr, fcd, graph, mdAtoms, fr,
                                mu_tot, enerd, vir, pres, step, FALSE);
                fnorm = sb->fnorm;

                // Calculate gradient in point B
                real *fb = static_cast<real *>(as_rvec_array(sb->f.data())[0]);
                for (gpb = 0, i = 0; i < n; i++)
                {
                    gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */

                }
                /* Sum the gradient along the line across CPUs */
                if (PAR(cr))
                {
                    gmx_sumd(1, &gpb, cr);
Loading
Loading full blame...