diff --git a/patches/gromacs-2016-beta1.config b/patches/gromacs-2016-beta1.config new file mode 100644 index 0000000000000000000000000000000000000000..03f65f9e10a72823dcc4648c695166cffa631af6 --- /dev/null +++ b/patches/gromacs-2016-beta1.config @@ -0,0 +1,24 @@ + + +function plumed_preliminary_test(){ +# check that the README contains the word GROMACS, i.e. that this looks like a GROMACS source tree + grep -q GROMACS README 1>/dev/null 2>/dev/null +} + +function plumed_patch_info(){ +cat << EOF + +PLUMED can be incorporated into GROMACS using the standard patching procedure. +Patching must be done in the GROMACS root directory _before_ the cmake command is invoked. + +To enable PLUMED in a GROMACS simulation, run mdrun with the +extra -plumed flag, which specifies the name of the +PLUMED input file, e.g.: + +gmx mdrun -plumed plumed.dat + +For more information on GROMACS, visit http://www.gromacs.org + +EOF +} + diff --git a/patches/gromacs-2016-beta1.diff/src/gromacs/CMakeLists.txt b/patches/gromacs-2016-beta1.diff/src/gromacs/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..2f8a59296e79cbc4c9f1b47034389ac8c2730816 --- /dev/null +++ b/patches/gromacs-2016-beta1.diff/src/gromacs/CMakeLists.txt @@ -0,0 +1,261 @@ +# +# This file is part of the GROMACS molecular simulation package. +# +# Copyright (c) 2010,2011,2012,2013,2014,2015,2016, by the GROMACS development team, led by +# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, +# and including many others, as listed in the AUTHORS file in the +# top-level source directory and at http://www.gromacs.org. +# +# GROMACS is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public License +# as published by the Free Software Foundation; either version 2.1 +# of the License, or (at your option) any later version. +# +# GROMACS is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with GROMACS; if not, see +# http://www.gnu.org/licenses, or write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# If you want to redistribute modifications to GROMACS, please +# consider that scientific software is very special. Version +# control is crucial - bugs must be traceable. We will be happy to +# consider code for inclusion in the official distribution, but +# derived work must not be called official GROMACS. Details are found +# in the README & COPYING files - if they are missing, get the +# official version at http://www.gromacs.org. +# +# To help us fund GROMACS development, we humbly ask that you cite +# the research papers on the package. Check out http://www.gromacs.org.
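The config file above drives PLUMED's patching tool; the CMakeLists.txt that follows is the patched build file it installs, and its only visible changes relative to the CMakeLists.txt.preplumed copy further below are the include of Plumed.cmake and the extra target_link_libraries(libgromacs ${PLUMED_LOAD}) line. As a sketch of the typical workflow, assuming the plumed executable is on the PATH and that the engine name matches this patch file (check plumed patch -h for the exact options in your PLUMED version):

    cd gromacs-2016-beta1                  # the GROMACS root directory
    plumed patch -p -e gromacs-2016-beta1  # apply the patch before cmake is invoked
    mkdir build && cd build
    cmake ..                               # plus the usual GROMACS options
    make && make install

After building, PLUMED is enabled at run time with "gmx mdrun -plumed plumed.dat", as described in plumed_patch_info above.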
+ +include(${CMAKE_SOURCE_DIR}/Plumed.cmake) + +set(LIBGROMACS_SOURCES) + +set_property(GLOBAL PROPERTY GMX_LIBGROMACS_SOURCES) +set_property(GLOBAL PROPERTY GMX_INSTALLED_HEADERS) + +function (_gmx_add_files_to_property PROPERTY) + foreach (_file ${ARGN}) + if (IS_ABSOLUTE "${_file}") + set_property(GLOBAL APPEND PROPERTY ${PROPERTY} ${_file}) + else() + set_property(GLOBAL APPEND PROPERTY ${PROPERTY} + ${CMAKE_CURRENT_LIST_DIR}/${_file}) + endif() + endforeach() +endfunction () + +function (gmx_add_libgromacs_sources) + _gmx_add_files_to_property(GMX_LIBGROMACS_SOURCES ${ARGN}) +endfunction () + +function (gmx_install_headers) + if (NOT GMX_BUILD_MDRUN_ONLY) + file(RELATIVE_PATH _dest ${PROJECT_SOURCE_DIR}/src ${CMAKE_CURRENT_LIST_DIR}) + install(FILES ${ARGN} + DESTINATION "${INCL_INSTALL_DIR}/${_dest}" + COMPONENT development) + endif() + _gmx_add_files_to_property(GMX_INSTALLED_HEADERS ${ARGN}) +endfunction () + +function (gmx_write_installed_header_list) + get_property(_list GLOBAL PROPERTY GMX_INSTALLED_HEADERS) + string(REPLACE ";" "\n" _list "${_list}") + # TODO: Make this only update the file timestamp if the contents actually change. + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/installed-headers.txt "${_list}") +endfunction() + +if(GMX_USE_TNG) + option(GMX_EXTERNAL_TNG "Use external TNG instead of compiling the version shipped with GROMACS." + OFF) + # Detect TNG if GMX_EXTERNAL_TNG is explicitly ON + if(GMX_EXTERNAL_TNG) + find_package(TNG_IO 1.6.0) + if(NOT TNG_IO_FOUND) + message(FATAL_ERROR + "TNG >= 1.6.0 not found. " + "You can set GMX_EXTERNAL_TNG=OFF to compile TNG.") + endif() + include_directories(SYSTEM ${TNG_IO_INCLUDE_DIRS}) + endif() + if(NOT GMX_EXTERNAL_TNG) + include(${CMAKE_SOURCE_DIR}/src/external/tng_io/BuildTNG.cmake) + tng_get_source_list(TNG_SOURCES TNG_IO_DEFINITIONS) + list(APPEND LIBGROMACS_SOURCES ${TNG_SOURCES}) + tng_set_source_properties(WITH_ZLIB ${HAVE_ZLIB}) + + if (HAVE_ZLIB) + list(APPEND GMX_EXTRA_LIBRARIES ${ZLIB_LIBRARIES}) + include_directories(SYSTEM ${ZLIB_INCLUDE_DIRS}) + endif() + endif() +else() + # We still need to get tng/tng_io_fwd.h from somewhere! 
+ include_directories(BEFORE ${CMAKE_SOURCE_DIR}/src/external/tng_io/include) +endif() + +add_subdirectory(gmxlib) +add_subdirectory(mdlib) +add_subdirectory(listed-forces) +add_subdirectory(commandline) +add_subdirectory(domdec) +add_subdirectory(ewald) +add_subdirectory(fft) +add_subdirectory(gpu_utils) +add_subdirectory(hardware) +add_subdirectory(linearalgebra) +add_subdirectory(math) +add_subdirectory(mdrunutility) +add_subdirectory(mdtypes) +add_subdirectory(onlinehelp) +add_subdirectory(options) +add_subdirectory(pbcutil) +add_subdirectory(random) +add_subdirectory(tables) +add_subdirectory(timing) +add_subdirectory(topology) +add_subdirectory(trajectory) +add_subdirectory(utility) +add_subdirectory(fileio) +add_subdirectory(swap) +add_subdirectory(essentialdynamics) +add_subdirectory(pulling) +add_subdirectory(simd) +add_subdirectory(imd) +if (NOT GMX_BUILD_MDRUN_ONLY) + add_subdirectory(gmxana) + add_subdirectory(gmxpreprocess) + add_subdirectory(correlationfunctions) + add_subdirectory(statistics) + add_subdirectory(analysisdata) + add_subdirectory(selection) + add_subdirectory(trajectoryanalysis) + add_subdirectory(tools) +endif() + +get_property(PROPERTY_SOURCES GLOBAL PROPERTY GMX_LIBGROMACS_SOURCES) +list(APPEND LIBGROMACS_SOURCES ${GMXLIB_SOURCES} ${MDLIB_SOURCES} ${PROPERTY_SOURCES}) + +# This would be the standard way to include thread_mpi, but +# we want libgromacs to link the functions directly +#if(GMX_THREAD_MPI) +# add_subdirectory(thread_mpi) +#endif() +#target_link_libraries(gmx ${GMX_EXTRA_LIBRARIES} ${THREAD_MPI_LIB}) + +tmpi_get_source_list(THREAD_MPI_SOURCES ${CMAKE_SOURCE_DIR}/src/external/thread_mpi/src) +list(APPEND LIBGROMACS_SOURCES ${THREAD_MPI_SOURCES}) + +configure_file(version.h.cmakein version.h) +gmx_install_headers( + analysisdata.h + commandline.h + options.h + random.h + selection.h + trajectoryanalysis.h + utility.h + ${CMAKE_CURRENT_BINARY_DIR}/version.h + ) + +# This code is here instead of utility/CMakeLists.txt, because CMake +# custom commands and source file properties can only be set in the directory +# that contains the target that uses them. +# TODO: Generate a header instead that can be included from baseversion.c. +# That probably simplifies things somewhat. +set(GENERATED_VERSION_FILE utility/baseversion-gen.c) +gmx_configure_version_file( + utility/baseversion-gen.c.cmakein ${GENERATED_VERSION_FILE} + REMOTE_HASH SOURCE_FILE) +list(APPEND LIBGROMACS_SOURCES ${GENERATED_VERSION_FILE}) + +if (GMX_USE_CUDA) + cuda_add_library(libgromacs ${LIBGROMACS_SOURCES}) +else() + add_library(libgromacs ${LIBGROMACS_SOURCES}) +endif() + +# Recent versions of gcc and clang give warnings on scanner.cpp, which +# is a generated source file. These are awkward to suppress inline, so +# we do it in the compilation command (after testing that the compiler +# supports the suppressions). 
+include(CheckCXXCompilerFlag) +check_cxx_compiler_flag(-Wno-unused-parameter HAS_NO_UNUSED_PARAMETER) +if (HAS_NO_UNUSED_PARAMETER) + set(_scanner_cpp_compiler_flags "${_scanner_cpp_compiler_flags} -Wno-unused-parameter") +endif() +check_cxx_compiler_flag(-Wno-deprecated-register HAS_NO_DEPRECATED_REGISTER) +if (HAS_NO_DEPRECATED_REGISTER) + set(_scanner_cpp_compiler_flags "${_scanner_cpp_compiler_flags} -Wno-deprecated-register") +else() + check_cxx_compiler_flag(-Wno-deprecated HAS_NO_DEPRECATED) + if (HAS_NO_DEPRECATED) + set(_scanner_cpp_compiler_flags "${_scanner_cpp_compiler_flags} -Wno-deprecated") + endif() +endif() +set_source_files_properties(selection/scanner.cpp PROPERTIES COMPILE_FLAGS "${_scanner_cpp_compiler_flags}") + +target_link_libraries(libgromacs ${PLUMED_LOAD}) + +target_link_libraries(libgromacs + ${EXTRAE_LIBRARIES} + ${GMX_EXTRA_LIBRARIES} + ${TNG_IO_LIBRARIES} + ${FFT_LIBRARIES} ${LINEAR_ALGEBRA_LIBRARIES} + ${XML_LIBRARIES} + ${THREAD_LIB} ${GMX_SHARED_LINKER_FLAGS} ${OPENCL_LIBRARIES} + ${GMX_STDLIB_LIBRARIES}) +set_target_properties(libgromacs PROPERTIES + OUTPUT_NAME "gromacs${GMX_LIBS_SUFFIX}" + SOVERSION ${LIBRARY_SOVERSION_MAJOR} + VERSION ${LIBRARY_VERSION} + COMPILE_FLAGS "${OpenMP_C_FLAGS}") + +gmx_write_installed_header_list() + +# Only install the library in mdrun-only mode if it is actually necessary +# for the binary +if (NOT GMX_BUILD_MDRUN_ONLY OR BUILD_SHARED_LIBS) + install(TARGETS libgromacs + EXPORT libgromacs + LIBRARY DESTINATION ${LIB_INSTALL_DIR} + RUNTIME DESTINATION ${BIN_INSTALL_DIR} + ARCHIVE DESTINATION ${LIB_INSTALL_DIR} + COMPONENT libraries) +endif() + +if (NOT GMX_BUILD_MDRUN_ONLY) + include(InstallLibInfo.cmake) +endif() + +# Technically, the user could want to do this for an OpenCL build +# using the CUDA runtime, but currently there's no reason to want to +# do that. +if (INSTALL_CUDART_LIB) #can be set manually by the user + if (GMX_USE_CUDA) + foreach(CUDA_LIB ${CUDA_LIBRARIES}) + string(REGEX MATCH "cudart" IS_CUDART ${CUDA_LIB}) + if(IS_CUDART) #libcuda should not be installed + #install also name-links (linker uses those) + file(GLOB CUDA_LIBS ${CUDA_LIB}*) + install(FILES ${CUDA_LIBS} DESTINATION + ${LIB_INSTALL_DIR} COMPONENT libraries) + endif() + endforeach() + else() + message(WARNING "INSTALL_CUDART_LIB only makes sense when configuring for CUDA support") + endif() +endif() + +if(GMX_USE_OPENCL) + set(OPENCL_KERNELS ${MDLIB_OPENCL_KERNELS}) + + install(FILES ${OPENCL_KERNELS} DESTINATION + ${OCL_INSTALL_DIR} COMPONENT libraries) +endif() diff --git a/patches/gromacs-2016-beta1.diff/src/gromacs/CMakeLists.txt.preplumed b/patches/gromacs-2016-beta1.diff/src/gromacs/CMakeLists.txt.preplumed new file mode 100644 index 0000000000000000000000000000000000000000..edc051fe8328dcb0f2ff5604048c6e1a07bc66b1 --- /dev/null +++ b/patches/gromacs-2016-beta1.diff/src/gromacs/CMakeLists.txt.preplumed @@ -0,0 +1,257 @@ +# +# This file is part of the GROMACS molecular simulation package. +# +# Copyright (c) 2010,2011,2012,2013,2014,2015,2016, by the GROMACS development team, led by +# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, +# and including many others, as listed in the AUTHORS file in the +# top-level source directory and at http://www.gromacs.org. +# +# GROMACS is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public License +# as published by the Free Software Foundation; either version 2.1 +# of the License, or (at your option) any later version.
+# +# GROMACS is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with GROMACS; if not, see +# http://www.gnu.org/licenses, or write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# If you want to redistribute modifications to GROMACS, please +# consider that scientific software is very special. Version +# control is crucial - bugs must be traceable. We will be happy to +# consider code for inclusion in the official distribution, but +# derived work must not be called official GROMACS. Details are found +# in the README & COPYING files - if they are missing, get the +# official version at http://www.gromacs.org. +# +# To help us fund GROMACS development, we humbly ask that you cite +# the research papers on the package. Check out http://www.gromacs.org. + +set(LIBGROMACS_SOURCES) + +set_property(GLOBAL PROPERTY GMX_LIBGROMACS_SOURCES) +set_property(GLOBAL PROPERTY GMX_INSTALLED_HEADERS) + +function (_gmx_add_files_to_property PROPERTY) + foreach (_file ${ARGN}) + if (IS_ABSOLUTE "${_file}") + set_property(GLOBAL APPEND PROPERTY ${PROPERTY} ${_file}) + else() + set_property(GLOBAL APPEND PROPERTY ${PROPERTY} + ${CMAKE_CURRENT_LIST_DIR}/${_file}) + endif() + endforeach() +endfunction () + +function (gmx_add_libgromacs_sources) + _gmx_add_files_to_property(GMX_LIBGROMACS_SOURCES ${ARGN}) +endfunction () + +function (gmx_install_headers) + if (NOT GMX_BUILD_MDRUN_ONLY) + file(RELATIVE_PATH _dest ${PROJECT_SOURCE_DIR}/src ${CMAKE_CURRENT_LIST_DIR}) + install(FILES ${ARGN} + DESTINATION "${INCL_INSTALL_DIR}/${_dest}" + COMPONENT development) + endif() + _gmx_add_files_to_property(GMX_INSTALLED_HEADERS ${ARGN}) +endfunction () + +function (gmx_write_installed_header_list) + get_property(_list GLOBAL PROPERTY GMX_INSTALLED_HEADERS) + string(REPLACE ";" "\n" _list "${_list}") + # TODO: Make this only update the file timestamp if the contents actually change. + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/installed-headers.txt "${_list}") +endfunction() + +if(GMX_USE_TNG) + option(GMX_EXTERNAL_TNG "Use external TNG instead of compiling the version shipped with GROMACS." + OFF) + # Detect TNG if GMX_EXTERNAL_TNG is explicitly ON + if(GMX_EXTERNAL_TNG) + find_package(TNG_IO 1.6.0) + if(NOT TNG_IO_FOUND) + message(FATAL_ERROR + "TNG >= 1.6.0 not found. " + "You can set GMX_EXTERNAL_TNG=OFF to compile TNG.") + endif() + include_directories(SYSTEM ${TNG_IO_INCLUDE_DIRS}) + endif() + if(NOT GMX_EXTERNAL_TNG) + include(${CMAKE_SOURCE_DIR}/src/external/tng_io/BuildTNG.cmake) + tng_get_source_list(TNG_SOURCES TNG_IO_DEFINITIONS) + list(APPEND LIBGROMACS_SOURCES ${TNG_SOURCES}) + tng_set_source_properties(WITH_ZLIB ${HAVE_ZLIB}) + + if (HAVE_ZLIB) + list(APPEND GMX_EXTRA_LIBRARIES ${ZLIB_LIBRARIES}) + include_directories(SYSTEM ${ZLIB_INCLUDE_DIRS}) + endif() + endif() +else() + # We still need to get tng/tng_io_fwd.h from somewhere! 
+ include_directories(BEFORE ${CMAKE_SOURCE_DIR}/src/external/tng_io/include) +endif() + +add_subdirectory(gmxlib) +add_subdirectory(mdlib) +add_subdirectory(listed-forces) +add_subdirectory(commandline) +add_subdirectory(domdec) +add_subdirectory(ewald) +add_subdirectory(fft) +add_subdirectory(gpu_utils) +add_subdirectory(hardware) +add_subdirectory(linearalgebra) +add_subdirectory(math) +add_subdirectory(mdrunutility) +add_subdirectory(mdtypes) +add_subdirectory(onlinehelp) +add_subdirectory(options) +add_subdirectory(pbcutil) +add_subdirectory(random) +add_subdirectory(tables) +add_subdirectory(timing) +add_subdirectory(topology) +add_subdirectory(trajectory) +add_subdirectory(utility) +add_subdirectory(fileio) +add_subdirectory(swap) +add_subdirectory(essentialdynamics) +add_subdirectory(pulling) +add_subdirectory(simd) +add_subdirectory(imd) +if (NOT GMX_BUILD_MDRUN_ONLY) + add_subdirectory(gmxana) + add_subdirectory(gmxpreprocess) + add_subdirectory(correlationfunctions) + add_subdirectory(statistics) + add_subdirectory(analysisdata) + add_subdirectory(selection) + add_subdirectory(trajectoryanalysis) + add_subdirectory(tools) +endif() + +get_property(PROPERTY_SOURCES GLOBAL PROPERTY GMX_LIBGROMACS_SOURCES) +list(APPEND LIBGROMACS_SOURCES ${GMXLIB_SOURCES} ${MDLIB_SOURCES} ${PROPERTY_SOURCES}) + +# This would be the standard way to include thread_mpi, but +# we want libgromacs to link the functions directly +#if(GMX_THREAD_MPI) +# add_subdirectory(thread_mpi) +#endif() +#target_link_libraries(gmx ${GMX_EXTRA_LIBRARIES} ${THREAD_MPI_LIB}) + +tmpi_get_source_list(THREAD_MPI_SOURCES ${CMAKE_SOURCE_DIR}/src/external/thread_mpi/src) +list(APPEND LIBGROMACS_SOURCES ${THREAD_MPI_SOURCES}) + +configure_file(version.h.cmakein version.h) +gmx_install_headers( + analysisdata.h + commandline.h + options.h + random.h + selection.h + trajectoryanalysis.h + utility.h + ${CMAKE_CURRENT_BINARY_DIR}/version.h + ) + +# This code is here instead of utility/CMakeLists.txt, because CMake +# custom commands and source file properties can only be set in the directory +# that contains the target that uses them. +# TODO: Generate a header instead that can be included from baseversion.c. +# That probably simplifies things somewhat. +set(GENERATED_VERSION_FILE utility/baseversion-gen.c) +gmx_configure_version_file( + utility/baseversion-gen.c.cmakein ${GENERATED_VERSION_FILE} + REMOTE_HASH SOURCE_FILE) +list(APPEND LIBGROMACS_SOURCES ${GENERATED_VERSION_FILE}) + +if (GMX_USE_CUDA) + cuda_add_library(libgromacs ${LIBGROMACS_SOURCES}) +else() + add_library(libgromacs ${LIBGROMACS_SOURCES}) +endif() + +# Recent versions of gcc and clang give warnings on scanner.cpp, which +# is a generated source file. These are awkward to suppress inline, so +# we do it in the compilation command (after testing that the compiler +# supports the suppressions). 
+include(CheckCXXCompilerFlag) +check_cxx_compiler_flag(-Wno-unused-parameter HAS_NO_UNUSED_PARAMETER) +if (HAS_NO_UNUSED_PARAMETER) + set(_scanner_cpp_compiler_flags "${_scanner_cpp_compiler_flags} -Wno-unused-parameter") +endif() +check_cxx_compiler_flag(-Wno-deprecated-register HAS_NO_DEPRECATED_REGISTER) +if (HAS_NO_DEPRECATED_REGISTER) + set(_scanner_cpp_compiler_flags "${_scanner_cpp_compiler_flags} -Wno-deprecated-register") +else() + check_cxx_compiler_flag(-Wno-deprecated HAS_NO_DEPRECATED) + if (HAS_NO_DEPRECATED) + set(_scanner_cpp_compiler_flags "${_scanner_cpp_compiler_flags} -Wno-deprecated") + endif() +endif() +set_source_files_properties(selection/scanner.cpp PROPERTIES COMPILE_FLAGS "${_scanner_cpp_compiler_flags}") + +target_link_libraries(libgromacs + ${EXTRAE_LIBRARIES} + ${GMX_EXTRA_LIBRARIES} + ${TNG_IO_LIBRARIES} + ${FFT_LIBRARIES} ${LINEAR_ALGEBRA_LIBRARIES} + ${XML_LIBRARIES} + ${THREAD_LIB} ${GMX_SHARED_LINKER_FLAGS} ${OPENCL_LIBRARIES} + ${GMX_STDLIB_LIBRARIES}) +set_target_properties(libgromacs PROPERTIES + OUTPUT_NAME "gromacs${GMX_LIBS_SUFFIX}" + SOVERSION ${LIBRARY_SOVERSION_MAJOR} + VERSION ${LIBRARY_VERSION} + COMPILE_FLAGS "${OpenMP_C_FLAGS}") + +gmx_write_installed_header_list() + +# Only install the library in mdrun-only mode if it is actually necessary +# for the binary +if (NOT GMX_BUILD_MDRUN_ONLY OR BUILD_SHARED_LIBS) + install(TARGETS libgromacs + EXPORT libgromacs + LIBRARY DESTINATION ${LIB_INSTALL_DIR} + RUNTIME DESTINATION ${BIN_INSTALL_DIR} + ARCHIVE DESTINATION ${LIB_INSTALL_DIR} + COMPONENT libraries) +endif() + +if (NOT GMX_BUILD_MDRUN_ONLY) + include(InstallLibInfo.cmake) +endif() + +# Technically, the user could want to do this for an OpenCL build +# using the CUDA runtime, but currently there's no reason to want to +# do that. +if (INSTALL_CUDART_LIB) #can be set manually by the user + if (GMX_USE_CUDA) + foreach(CUDA_LIB ${CUDA_LIBRARIES}) + string(REGEX MATCH "cudart" IS_CUDART ${CUDA_LIB}) + if(IS_CUDART) #libcuda should not be installed + #install also name-links (linker uses those) + file(GLOB CUDA_LIBS ${CUDA_LIB}*) + install(FILES ${CUDA_LIBS} DESTINATION + ${LIB_INSTALL_DIR} COMPONENT libraries) + endif() + endforeach() + else() + message(WARNING "INSTALL_CUDART_LIB only makes sense when configuring for CUDA support") + endif() +endif() + +if(GMX_USE_OPENCL) + set(OPENCL_KERNELS ${MDLIB_OPENCL_KERNELS}) + + install(FILES ${OPENCL_KERNELS} DESTINATION + ${OCL_INSTALL_DIR} COMPONENT libraries) +endif() diff --git a/patches/gromacs-2016-beta1.diff/src/gromacs/mdlib/force.cpp b/patches/gromacs-2016-beta1.diff/src/gromacs/mdlib/force.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3bee45971a7e763451ebb77a43f40c0c2f2cb3eb --- /dev/null +++ b/patches/gromacs-2016-beta1.diff/src/gromacs/mdlib/force.cpp @@ -0,0 +1,913 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org.
+ * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +#include "gmxpre.h" + +#include "force.h" + +#include "config.h" + +#include <assert.h> +#include <math.h> +#include <string.h> + +#include "gromacs/domdec/domdec.h" +#include "gromacs/domdec/domdec_struct.h" +#include "gromacs/ewald/ewald.h" +#include "gromacs/ewald/long-range-correction.h" +#include "gromacs/ewald/pme.h" +#include "gromacs/gmxlib/network.h" +#include "gromacs/gmxlib/nrnb.h" +#include "gromacs/gmxlib/nonbonded/nonbonded.h" +#include "gromacs/listed-forces/listed-forces.h" +#include "gromacs/math/vec.h" +#include "gromacs/math/vecdump.h" +#include "gromacs/mdlib/forcerec-threading.h" +#include "gromacs/mdlib/genborn.h" +#include "gromacs/mdlib/mdrun.h" +#include "gromacs/mdlib/ns.h" +#include "gromacs/mdlib/qmmm.h" +#include "gromacs/mdtypes/commrec.h" +#include "gromacs/mdtypes/inputrec.h" +#include "gromacs/mdtypes/md_enums.h" +#include "gromacs/pbcutil/ishift.h" +#include "gromacs/pbcutil/mshift.h" +#include "gromacs/pbcutil/pbc.h" +#include "gromacs/timing/wallcycle.h" +#include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/exceptions.h" +#include "gromacs/utility/fatalerror.h" +#include "gromacs/utility/smalloc.h" +/* PLUMED */ +#include "../../../Plumed.h" +int plumedswitch=0; +plumed plumedmain; +void(*plumedcmd)(plumed,const char*,const void*)=NULL; +/* END PLUMED */ + + +void ns(FILE *fp, + t_forcerec *fr, + matrix box, + gmx_groups_t *groups, + gmx_localtop_t *top, + t_mdatoms *md, + t_commrec *cr, + t_nrnb *nrnb, + gmx_bool bFillGrid) +{ + int nsearch; + + + if (!fr->ns->nblist_initialized) + { + init_neighbor_list(fp, fr, md->homenr); + } + + nsearch = search_neighbours(fp, fr, box, top, groups, cr, nrnb, md, + bFillGrid); + if (debug) + { + fprintf(debug, "nsearch = %d\n", nsearch); + } + + /* Check whether we have to do dynamic load balancing */ + /*if ((nsb->nstDlb > 0) && (mod(step,nsb->nstDlb) == 0)) + count_nb(cr,nsb,&(top->blocks[ebCGS]),nns,fr->nlr, + &(top->idef),opts->ngener); + */ + if (fr->ns->dump_nl > 0) + { + dump_nblist(fp, cr, fr, fr->ns->dump_nl); + } +} + +static void reduce_thread_energies(tensor vir_q, tensor vir_lj, + real *Vcorr_q, real *Vcorr_lj, + real *dvdl_q, 
real *dvdl_lj, + int nthreads, + ewald_corr_thread_t *ewc_t) +{ + int t; + + for (t = 1; t < nthreads; t++) + { + *Vcorr_q += ewc_t[t].Vcorr_q; + *Vcorr_lj += ewc_t[t].Vcorr_lj; + *dvdl_q += ewc_t[t].dvdl[efptCOUL]; + *dvdl_lj += ewc_t[t].dvdl[efptVDW]; + m_add(vir_q, ewc_t[t].vir_q, vir_q); + m_add(vir_lj, ewc_t[t].vir_lj, vir_lj); + } +} + +void do_force_lowlevel(t_forcerec *fr, t_inputrec *ir, + t_idef *idef, t_commrec *cr, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + t_mdatoms *md, + rvec x[], history_t *hist, + rvec f[], + gmx_enerdata_t *enerd, + t_fcdata *fcd, + gmx_localtop_t *top, + gmx_genborn_t *born, + gmx_bool bBornRadii, + matrix box, + t_lambda *fepvals, + real *lambda, + t_graph *graph, + t_blocka *excl, + rvec mu_tot[], + int flags, + float *cycles_pme) +{ + int i, j; + int donb_flags; + gmx_bool bSB; + int pme_flags; + matrix boxs; + rvec box_size; + t_pbc pbc; + real dvdl_dum[efptNR], dvdl_nb[efptNR]; + +#if GMX_MPI + double t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */ +#endif + + set_pbc(&pbc, fr->ePBC, box); + + /* reset free energy components */ + for (i = 0; i < efptNR; i++) + { + dvdl_nb[i] = 0; + dvdl_dum[i] = 0; + } + + /* Reset box */ + for (i = 0; (i < DIM); i++) + { + box_size[i] = box[i][i]; + } + + /* do QMMM first if requested */ + if (fr->bQMMM) + { + enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr); + } + + /* Call the short range functions all in one go. */ + +#if GMX_MPI + /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/ +#define TAKETIME FALSE + if (TAKETIME) + { + MPI_Barrier(cr->mpi_comm_mygroup); + t0 = MPI_Wtime(); + } +#endif + + if (ir->nwall) + { + /* foreign lambda component for walls */ + real dvdl_walls = do_walls(ir, fr, box, md, x, f, lambda[efptVDW], + enerd->grpp.ener[egLJSR], nrnb); + enerd->dvdl_lin[efptVDW] += dvdl_walls; + } + + /* If doing GB, reset dvda and calculate the Born radii */ + if (ir->implicit_solvent) + { + wallcycle_sub_start(wcycle, ewcsNONBONDED); + + for (i = 0; i < born->nr; i++) + { + fr->dvda[i] = 0; + } + + if (bBornRadii) + { + calc_gb_rad(cr, fr, ir, top, x, fr->gblist, born, md, nrnb); + } + + wallcycle_sub_stop(wcycle, ewcsNONBONDED); + } + + where(); + /* We only do non-bonded calculation with group scheme here, the verlet + * calls are done from do_force_cutsVERLET(). */ + if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED)) + { + donb_flags = 0; + /* Add short-range interactions */ + donb_flags |= GMX_NONBONDED_DO_SR; + + /* Currently all group scheme kernels always calculate (shift-)forces */ + if (flags & GMX_FORCE_FORCES) + { + donb_flags |= GMX_NONBONDED_DO_FORCE; + } + if (flags & GMX_FORCE_VIRIAL) + { + donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE; + } + if (flags & GMX_FORCE_ENERGY) + { + donb_flags |= GMX_NONBONDED_DO_POTENTIAL; + } + + wallcycle_sub_start(wcycle, ewcsNONBONDED); + do_nonbonded(fr, x, f, md, excl, + &enerd->grpp, nrnb, + lambda, dvdl_nb, -1, -1, donb_flags); + + /* If we do foreign lambda and we have soft-core interactions + * we have to recalculate the (non-linear) energies contributions. + */ + if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0) + { + for (i = 0; i < enerd->n_lambda; i++) + { + real lam_i[efptNR]; + + for (j = 0; j < efptNR; j++) + { + lam_i[j] = (i == 0 ? 
lambda[j] : fepvals->all_lambda[j][i-1]); + } + reset_foreign_enerdata(enerd); + do_nonbonded(fr, x, f, md, excl, + &(enerd->foreign_grpp), nrnb, + lam_i, dvdl_dum, -1, -1, + (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA); + sum_epot(&(enerd->foreign_grpp), enerd->foreign_term); + enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; + } + } + wallcycle_sub_stop(wcycle, ewcsNONBONDED); + where(); + } + + /* If we are doing GB, calculate bonded forces and apply corrections + * to the solvation forces */ + /* MRS: Eventually, many need to include free energy contribution here! */ + if (ir->implicit_solvent) + { + wallcycle_sub_start(wcycle, ewcsLISTED); + calc_gb_forces(cr, md, born, top, x, f, fr, idef, + ir->gb_algorithm, ir->sa_algorithm, nrnb, &pbc, graph, enerd); + wallcycle_sub_stop(wcycle, ewcsLISTED); + } + +#if GMX_MPI + if (TAKETIME) + { + t1 = MPI_Wtime(); + fr->t_fnbf += t1-t0; + } +#endif + + if (fepvals->sc_alpha != 0) + { + enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW]; + } + else + { + enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW]; + } + + if (fepvals->sc_alpha != 0) + + /* even though coulomb part is linear, we already added it, because we + need to go through the vdw calculation anyway */ + { + enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL]; + } + else + { + enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL]; + } + + if (debug) + { + pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS); + } + + /* Shift the coordinates. Must be done before listed forces and PPPM, + * but is also necessary for SHAKE and update, therefore it can NOT + * go when no listed forces have to be evaluated. + * + * The shifting and PBC code is deliberately not timed, since with + * the Verlet scheme it only takes non-zero time with triclinic + * boxes, and even then the time is around a factor of 100 less + * than the next smallest counter. + */ + + + /* Here sometimes we would not need to shift with NBFonly, + * but we do so anyhow for consistency of the returned coordinates. + */ + if (graph) + { + shift_self(graph, box, x); + if (TRICLINIC(box)) + { + inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes); + } + else + { + inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes); + } + } + /* Check whether we need to do listed interactions or correct for exclusions */ + if (fr->bMolPBC && + ((flags & GMX_FORCE_LISTED) + || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype))) + { + /* TODO There are no electrostatics methods that require this + transformation, when using the Verlet scheme, so update the + above conditional. */ + /* Since all atoms are in the rectangular or triclinic unit-cell, + * only single box vector shifts (2 in x) are required. + */ + set_pbc_dd(&pbc, fr->ePBC, DOMAINDECOMP(cr) ? cr->dd->nc : nullptr, + TRUE, box); + } + + do_force_listed(wcycle, box, ir->fepvals, cr->ms, + idef, (const rvec *) x, hist, f, fr, + &pbc, graph, enerd, nrnb, lambda, md, fcd, + DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, + flags); + + where(); + + *cycles_pme = 0; + clear_mat(fr->vir_el_recip); + clear_mat(fr->vir_lj_recip); + + /* Do long-range electrostatics and/or LJ-PME, including related short-range + * corrections.
+ */ + if (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype)) + { + int status = 0; + real Vlr_q = 0, Vlr_lj = 0, Vcorr_q = 0, Vcorr_lj = 0; + real dvdl_long_range_q = 0, dvdl_long_range_lj = 0; + + bSB = (ir->nwall == 2); + if (bSB) + { + copy_mat(box, boxs); + svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]); + box_size[ZZ] *= ir->wall_ewald_zfac; + } + + if (EEL_PME_EWALD(fr->eeltype) || EVDW_PME(fr->vdwtype)) + { + real dvdl_long_range_correction_q = 0; + real dvdl_long_range_correction_lj = 0; + /* With the Verlet scheme exclusion forces are calculated + * in the non-bonded kernel. + */ + /* The TPI molecule does not have exclusions with the rest + * of the system and no intra-molecular PME grid + * contributions will be calculated in + * gmx_pme_calc_energy. + */ + if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) || + ir->ewald_geometry != eewg3D || + ir->epsilon_surface != 0) + { + int nthreads, t; + + wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION); + + if (fr->n_tpi > 0) + { + gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions"); + } + + nthreads = fr->nthread_ewc; +#pragma omp parallel for num_threads(nthreads) schedule(static) + for (t = 0; t < nthreads; t++) + { + try + { + tensor *vir_q, *vir_lj; + real *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj; + if (t == 0) + { + vir_q = &fr->vir_el_recip; + vir_lj = &fr->vir_lj_recip; + Vcorrt_q = &Vcorr_q; + Vcorrt_lj = &Vcorr_lj; + dvdlt_q = &dvdl_long_range_correction_q; + dvdlt_lj = &dvdl_long_range_correction_lj; + } + else + { + vir_q = &fr->ewc_t[t].vir_q; + vir_lj = &fr->ewc_t[t].vir_lj; + Vcorrt_q = &fr->ewc_t[t].Vcorr_q; + Vcorrt_lj = &fr->ewc_t[t].Vcorr_lj; + dvdlt_q = &fr->ewc_t[t].dvdl[efptCOUL]; + dvdlt_lj = &fr->ewc_t[t].dvdl[efptVDW]; + clear_mat(*vir_q); + clear_mat(*vir_lj); + } + *dvdlt_q = 0; + *dvdlt_lj = 0; + + /* Threading is only supported with the Verlet cut-off + * scheme and then only single particle forces (no + * exclusion forces) are calculated, so we can store + * the forces in the normal, single fr->f_novirsum array. + */ + ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1], + cr, t, fr, + md->chargeA, md->chargeB, + md->sqrt_c6A, md->sqrt_c6B, + md->sigmaA, md->sigmaB, + md->sigma3A, md->sigma3B, + md->nChargePerturbed || md->nTypePerturbed, + ir->cutoff_scheme != ecutsVERLET, + excl, x, bSB ? boxs : box, mu_tot, + ir->ewald_geometry, + ir->epsilon_surface, + fr->f_novirsum, *vir_q, *vir_lj, + Vcorrt_q, Vcorrt_lj, + lambda[efptCOUL], lambda[efptVDW], + dvdlt_q, dvdlt_lj); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; + } + if (nthreads > 1) + { + reduce_thread_energies(fr->vir_el_recip, fr->vir_lj_recip, + &Vcorr_q, &Vcorr_lj, + &dvdl_long_range_correction_q, + &dvdl_long_range_correction_lj, + nthreads, fr->ewc_t); + } + wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION); + } + + if (EEL_PME_EWALD(fr->eeltype) && fr->n_tpi == 0) + { + /* This is not in a subcounter because it takes a + negligible and constant-sized amount of time */ + Vcorr_q += ewald_charge_correction(cr, fr, lambda[efptCOUL], box, + &dvdl_long_range_correction_q, + fr->vir_el_recip); + } + + enerd->dvdl_lin[efptCOUL] += dvdl_long_range_correction_q; + enerd->dvdl_lin[efptVDW] += dvdl_long_range_correction_lj; + + if ((EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype)) && (cr->duty & DUTY_PME)) + { + /* Do reciprocal PME for Coulomb and/or LJ. 
*/ + assert(fr->n_tpi >= 0); + if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED)) + { + pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE; + if (EEL_PME(fr->eeltype)) + { + pme_flags |= GMX_PME_DO_COULOMB; + } + if (EVDW_PME(fr->vdwtype)) + { + pme_flags |= GMX_PME_DO_LJ; + } + if (flags & GMX_FORCE_FORCES) + { + pme_flags |= GMX_PME_CALC_F; + } + if (flags & GMX_FORCE_VIRIAL) + { + pme_flags |= GMX_PME_CALC_ENER_VIR; + } + if (fr->n_tpi > 0) + { + /* We don't calculate f, but we do want the potential */ + pme_flags |= GMX_PME_CALC_POT; + } + wallcycle_start(wcycle, ewcPMEMESH); + status = gmx_pme_do(fr->pmedata, + 0, md->homenr - fr->n_tpi, + x, fr->f_novirsum, + md->chargeA, md->chargeB, + md->sqrt_c6A, md->sqrt_c6B, + md->sigmaA, md->sigmaB, + bSB ? boxs : box, cr, + DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0, + DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0, + nrnb, wcycle, + fr->vir_el_recip, fr->ewaldcoeff_q, + fr->vir_lj_recip, fr->ewaldcoeff_lj, + &Vlr_q, &Vlr_lj, + lambda[efptCOUL], lambda[efptVDW], + &dvdl_long_range_q, &dvdl_long_range_lj, pme_flags); + *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH); + if (status != 0) + { + gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status); + } + /* We should try to do as little computation after + * this as possible, because parallel PME synchronizes + * the nodes, so we want all load imbalance of the + * rest of the force calculation to be before the PME + * call. DD load balancing is done on the whole time + * of the force call (without PME). + */ + } + if (fr->n_tpi > 0) + { + if (EVDW_PME(ir->vdwtype)) + { + + gmx_fatal(FARGS, "Test particle insertion not implemented with LJ-PME"); + } + /* Determine the PME grid energy of the test molecule + * with the PME grid potential of the other charges. + */ + gmx_pme_calc_energy(fr->pmedata, fr->n_tpi, + x + md->homenr - fr->n_tpi, + md->chargeA + md->homenr - fr->n_tpi, + &Vlr_q); + } + } + } + + if (!EEL_PME(fr->eeltype) && EEL_PME_EWALD(fr->eeltype)) + { + Vlr_q = do_ewald(ir, x, fr->f_novirsum, + md->chargeA, md->chargeB, + box_size, cr, md->homenr, + fr->vir_el_recip, fr->ewaldcoeff_q, + lambda[efptCOUL], &dvdl_long_range_q, fr->ewald_table); + } + + /* Note that with separate PME nodes we get the real energies later */ + enerd->dvdl_lin[efptCOUL] += dvdl_long_range_q; + enerd->dvdl_lin[efptVDW] += dvdl_long_range_lj; + enerd->term[F_COUL_RECIP] = Vlr_q + Vcorr_q; + enerd->term[F_LJ_RECIP] = Vlr_lj + Vcorr_lj; + if (debug) + { + fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n", + Vlr_q, Vcorr_q, enerd->term[F_COUL_RECIP]); + pr_rvecs(debug, 0, "vir_el_recip after corr", fr->vir_el_recip, DIM); + pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS); + fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n", + Vlr_lj, Vcorr_lj, enerd->term[F_LJ_RECIP]); + pr_rvecs(debug, 0, "vir_lj_recip after corr", fr->vir_lj_recip, DIM); + } + } + else + { + /* Is there a reaction-field exclusion correction needed? + * With the Verlet scheme, exclusion forces are calculated + * in the non-bonded kernel. 
+ */ + if (ir->cutoff_scheme != ecutsVERLET && EEL_RF(fr->eeltype)) + { + real dvdl_rf_excl = 0; + enerd->term[F_RF_EXCL] = + RF_excl_correction(fr, graph, md, excl, x, f, + fr->fshift, &pbc, lambda[efptCOUL], &dvdl_rf_excl); + + enerd->dvdl_lin[efptCOUL] += dvdl_rf_excl; + } + } + where(); + + if (debug) + { + print_nrnb(debug, nrnb); + } + +#if GMX_MPI + if (TAKETIME) + { + t2 = MPI_Wtime(); + MPI_Barrier(cr->mpi_comm_mygroup); + t3 = MPI_Wtime(); + fr->t_wait += t3-t2; + if (fr->timesteps == 11) + { + char buf[22]; + fprintf(stderr, "* PP load balancing info: rank %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n", + cr->nodeid, gmx_step_str(fr->timesteps, buf), + 100*fr->t_wait/(fr->t_wait+fr->t_fnbf), + (fr->t_fnbf+fr->t_wait)/fr->t_fnbf); + } + fr->timesteps++; + } +#endif + + if (debug) + { + pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS); + } + + /* PLUMED */ + if(plumedswitch){ + int plumedNeedsEnergy; /* set by PLUMED if the active bias depends on the potential energy */ + (*plumedcmd)(plumedmain,"isEnergyNeeded",&plumedNeedsEnergy); + if(!plumedNeedsEnergy) (*plumedcmd)(plumedmain,"performCalc",NULL); /* apply the bias now; when the energy is needed, the call is issued later, once the total energy is available */ + } + /* END PLUMED */ +} + +void init_enerdata(int ngener, int n_lambda, gmx_enerdata_t *enerd) +{ + int i, n2; + + for (i = 0; i < F_NRE; i++) + { + enerd->term[i] = 0; + enerd->foreign_term[i] = 0; + } + + + for (i = 0; i < efptNR; i++) + { + enerd->dvdl_lin[i] = 0; + enerd->dvdl_nonlin[i] = 0; + } + + n2 = ngener*ngener; + if (debug) + { + fprintf(debug, "Creating %d sized group matrix for energies\n", n2); + } + enerd->grpp.nener = n2; + enerd->foreign_grpp.nener = n2; + for (i = 0; (i < egNR); i++) + { + snew(enerd->grpp.ener[i], n2); + snew(enerd->foreign_grpp.ener[i], n2); + } + + if (n_lambda) + { + enerd->n_lambda = 1 + n_lambda; + snew(enerd->enerpart_lambda, enerd->n_lambda); + } + else + { + enerd->n_lambda = 0; + } +} + +void destroy_enerdata(gmx_enerdata_t *enerd) +{ + int i; + + for (i = 0; (i < egNR); i++) + { + sfree(enerd->grpp.ener[i]); + } + + for (i = 0; (i < egNR); i++) + { + sfree(enerd->foreign_grpp.ener[i]); + } + + if (enerd->n_lambda) + { + sfree(enerd->enerpart_lambda); + } +} + +static real sum_v(int n, real v[]) +{ + real t; + int i; + + t = 0.0; + for (i = 0; (i < n); i++) + { + t = t + v[i]; + } + + return t; +} + +void sum_epot(gmx_grppairener_t *grpp, real *epot) +{ + int i; + + /* Accumulate energies */ + epot[F_COUL_SR] = sum_v(grpp->nener, grpp->ener[egCOULSR]); + epot[F_LJ] = sum_v(grpp->nener, grpp->ener[egLJSR]); + epot[F_LJ14] = sum_v(grpp->nener, grpp->ener[egLJ14]); + epot[F_COUL14] = sum_v(grpp->nener, grpp->ener[egCOUL14]); + /* We have already added 1-2,1-3, and 1-4 terms to F_GBPOL */ + epot[F_GBPOL] += sum_v(grpp->nener, grpp->ener[egGB]); + +/* lattice part of LR doesn't belong to any group + * and has been added earlier + */ + epot[F_BHAM] = sum_v(grpp->nener, grpp->ener[egBHAMSR]); + + epot[F_EPOT] = 0; + for (i = 0; (i < F_EPOT); i++) + { + if (i != F_DISRESVIOL && i != F_ORIRESDEV) + { + epot[F_EPOT] += epot[i]; + } + } +} + +void sum_dhdl(gmx_enerdata_t *enerd, real *lambda, t_lambda *fepvals) +{ + int i, j, index; + double dlam; + + enerd->dvdl_lin[efptVDW] += enerd->term[F_DVDL_VDW]; /* include dispersion correction */ + enerd->term[F_DVDL] = 0.0; + for (i = 0; i < efptNR; i++) + { + if (fepvals->separate_dvdl[i]) + { + /* could this be done more readably/compactly?
*/ + switch (i) + { + case (efptMASS): + index = F_DKDL; + break; + case (efptCOUL): + index = F_DVDL_COUL; + break; + case (efptVDW): + index = F_DVDL_VDW; + break; + case (efptBONDED): + index = F_DVDL_BONDED; + break; + case (efptRESTRAINT): + index = F_DVDL_RESTRAINT; + break; + default: + index = F_DVDL; + break; + } + enerd->term[index] = enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i]; + if (debug) + { + fprintf(debug, "dvdl-%s[%2d]: %f: non-linear %f + linear %f\n", + efpt_names[i], i, enerd->term[index], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]); + } + } + else + { + enerd->term[F_DVDL] += enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i]; + if (debug) + { + fprintf(debug, "dvd-%sl[%2d]: %f: non-linear %f + linear %f\n", + efpt_names[0], i, enerd->term[F_DVDL], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]); + } + } + } + + /* Notes on the foreign lambda free energy difference evaluation: + * Adding the potential and ekin terms that depend linearly on lambda + * as delta lam * dvdl to the energy differences is exact. + * For the constraints this is not exact, but we have no other option + * without literally changing the lengths and reevaluating the energies at each step. + * (try to remedy this post 4.6 - MRS) + */ + if (fepvals->separate_dvdl[efptBONDED]) + { + enerd->term[F_DVDL_BONDED] += enerd->term[F_DVDL_CONSTR]; + } + else + { + enerd->term[F_DVDL] += enerd->term[F_DVDL_CONSTR]; + } + enerd->term[F_DVDL_CONSTR] = 0; + + for (i = 0; i < fepvals->n_lambda; i++) + { + /* note we are iterating over fepvals here! + For the current lam, dlam = 0 automatically, + so we don't need to add anything to the + enerd->enerpart_lambda[0] */ + + /* we don't need to worry about dvdl_lin contributions to dE at + current lambda, because the contributions to the current + lambda are automatically zeroed */ + + for (j = 0; j < efptNR; j++) + { + /* Note that this loop is over all dhdl components, not just the separated ones */ + dlam = (fepvals->all_lambda[j][i]-lambda[j]); + enerd->enerpart_lambda[i+1] += dlam*enerd->dvdl_lin[j]; + if (debug) + { + fprintf(debug, "enerdiff lam %g: (%15s), non-linear %f linear %f*%f\n", + fepvals->all_lambda[j][i], efpt_names[j], + (enerd->enerpart_lambda[i+1] - enerd->enerpart_lambda[0]), + dlam, enerd->dvdl_lin[j]); + } + } + } +} + + +void reset_foreign_enerdata(gmx_enerdata_t *enerd) +{ + int i, j; + + /* First reset all foreign energy components. Foreign energies always called on + neighbor search steps */ + for (i = 0; (i < egNR); i++) + { + for (j = 0; (j < enerd->grpp.nener); j++) + { + enerd->foreign_grpp.ener[i][j] = 0.0; + } + } + + /* potential energy components */ + for (i = 0; (i <= F_EPOT); i++) + { + enerd->foreign_term[i] = 0.0; + } +} + +void reset_enerdata(gmx_enerdata_t *enerd) +{ + int i, j; + + /* First reset all energy components. 
*/ + for (i = 0; (i < egNR); i++) + { + for (j = 0; (j < enerd->grpp.nener); j++) + { + enerd->grpp.ener[i][j] = 0.0; + } + } + for (i = 0; i < efptNR; i++) + { + enerd->dvdl_lin[i] = 0.0; + enerd->dvdl_nonlin[i] = 0.0; + } + + /* Normal potential energy components */ + for (i = 0; (i <= F_EPOT); i++) + { + enerd->term[i] = 0.0; + } + enerd->term[F_DVDL] = 0.0; + enerd->term[F_DVDL_COUL] = 0.0; + enerd->term[F_DVDL_VDW] = 0.0; + enerd->term[F_DVDL_BONDED] = 0.0; + enerd->term[F_DVDL_RESTRAINT] = 0.0; + enerd->term[F_DKDL] = 0.0; + if (enerd->n_lambda > 0) + { + for (i = 0; i < enerd->n_lambda; i++) + { + enerd->enerpart_lambda[i] = 0.0; + } + } + /* reset foreign energy data - separate function since we also call it elsewhere */ + reset_foreign_enerdata(enerd); +} diff --git a/patches/gromacs-2016-beta1.diff/src/gromacs/mdlib/force.cpp.preplumed b/patches/gromacs-2016-beta1.diff/src/gromacs/mdlib/force.cpp.preplumed new file mode 100644 index 0000000000000000000000000000000000000000..107e00a8e7b3393ec0eaf42a07f584d29c6ea4eb --- /dev/null +++ b/patches/gromacs-2016-beta1.diff/src/gromacs/mdlib/force.cpp.preplumed @@ -0,0 +1,899 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. 
+ */ +#include "gmxpre.h" + +#include "force.h" + +#include "config.h" + +#include <assert.h> +#include <math.h> +#include <string.h> + +#include "gromacs/domdec/domdec.h" +#include "gromacs/domdec/domdec_struct.h" +#include "gromacs/ewald/ewald.h" +#include "gromacs/ewald/long-range-correction.h" +#include "gromacs/ewald/pme.h" +#include "gromacs/gmxlib/network.h" +#include "gromacs/gmxlib/nrnb.h" +#include "gromacs/gmxlib/nonbonded/nonbonded.h" +#include "gromacs/listed-forces/listed-forces.h" +#include "gromacs/math/vec.h" +#include "gromacs/math/vecdump.h" +#include "gromacs/mdlib/forcerec-threading.h" +#include "gromacs/mdlib/genborn.h" +#include "gromacs/mdlib/mdrun.h" +#include "gromacs/mdlib/ns.h" +#include "gromacs/mdlib/qmmm.h" +#include "gromacs/mdtypes/commrec.h" +#include "gromacs/mdtypes/inputrec.h" +#include "gromacs/mdtypes/md_enums.h" +#include "gromacs/pbcutil/ishift.h" +#include "gromacs/pbcutil/mshift.h" +#include "gromacs/pbcutil/pbc.h" +#include "gromacs/timing/wallcycle.h" +#include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/exceptions.h" +#include "gromacs/utility/fatalerror.h" +#include "gromacs/utility/smalloc.h" + +void ns(FILE *fp, + t_forcerec *fr, + matrix box, + gmx_groups_t *groups, + gmx_localtop_t *top, + t_mdatoms *md, + t_commrec *cr, + t_nrnb *nrnb, + gmx_bool bFillGrid) +{ + int nsearch; + + + if (!fr->ns->nblist_initialized) + { + init_neighbor_list(fp, fr, md->homenr); + } + + nsearch = search_neighbours(fp, fr, box, top, groups, cr, nrnb, md, + bFillGrid); + if (debug) + { + fprintf(debug, "nsearch = %d\n", nsearch); + } + + /* Check whether we have to do dynamic load balancing */ + /*if ((nsb->nstDlb > 0) && (mod(step,nsb->nstDlb) == 0)) + count_nb(cr,nsb,&(top->blocks[ebCGS]),nns,fr->nlr, + &(top->idef),opts->ngener); + */ + if (fr->ns->dump_nl > 0) + { + dump_nblist(fp, cr, fr, fr->ns->dump_nl); + } +} + +static void reduce_thread_energies(tensor vir_q, tensor vir_lj, + real *Vcorr_q, real *Vcorr_lj, + real *dvdl_q, real *dvdl_lj, + int nthreads, + ewald_corr_thread_t *ewc_t) +{ + int t; + + for (t = 1; t < nthreads; t++) + { + *Vcorr_q += ewc_t[t].Vcorr_q; + *Vcorr_lj += ewc_t[t].Vcorr_lj; + *dvdl_q += ewc_t[t].dvdl[efptCOUL]; + *dvdl_lj += ewc_t[t].dvdl[efptVDW]; + m_add(vir_q, ewc_t[t].vir_q, vir_q); + m_add(vir_lj, ewc_t[t].vir_lj, vir_lj); + } +} + +void do_force_lowlevel(t_forcerec *fr, t_inputrec *ir, + t_idef *idef, t_commrec *cr, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + t_mdatoms *md, + rvec x[], history_t *hist, + rvec f[], + gmx_enerdata_t *enerd, + t_fcdata *fcd, + gmx_localtop_t *top, + gmx_genborn_t *born, + gmx_bool bBornRadii, + matrix box, + t_lambda *fepvals, + real *lambda, + t_graph *graph, + t_blocka *excl, + rvec mu_tot[], + int flags, + float *cycles_pme) +{ + int i, j; + int donb_flags; + gmx_bool bSB; + int pme_flags; + matrix boxs; + rvec box_size; + t_pbc pbc; + real dvdl_dum[efptNR], dvdl_nb[efptNR]; + +#if GMX_MPI + double t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */ +#endif + + set_pbc(&pbc, fr->ePBC, box); + + /* reset free energy components */ + for (i = 0; i < efptNR; i++) + { + dvdl_nb[i] = 0; + dvdl_dum[i] = 0; + } + + /* Reset box */ + for (i = 0; (i < DIM); i++) + { + box_size[i] = box[i][i]; + } + + /* do QMMM first if requested */ + if (fr->bQMMM) + { + enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr); + } + + /* Call the short range functions all in one go. 
*/ + +#if GMX_MPI + /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/ +#define TAKETIME FALSE + if (TAKETIME) + { + MPI_Barrier(cr->mpi_comm_mygroup); + t0 = MPI_Wtime(); + } +#endif + + if (ir->nwall) + { + /* foreign lambda component for walls */ + real dvdl_walls = do_walls(ir, fr, box, md, x, f, lambda[efptVDW], + enerd->grpp.ener[egLJSR], nrnb); + enerd->dvdl_lin[efptVDW] += dvdl_walls; + } + + /* If doing GB, reset dvda and calculate the Born radii */ + if (ir->implicit_solvent) + { + wallcycle_sub_start(wcycle, ewcsNONBONDED); + + for (i = 0; i < born->nr; i++) + { + fr->dvda[i] = 0; + } + + if (bBornRadii) + { + calc_gb_rad(cr, fr, ir, top, x, fr->gblist, born, md, nrnb); + } + + wallcycle_sub_stop(wcycle, ewcsNONBONDED); + } + + where(); + /* We only do non-bonded calculation with group scheme here, the verlet + * calls are done from do_force_cutsVERLET(). */ + if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED)) + { + donb_flags = 0; + /* Add short-range interactions */ + donb_flags |= GMX_NONBONDED_DO_SR; + + /* Currently all group scheme kernels always calculate (shift-)forces */ + if (flags & GMX_FORCE_FORCES) + { + donb_flags |= GMX_NONBONDED_DO_FORCE; + } + if (flags & GMX_FORCE_VIRIAL) + { + donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE; + } + if (flags & GMX_FORCE_ENERGY) + { + donb_flags |= GMX_NONBONDED_DO_POTENTIAL; + } + + wallcycle_sub_start(wcycle, ewcsNONBONDED); + do_nonbonded(fr, x, f, md, excl, + &enerd->grpp, nrnb, + lambda, dvdl_nb, -1, -1, donb_flags); + + /* If we do foreign lambda and we have soft-core interactions + * we have to recalculate the (non-linear) energies contributions. + */ + if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0) + { + for (i = 0; i < enerd->n_lambda; i++) + { + real lam_i[efptNR]; + + for (j = 0; j < efptNR; j++) + { + lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]); + } + reset_foreign_enerdata(enerd); + do_nonbonded(fr, x, f, md, excl, + &(enerd->foreign_grpp), nrnb, + lam_i, dvdl_dum, -1, -1, + (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA); + sum_epot(&(enerd->foreign_grpp), enerd->foreign_term); + enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; + } + } + wallcycle_sub_stop(wcycle, ewcsNONBONDED); + where(); + } + + /* If we are doing GB, calculate bonded forces and apply corrections + * to the solvation forces */ + /* MRS: Eventually, many need to include free energy contribution here! */ + if (ir->implicit_solvent) + { + wallcycle_sub_start(wcycle, ewcsLISTED); + calc_gb_forces(cr, md, born, top, x, f, fr, idef, + ir->gb_algorithm, ir->sa_algorithm, nrnb, &pbc, graph, enerd); + wallcycle_sub_stop(wcycle, ewcsLISTED); + } + +#if GMX_MPI + if (TAKETIME) + { + t1 = MPI_Wtime(); + fr->t_fnbf += t1-t0; + } +#endif + + if (fepvals->sc_alpha != 0) + { + enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW]; + } + else + { + enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW]; + } + + if (fepvals->sc_alpha != 0) + + /* even though coulomb part is linear, we already added it, because we + need to go through the vdw calculation anyway */ + { + enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL]; + } + else + { + enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL]; + } + + if (debug) + { + pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS); + } + + /* Shift the coordinates. Must be done before listed forces and PPPM, + * but is also necessary for SHAKE and update, therefore it can NOT + * go when no listed forces have to be evaluated.
+ * + * The shifting and PBC code is deliberately not timed, since with + * the Verlet scheme it only takes non-zero time with triclinic + * boxes, and even then the time is around a factor of 100 less + * than the next smallest counter. + */ + + + /* Here sometimes we would not need to shift with NBFonly, + * but we do so anyhow for consistency of the returned coordinates. + */ + if (graph) + { + shift_self(graph, box, x); + if (TRICLINIC(box)) + { + inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes); + } + else + { + inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes); + } + } + /* Check whether we need to do listed interactions or correct for exclusions */ + if (fr->bMolPBC && + ((flags & GMX_FORCE_LISTED) + || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype))) + { + /* TODO There are no electrostatics methods that require this + transformation, when using the Verlet scheme, so update the + above conditional. */ + /* Since all atoms are in the rectangular or triclinic unit-cell, + * only single box vector shifts (2 in x) are required. + */ + set_pbc_dd(&pbc, fr->ePBC, DOMAINDECOMP(cr) ? cr->dd->nc : nullptr, + TRUE, box); + } + + do_force_listed(wcycle, box, ir->fepvals, cr->ms, + idef, (const rvec *) x, hist, f, fr, + &pbc, graph, enerd, nrnb, lambda, md, fcd, + DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, + flags); + + where(); + + *cycles_pme = 0; + clear_mat(fr->vir_el_recip); + clear_mat(fr->vir_lj_recip); + + /* Do long-range electrostatics and/or LJ-PME, including related short-range + * corrections. + */ + if (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype)) + { + int status = 0; + real Vlr_q = 0, Vlr_lj = 0, Vcorr_q = 0, Vcorr_lj = 0; + real dvdl_long_range_q = 0, dvdl_long_range_lj = 0; + + bSB = (ir->nwall == 2); + if (bSB) + { + copy_mat(box, boxs); + svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]); + box_size[ZZ] *= ir->wall_ewald_zfac; + } + + if (EEL_PME_EWALD(fr->eeltype) || EVDW_PME(fr->vdwtype)) + { + real dvdl_long_range_correction_q = 0; + real dvdl_long_range_correction_lj = 0; + /* With the Verlet scheme exclusion forces are calculated + * in the non-bonded kernel. + */ + /* The TPI molecule does not have exclusions with the rest + * of the system and no intra-molecular PME grid + * contributions will be calculated in + * gmx_pme_calc_energy. 
+ */ + if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) || + ir->ewald_geometry != eewg3D || + ir->epsilon_surface != 0) + { + int nthreads, t; + + wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION); + + if (fr->n_tpi > 0) + { + gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions"); + } + + nthreads = fr->nthread_ewc; +#pragma omp parallel for num_threads(nthreads) schedule(static) + for (t = 0; t < nthreads; t++) + { + try + { + tensor *vir_q, *vir_lj; + real *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj; + if (t == 0) + { + vir_q = &fr->vir_el_recip; + vir_lj = &fr->vir_lj_recip; + Vcorrt_q = &Vcorr_q; + Vcorrt_lj = &Vcorr_lj; + dvdlt_q = &dvdl_long_range_correction_q; + dvdlt_lj = &dvdl_long_range_correction_lj; + } + else + { + vir_q = &fr->ewc_t[t].vir_q; + vir_lj = &fr->ewc_t[t].vir_lj; + Vcorrt_q = &fr->ewc_t[t].Vcorr_q; + Vcorrt_lj = &fr->ewc_t[t].Vcorr_lj; + dvdlt_q = &fr->ewc_t[t].dvdl[efptCOUL]; + dvdlt_lj = &fr->ewc_t[t].dvdl[efptVDW]; + clear_mat(*vir_q); + clear_mat(*vir_lj); + } + *dvdlt_q = 0; + *dvdlt_lj = 0; + + /* Threading is only supported with the Verlet cut-off + * scheme and then only single particle forces (no + * exclusion forces) are calculated, so we can store + * the forces in the normal, single fr->f_novirsum array. + */ + ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1], + cr, t, fr, + md->chargeA, md->chargeB, + md->sqrt_c6A, md->sqrt_c6B, + md->sigmaA, md->sigmaB, + md->sigma3A, md->sigma3B, + md->nChargePerturbed || md->nTypePerturbed, + ir->cutoff_scheme != ecutsVERLET, + excl, x, bSB ? boxs : box, mu_tot, + ir->ewald_geometry, + ir->epsilon_surface, + fr->f_novirsum, *vir_q, *vir_lj, + Vcorrt_q, Vcorrt_lj, + lambda[efptCOUL], lambda[efptVDW], + dvdlt_q, dvdlt_lj); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; + } + if (nthreads > 1) + { + reduce_thread_energies(fr->vir_el_recip, fr->vir_lj_recip, + &Vcorr_q, &Vcorr_lj, + &dvdl_long_range_correction_q, + &dvdl_long_range_correction_lj, + nthreads, fr->ewc_t); + } + wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION); + } + + if (EEL_PME_EWALD(fr->eeltype) && fr->n_tpi == 0) + { + /* This is not in a subcounter because it takes a + negligible and constant-sized amount of time */ + Vcorr_q += ewald_charge_correction(cr, fr, lambda[efptCOUL], box, + &dvdl_long_range_correction_q, + fr->vir_el_recip); + } + + enerd->dvdl_lin[efptCOUL] += dvdl_long_range_correction_q; + enerd->dvdl_lin[efptVDW] += dvdl_long_range_correction_lj; + + if ((EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype)) && (cr->duty & DUTY_PME)) + { + /* Do reciprocal PME for Coulomb and/or LJ. */ + assert(fr->n_tpi >= 0); + if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED)) + { + pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE; + if (EEL_PME(fr->eeltype)) + { + pme_flags |= GMX_PME_DO_COULOMB; + } + if (EVDW_PME(fr->vdwtype)) + { + pme_flags |= GMX_PME_DO_LJ; + } + if (flags & GMX_FORCE_FORCES) + { + pme_flags |= GMX_PME_CALC_F; + } + if (flags & GMX_FORCE_VIRIAL) + { + pme_flags |= GMX_PME_CALC_ENER_VIR; + } + if (fr->n_tpi > 0) + { + /* We don't calculate f, but we do want the potential */ + pme_flags |= GMX_PME_CALC_POT; + } + wallcycle_start(wcycle, ewcPMEMESH); + status = gmx_pme_do(fr->pmedata, + 0, md->homenr - fr->n_tpi, + x, fr->f_novirsum, + md->chargeA, md->chargeB, + md->sqrt_c6A, md->sqrt_c6B, + md->sigmaA, md->sigmaB, + bSB ? boxs : box, cr, + DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0, + DOMAINDECOMP(cr) ? 
dd_pme_maxshift_y(cr->dd) : 0, + nrnb, wcycle, + fr->vir_el_recip, fr->ewaldcoeff_q, + fr->vir_lj_recip, fr->ewaldcoeff_lj, + &Vlr_q, &Vlr_lj, + lambda[efptCOUL], lambda[efptVDW], + &dvdl_long_range_q, &dvdl_long_range_lj, pme_flags); + *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH); + if (status != 0) + { + gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status); + } + /* We should try to do as little computation after + * this as possible, because parallel PME synchronizes + * the nodes, so we want all load imbalance of the + * rest of the force calculation to be before the PME + * call. DD load balancing is done on the whole time + * of the force call (without PME). + */ + } + if (fr->n_tpi > 0) + { + if (EVDW_PME(ir->vdwtype)) + { + + gmx_fatal(FARGS, "Test particle insertion not implemented with LJ-PME"); + } + /* Determine the PME grid energy of the test molecule + * with the PME grid potential of the other charges. + */ + gmx_pme_calc_energy(fr->pmedata, fr->n_tpi, + x + md->homenr - fr->n_tpi, + md->chargeA + md->homenr - fr->n_tpi, + &Vlr_q); + } + } + } + + if (!EEL_PME(fr->eeltype) && EEL_PME_EWALD(fr->eeltype)) + { + Vlr_q = do_ewald(ir, x, fr->f_novirsum, + md->chargeA, md->chargeB, + box_size, cr, md->homenr, + fr->vir_el_recip, fr->ewaldcoeff_q, + lambda[efptCOUL], &dvdl_long_range_q, fr->ewald_table); + } + + /* Note that with separate PME nodes we get the real energies later */ + enerd->dvdl_lin[efptCOUL] += dvdl_long_range_q; + enerd->dvdl_lin[efptVDW] += dvdl_long_range_lj; + enerd->term[F_COUL_RECIP] = Vlr_q + Vcorr_q; + enerd->term[F_LJ_RECIP] = Vlr_lj + Vcorr_lj; + if (debug) + { + fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n", + Vlr_q, Vcorr_q, enerd->term[F_COUL_RECIP]); + pr_rvecs(debug, 0, "vir_el_recip after corr", fr->vir_el_recip, DIM); + pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS); + fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n", + Vlr_lj, Vcorr_lj, enerd->term[F_LJ_RECIP]); + pr_rvecs(debug, 0, "vir_lj_recip after corr", fr->vir_lj_recip, DIM); + } + } + else + { + /* Is there a reaction-field exclusion correction needed? + * With the Verlet scheme, exclusion forces are calculated + * in the non-bonded kernel. 
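+ * For an excluded atom pair the plain 1/r Coulomb term is omitted,
+ * but the reaction-field part of the potential (roughly of the form
+ * q_i*q_j*(k_rf*r_ij^2 - c_rf)) still acts between the two atoms;
+ * RF_excl_correction below adds that remainder.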
+ */ + if (ir->cutoff_scheme != ecutsVERLET && EEL_RF(fr->eeltype)) + { + real dvdl_rf_excl = 0; + enerd->term[F_RF_EXCL] = + RF_excl_correction(fr, graph, md, excl, x, f, + fr->fshift, &pbc, lambda[efptCOUL], &dvdl_rf_excl); + + enerd->dvdl_lin[efptCOUL] += dvdl_rf_excl; + } + } + where(); + + if (debug) + { + print_nrnb(debug, nrnb); + } + +#if GMX_MPI + if (TAKETIME) + { + t2 = MPI_Wtime(); + MPI_Barrier(cr->mpi_comm_mygroup); + t3 = MPI_Wtime(); + fr->t_wait += t3-t2; + if (fr->timesteps == 11) + { + char buf[22]; + fprintf(stderr, "* PP load balancing info: rank %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n", + cr->nodeid, gmx_step_str(fr->timesteps, buf), + 100*fr->t_wait/(fr->t_wait+fr->t_fnbf), + (fr->t_fnbf+fr->t_wait)/fr->t_fnbf); + } + fr->timesteps++; + } +#endif + + if (debug) + { + pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS); + } + +} + +void init_enerdata(int ngener, int n_lambda, gmx_enerdata_t *enerd) +{ + int i, n2; + + for (i = 0; i < F_NRE; i++) + { + enerd->term[i] = 0; + enerd->foreign_term[i] = 0; + } + + + for (i = 0; i < efptNR; i++) + { + enerd->dvdl_lin[i] = 0; + enerd->dvdl_nonlin[i] = 0; + } + + n2 = ngener*ngener; + if (debug) + { + fprintf(debug, "Creating %d sized group matrix for energies\n", n2); + } + enerd->grpp.nener = n2; + enerd->foreign_grpp.nener = n2; + for (i = 0; (i < egNR); i++) + { + snew(enerd->grpp.ener[i], n2); + snew(enerd->foreign_grpp.ener[i], n2); + } + + if (n_lambda) + { + enerd->n_lambda = 1 + n_lambda; + snew(enerd->enerpart_lambda, enerd->n_lambda); + } + else + { + enerd->n_lambda = 0; + } +} + +void destroy_enerdata(gmx_enerdata_t *enerd) +{ + int i; + + for (i = 0; (i < egNR); i++) + { + sfree(enerd->grpp.ener[i]); + } + + for (i = 0; (i < egNR); i++) + { + sfree(enerd->foreign_grpp.ener[i]); + } + + if (enerd->n_lambda) + { + sfree(enerd->enerpart_lambda); + } +} + +static real sum_v(int n, real v[]) +{ + real t; + int i; + + t = 0.0; + for (i = 0; (i < n); i++) + { + t = t + v[i]; + } + + return t; +} + +void sum_epot(gmx_grppairener_t *grpp, real *epot) +{ + int i; + + /* Accumulate energies */ + epot[F_COUL_SR] = sum_v(grpp->nener, grpp->ener[egCOULSR]); + epot[F_LJ] = sum_v(grpp->nener, grpp->ener[egLJSR]); + epot[F_LJ14] = sum_v(grpp->nener, grpp->ener[egLJ14]); + epot[F_COUL14] = sum_v(grpp->nener, grpp->ener[egCOUL14]); + /* We have already added 1-2,1-3, and 1-4 terms to F_GBPOL */ + epot[F_GBPOL] += sum_v(grpp->nener, grpp->ener[egGB]); + +/* lattice part of LR doesnt belong to any group + * and has been added earlier + */ + epot[F_BHAM] = sum_v(grpp->nener, grpp->ener[egBHAMSR]); + + epot[F_EPOT] = 0; + for (i = 0; (i < F_EPOT); i++) + { + if (i != F_DISRESVIOL && i != F_ORIRESDEV) + { + epot[F_EPOT] += epot[i]; + } + } +} + +void sum_dhdl(gmx_enerdata_t *enerd, real *lambda, t_lambda *fepvals) +{ + int i, j, index; + double dlam; + + enerd->dvdl_lin[efptVDW] += enerd->term[F_DVDL_VDW]; /* include dispersion correction */ + enerd->term[F_DVDL] = 0.0; + for (i = 0; i < efptNR; i++) + { + if (fepvals->separate_dvdl[i]) + { + /* could this be done more readably/compactly? 
*/
+ switch (i)
+ {
+ case (efptMASS):
+ index = F_DKDL;
+ break;
+ case (efptCOUL):
+ index = F_DVDL_COUL;
+ break;
+ case (efptVDW):
+ index = F_DVDL_VDW;
+ break;
+ case (efptBONDED):
+ index = F_DVDL_BONDED;
+ break;
+ case (efptRESTRAINT):
+ index = F_DVDL_RESTRAINT;
+ break;
+ default:
+ index = F_DVDL;
+ break;
+ }
+ enerd->term[index] = enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
+ if (debug)
+ {
+ fprintf(debug, "dvdl-%s[%2d]: %f: non-linear %f + linear %f\n",
+ efpt_names[i], i, enerd->term[index], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]);
+ }
+ }
+ else
+ {
+ enerd->term[F_DVDL] += enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
+ if (debug)
+ {
+ fprintf(debug, "dvdl-%s[%2d]: %f: non-linear %f + linear %f\n",
+ efpt_names[0], i, enerd->term[F_DVDL], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]);
+ }
+ }
+ }
+
+ /* Notes on the foreign lambda free energy difference evaluation:
+ * Adding the potential and ekin terms that depend linearly on lambda
+ * as delta lambda * dvdl to the energy differences is exact.
+ * For the constraints this is not exact, but we have no other option
+ * without literally changing the lengths and reevaluating the energies at each step.
+ * (try to remedy this post 4.6 - MRS)
+ */
+ if (fepvals->separate_dvdl[efptBONDED])
+ {
+ enerd->term[F_DVDL_BONDED] += enerd->term[F_DVDL_CONSTR];
+ }
+ else
+ {
+ enerd->term[F_DVDL] += enerd->term[F_DVDL_CONSTR];
+ }
+ enerd->term[F_DVDL_CONSTR] = 0;
+
+ for (i = 0; i < fepvals->n_lambda; i++)
+ {
+ /* note we are iterating over fepvals here!
+ For the current lambda, dlam = 0 automatically,
+ so we don't need to add anything to the
+ enerd->enerpart_lambda[0] */
+
+ /* we don't need to worry about dvdl_lin contributions to dE at
+ current lambda, because the contributions to the current
+ lambda are automatically zeroed */
+
+ for (j = 0; j < efptNR; j++)
+ {
+ /* Note that this loop is over all dhdl components, not just the separated ones */
+ dlam = (fepvals->all_lambda[j][i]-lambda[j]);
+ enerd->enerpart_lambda[i+1] += dlam*enerd->dvdl_lin[j];
+ if (debug)
+ {
+ fprintf(debug, "enerdiff lam %g: (%15s), non-linear %f linear %f*%f\n",
+ fepvals->all_lambda[j][i], efpt_names[j],
+ (enerd->enerpart_lambda[i+1] - enerd->enerpart_lambda[0]),
+ dlam, enerd->dvdl_lin[j]);
+ }
+ }
+ }
+}
+
+
+void reset_foreign_enerdata(gmx_enerdata_t *enerd)
+{
+ int i, j;
+
+ /* First reset all foreign energy components; foreign energies are
+ always evaluated on neighbor search steps */
+ for (i = 0; (i < egNR); i++)
+ {
+ for (j = 0; (j < enerd->grpp.nener); j++)
+ {
+ enerd->foreign_grpp.ener[i][j] = 0.0;
+ }
+ }
+
+ /* potential energy components */
+ for (i = 0; (i <= F_EPOT); i++)
+ {
+ enerd->foreign_term[i] = 0.0;
+ }
+}
+
+void reset_enerdata(gmx_enerdata_t *enerd)
+{
+ int i, j;
+
+ /* First reset all energy components.
*/ + for (i = 0; (i < egNR); i++) + { + for (j = 0; (j < enerd->grpp.nener); j++) + { + enerd->grpp.ener[i][j] = 0.0; + } + } + for (i = 0; i < efptNR; i++) + { + enerd->dvdl_lin[i] = 0.0; + enerd->dvdl_nonlin[i] = 0.0; + } + + /* Normal potential energy components */ + for (i = 0; (i <= F_EPOT); i++) + { + enerd->term[i] = 0.0; + } + enerd->term[F_DVDL] = 0.0; + enerd->term[F_DVDL_COUL] = 0.0; + enerd->term[F_DVDL_VDW] = 0.0; + enerd->term[F_DVDL_BONDED] = 0.0; + enerd->term[F_DVDL_RESTRAINT] = 0.0; + enerd->term[F_DKDL] = 0.0; + if (enerd->n_lambda > 0) + { + for (i = 0; i < enerd->n_lambda; i++) + { + enerd->enerpart_lambda[i] = 0.0; + } + } + /* reset foreign energy data - separate function since we also call it elsewhere */ + reset_foreign_enerdata(enerd); +} diff --git a/patches/gromacs-2016-beta1.diff/src/gromacs/mdlib/minimize.cpp b/patches/gromacs-2016-beta1.diff/src/gromacs/mdlib/minimize.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3ce7832812fc7b72d446ab0c9985e23832d66fda --- /dev/null +++ b/patches/gromacs-2016-beta1.diff/src/gromacs/mdlib/minimize.cpp @@ -0,0 +1,3128 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! 
\internal \file
+ *
+ * \brief This file defines integrators for energy minimization
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \author Erik Lindahl <erik@kth.se>
+ * \ingroup module_mdlib
+ */
+#include "gmxpre.h"
+
+#include "minimize.h"
+
+#include "config.h"
+
+#include <cmath>
+#include <cstring>
+#include <ctime>
+
+#include <algorithm>
+#include <vector>
+
+#include "gromacs/commandline/filenm.h"
+#include "gromacs/domdec/domdec.h"
+#include "gromacs/domdec/domdec_struct.h"
+#include "gromacs/ewald/pme.h"
+#include "gromacs/fileio/confio.h"
+#include "gromacs/fileio/mtxio.h"
+#include "gromacs/gmxlib/md_logging.h"
+#include "gromacs/gmxlib/network.h"
+#include "gromacs/gmxlib/nrnb.h"
+#include "gromacs/imd/imd.h"
+#include "gromacs/linearalgebra/sparsematrix.h"
+#include "gromacs/listed-forces/manage-threading.h"
+#include "gromacs/math/functions.h"
+#include "gromacs/math/vec.h"
+#include "gromacs/mdlib/constr.h"
+#include "gromacs/mdlib/force.h"
+#include "gromacs/mdlib/forcerec.h"
+#include "gromacs/mdlib/gmx_omp_nthreads.h"
+#include "gromacs/mdlib/md_support.h"
+#include "gromacs/mdlib/mdatoms.h"
+#include "gromacs/mdlib/mdebin.h"
+#include "gromacs/mdlib/mdrun.h"
+#include "gromacs/mdlib/ns.h"
+#include "gromacs/mdlib/shellfc.h"
+#include "gromacs/mdlib/sim_util.h"
+#include "gromacs/mdlib/tgroup.h"
+#include "gromacs/mdlib/trajectory_writing.h"
+/* PLUMED */
+#include "../../../Plumed.h"
+extern int plumedswitch;
+extern plumed plumedmain;
+extern void(*plumedcmd)(plumed,const char*,const void*);
+/* END PLUMED */
+
+#include "gromacs/mdlib/update.h"
+#include "gromacs/mdlib/vsite.h"
+#include "gromacs/mdtypes/commrec.h"
+#include "gromacs/mdtypes/inputrec.h"
+#include "gromacs/mdtypes/md_enums.h"
+#include "gromacs/pbcutil/mshift.h"
+#include "gromacs/pbcutil/pbc.h"
+#include "gromacs/timing/wallcycle.h"
+#include "gromacs/timing/walltime_accounting.h"
+#include "gromacs/topology/mtop_util.h"
+#include "gromacs/utility/cstringutil.h"
+#include "gromacs/utility/exceptions.h"
+#include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/smalloc.h"
+
+//! Utility structure for manipulating states during EM
+typedef struct {
+ //! Copy of the global state
+ t_state s;
+ //! Force array
+ rvec *f;
+ //! Potential energy
+ real epot;
+ //! Norm of the force
+ real fnorm;
+ //! Maximum force
+ real fmax;
+ //! Index of the atom carrying the maximum force
+ int a_fmax;
+} em_state_t;
+
+//! Initialize an em_state_t structure and return a pointer to it
+static em_state_t *init_em_state()
+{
+ em_state_t *ems;
+
+ snew(ems, 1);
+
+ /* Does this need to be here? Should the array be declared differently (statically) in the state definition? */
+ snew(ems->s.lambda, efptNR);
+
+ return ems;
+}
+
+//! Print the EM starting conditions
+static void print_em_start(FILE *fplog,
+ t_commrec *cr,
+ gmx_walltime_accounting_t walltime_accounting,
+ gmx_wallcycle_t wcycle,
+ const char *name)
+{
+ walltime_accounting_start(walltime_accounting);
+ wallcycle_start(wcycle, ewcRUN);
+ print_start(fplog, cr, walltime_accounting, name);
+}
+
+//! Stop counting time for EM
+static void em_time_end(gmx_walltime_accounting_t walltime_accounting,
+ gmx_wallcycle_t wcycle)
+{
+ wallcycle_stop(wcycle, ewcRUN);
+
+ walltime_accounting_end(walltime_accounting);
+}
+
+//! Print a header to the log file and the console
+static void sp_header(FILE *out, const char *minimizer, real ftol, int nsteps)
+{
+ fprintf(out, "\n");
+ fprintf(out, "%s:\n", minimizer);
+ fprintf(out, " Tolerance (Fmax) = %12.5e\n", ftol);
+ fprintf(out, " Number of steps = %12d\n", nsteps);
+}
+
+//! Print warning message
+static void warn_step(FILE *fp, real ftol, gmx_bool bLastStep, gmx_bool bConstrain)
+{
+ char buffer[2048];
+ if (bLastStep)
+ {
+ sprintf(buffer,
+ "\nEnergy minimization reached the maximum number "
+ "of steps before the forces reached the requested "
+ "precision Fmax < %g.\n", ftol);
+ }
+ else
+ {
+ sprintf(buffer,
+ "\nEnergy minimization has stopped, but the forces have "
+ "not converged to the requested precision Fmax < %g (which "
+ "may not be possible for your system). It stopped "
+ "because the algorithm tried to make a new step whose size "
+ "was too small, or there was no change in the energy since "
+ "last step. Either way, we regard the minimization as "
+ "converged to within the available machine precision, "
+ "given your starting configuration and EM parameters.\n%s%s",
+ ftol,
+ sizeof(real) < sizeof(double) ?
+ "\nDouble precision normally gives you higher accuracy, but "
+ "this is often not needed for preparing to run molecular "
+ "dynamics.\n" :
+ "",
+ bConstrain ?
+ "You might need to increase your constraint accuracy, or turn\n"
+ "off constraints altogether (set constraints = none in mdp file)\n" :
+ "");
+ }
+ fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
+}
+
+//! Print message about convergence of the EM
+static void print_converged(FILE *fp, const char *alg, real ftol,
+ gmx_int64_t count, gmx_bool bDone, gmx_int64_t nsteps,
+ real epot, real fmax, int nfmax, real fnorm)
+{
+ char buf[STEPSTRSIZE];
+
+ if (bDone)
+ {
+ fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n",
+ alg, ftol, gmx_step_str(count, buf));
+ }
+ else if (count < nsteps)
+ {
+ fprintf(fp, "\n%s converged to machine precision in %s steps,\n"
+ "but did not reach the requested Fmax < %g.\n",
+ alg, gmx_step_str(count, buf), ftol);
+ }
+ else
+ {
+ fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n",
+ alg, ftol, gmx_step_str(count, buf));
+ }
+
+#if GMX_DOUBLE
+ fprintf(fp, "Potential Energy = %21.14e\n", epot);
+ fprintf(fp, "Maximum force = %21.14e on atom %d\n", fmax, nfmax+1);
+ fprintf(fp, "Norm of force = %21.14e\n", fnorm);
+#else
+ fprintf(fp, "Potential Energy = %14.7e\n", epot);
+ fprintf(fp, "Maximum force = %14.7e on atom %d\n", fmax, nfmax+1);
+ fprintf(fp, "Norm of force = %14.7e\n", fnorm);
+#endif
+}
+
+//! Compute the norm and max of the force array in parallel
+static void get_f_norm_max(t_commrec *cr,
+ t_grpopts *opts, t_mdatoms *mdatoms, rvec *f,
+ real *fnorm, real *fmax, int *a_fmax)
+{
+ double fnorm2, *sum;
+ real fmax2, fam;
+ int la_max, a_max, start, end, i, m, gf;
+
+ /* This routine finds the largest force and returns it.
+ * On parallel machines the global max is taken.
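+ * Each rank packs its local maximum force squared and the global
+ * index of the atom carrying it into one buffer, which is summed
+ * across ranks with gmx_sumd(); the overall maximum and its atom
+ * are then picked out of the gathered per-rank entries.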
+ */ + fnorm2 = 0; + fmax2 = 0; + la_max = -1; + start = 0; + end = mdatoms->homenr; + if (mdatoms->cFREEZE) + { + for (i = start; i < end; i++) + { + gf = mdatoms->cFREEZE[i]; + fam = 0; + for (m = 0; m < DIM; m++) + { + if (!opts->nFreeze[gf][m]) + { + fam += gmx::square(f[i][m]); + } + } + fnorm2 += fam; + if (fam > fmax2) + { + fmax2 = fam; + la_max = i; + } + } + } + else + { + for (i = start; i < end; i++) + { + fam = norm2(f[i]); + fnorm2 += fam; + if (fam > fmax2) + { + fmax2 = fam; + la_max = i; + } + } + } + + if (la_max >= 0 && DOMAINDECOMP(cr)) + { + a_max = cr->dd->gatindex[la_max]; + } + else + { + a_max = la_max; + } + if (PAR(cr)) + { + snew(sum, 2*cr->nnodes+1); + sum[2*cr->nodeid] = fmax2; + sum[2*cr->nodeid+1] = a_max; + sum[2*cr->nnodes] = fnorm2; + gmx_sumd(2*cr->nnodes+1, sum, cr); + fnorm2 = sum[2*cr->nnodes]; + /* Determine the global maximum */ + for (i = 0; i < cr->nnodes; i++) + { + if (sum[2*i] > fmax2) + { + fmax2 = sum[2*i]; + a_max = (int)(sum[2*i+1] + 0.5); + } + } + sfree(sum); + } + + if (fnorm) + { + *fnorm = sqrt(fnorm2); + } + if (fmax) + { + *fmax = sqrt(fmax2); + } + if (a_fmax) + { + *a_fmax = a_max; + } +} + +//! Compute the norm of the force +static void get_state_f_norm_max(t_commrec *cr, + t_grpopts *opts, t_mdatoms *mdatoms, + em_state_t *ems) +{ + get_f_norm_max(cr, opts, mdatoms, ems->f, &ems->fnorm, &ems->fmax, &ems->a_fmax); +} + +//! Initialize the energy minimization +void init_em(FILE *fplog, const char *title, + t_commrec *cr, t_inputrec *ir, + t_state *state_global, gmx_mtop_t *top_global, + em_state_t *ems, gmx_localtop_t **top, + rvec **f, + t_nrnb *nrnb, rvec mu_tot, + t_forcerec *fr, gmx_enerdata_t **enerd, + t_graph **graph, t_mdatoms *mdatoms, gmx_global_stat_t *gstat, + gmx_vsite_t *vsite, gmx_constr_t constr, + int nfile, const t_filenm fnm[], + gmx_mdoutf_t *outf, t_mdebin **mdebin, + int imdport, unsigned long gmx_unused Flags, + gmx_wallcycle_t wcycle) +{ + int i; + real dvdl_constr; + + if (fplog) + { + fprintf(fplog, "Initiating %s\n", title); + } + + state_global->ngtc = 0; + + /* Initialize lambda variables */ + initialize_lambdas(fplog, ir, &(state_global->fep_state), state_global->lambda, NULL); + + init_nrnb(nrnb); + + /* Interactive molecular dynamics */ + init_IMD(ir, cr, top_global, fplog, 1, state_global->x, + nfile, fnm, NULL, imdport, Flags); + + if (DOMAINDECOMP(cr)) + { + *top = dd_init_local_top(top_global); + + dd_init_local_state(cr->dd, state_global, &ems->s); + + *f = NULL; + + /* Distribute the charge groups over the nodes from the master node */ + dd_partition_system(fplog, ir->init_step, cr, TRUE, 1, + state_global, top_global, ir, + &ems->s, &ems->f, mdatoms, *top, + fr, vsite, constr, + nrnb, NULL, FALSE); + dd_store_state(cr->dd, &ems->s); + + *graph = NULL; + } + else + { + snew(*f, top_global->natoms); + + /* Just copy the state */ + ems->s = *state_global; + /* We need to allocate one element extra, since we might use + * (unaligned) 4-wide SIMD loads to access rvec entries. 
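+ * This is why the snew() calls below allocate nalloc + 1 elements:
+ * a 4-wide SIMD load that starts at the last 3-real rvec would
+ * otherwise read past the end of the array.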
+ */ + snew(ems->s.x, ems->s.nalloc + 1); + snew(ems->f, ems->s.nalloc+1); + snew(ems->s.v, ems->s.nalloc+1); + for (i = 0; i < state_global->natoms; i++) + { + copy_rvec(state_global->x[i], ems->s.x[i]); + } + copy_mat(state_global->box, ems->s.box); + + *top = gmx_mtop_generate_local_top(top_global, ir->efep != efepNO); + + forcerec_set_excl_load(fr, *top); + + setup_bonded_threading(fr, &(*top)->idef); + + if (ir->ePBC != epbcNONE && !fr->bMolPBC) + { + *graph = mk_graph(fplog, &((*top)->idef), 0, top_global->natoms, FALSE, FALSE); + } + else + { + *graph = NULL; + } + + atoms2md(top_global, ir, 0, NULL, top_global->natoms, mdatoms); + update_mdatoms(mdatoms, state_global->lambda[efptFEP]); + + if (vsite) + { + set_vsite_top(vsite, *top, mdatoms, cr); + } + } + + if (constr) + { + if (ir->eConstrAlg == econtSHAKE && + gmx_mtop_ftype_count(top_global, F_CONSTR) > 0) + { + gmx_fatal(FARGS, "Can not do energy minimization with %s, use %s\n", + econstr_names[econtSHAKE], econstr_names[econtLINCS]); + } + + if (!DOMAINDECOMP(cr)) + { + set_constraints(constr, *top, ir, mdatoms, cr); + } + + if (!ir->bContinuation) + { + /* Constrain the starting coordinates */ + dvdl_constr = 0; + constrain(PAR(cr) ? NULL : fplog, TRUE, TRUE, constr, &(*top)->idef, + ir, cr, -1, 0, 1.0, mdatoms, + ems->s.x, ems->s.x, NULL, fr->bMolPBC, ems->s.box, + ems->s.lambda[efptFEP], &dvdl_constr, + NULL, NULL, nrnb, econqCoord); + } + } + + if (PAR(cr)) + { + *gstat = global_stat_init(ir); + } + else + { + *gstat = NULL; + } + + *outf = init_mdoutf(fplog, nfile, fnm, 0, cr, ir, top_global, NULL, wcycle); + + snew(*enerd, 1); + init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda, + *enerd); + + if (mdebin != NULL) + { + /* Init bin for energy stuff */ + *mdebin = init_mdebin(mdoutf_get_fp_ene(*outf), top_global, ir, NULL); + } + + clear_rvec(mu_tot); + calc_shifts(ems->s.box, fr->shift_vec); + + /* PLUMED */ + if(plumedswitch){ + if(cr->ms && cr->ms->nsim>1) { + if(MASTER(cr)) (*plumedcmd) (plumedmain,"GREX setMPIIntercomm",&cr->ms->mpi_comm_masters); + if(PAR(cr)){ + if(DOMAINDECOMP(cr)) { + (*plumedcmd) (plumedmain,"GREX setMPIIntracomm",&cr->dd->mpi_comm_all); + }else{ + (*plumedcmd) (plumedmain,"GREX setMPIIntracomm",&cr->mpi_comm_mysim); + } + } + (*plumedcmd) (plumedmain,"GREX init",NULL); + } + if(PAR(cr)){ + if(DOMAINDECOMP(cr)) { + (*plumedcmd) (plumedmain,"setMPIComm",&cr->dd->mpi_comm_all); + }else{ + (*plumedcmd) (plumedmain,"setMPIComm",&cr->mpi_comm_mysim); + } + } + (*plumedcmd) (plumedmain,"setNatoms",&top_global->natoms); + (*plumedcmd) (plumedmain,"setMDEngine","gromacs"); + (*plumedcmd) (plumedmain,"setLog",fplog); + real real_delta_t; + real_delta_t=ir->delta_t; + (*plumedcmd) (plumedmain,"setTimestep",&real_delta_t); + (*plumedcmd) (plumedmain,"init",NULL); + + if(PAR(cr)){ + if(DOMAINDECOMP(cr)) { + (*plumedcmd) (plumedmain,"setAtomsNlocal",&cr->dd->nat_home); + (*plumedcmd) (plumedmain,"setAtomsGatindex",cr->dd->gatindex); + } + } + } + /* END PLUMED */ +} + +//! Finalize the minimization +static void finish_em(t_commrec *cr, gmx_mdoutf_t outf, + gmx_walltime_accounting_t walltime_accounting, + gmx_wallcycle_t wcycle) +{ + if (!(cr->duty & DUTY_PME)) + { + /* Tell the PME only node to finish */ + gmx_pme_send_finish(cr); + } + + done_mdoutf(outf); + + em_time_end(walltime_accounting, wcycle); +} + +//! 
Swap two different EM states during minimization +static void swap_em_state(em_state_t *ems1, em_state_t *ems2) +{ + em_state_t tmp; + + tmp = *ems1; + *ems1 = *ems2; + *ems2 = tmp; +} + +//! Copy coordinate from an EM state to a "normal" state structure +static void copy_em_coords(em_state_t *ems, t_state *state) +{ + int i; + + for (i = 0; (i < state->natoms); i++) + { + copy_rvec(ems->s.x[i], state->x[i]); + } +} + +//! Save the EM trajectory +static void write_em_traj(FILE *fplog, t_commrec *cr, + gmx_mdoutf_t outf, + gmx_bool bX, gmx_bool bF, const char *confout, + gmx_mtop_t *top_global, + t_inputrec *ir, gmx_int64_t step, + em_state_t *state, + t_state *state_global) +{ + int mdof_flags; + gmx_bool bIMDout = FALSE; + + + /* Shall we do IMD output? */ + if (ir->bIMD) + { + bIMDout = do_per_step(step, IMD_get_step(ir->imd->setup)); + } + + if ((bX || bF || bIMDout || confout != NULL) && !DOMAINDECOMP(cr)) + { + copy_em_coords(state, state_global); + } + + mdof_flags = 0; + if (bX) + { + mdof_flags |= MDOF_X; + } + if (bF) + { + mdof_flags |= MDOF_F; + } + + /* If we want IMD output, set appropriate MDOF flag */ + if (ir->bIMD) + { + mdof_flags |= MDOF_IMD; + } + + mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, + top_global, step, (double)step, + &state->s, state_global, state->f); + + if (confout != NULL && MASTER(cr)) + { + if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr)) + { + /* Make molecules whole only for confout writing */ + do_pbc_mtop(fplog, ir->ePBC, state_global->box, top_global, + state_global->x); + } + + write_sto_conf_mtop(confout, + *top_global->name, top_global, + state_global->x, NULL, ir->ePBC, state_global->box); + } +} + +//! Do one minimization step +static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md, + gmx_bool bMolPBC, + em_state_t *ems1, real a, rvec *f, em_state_t *ems2, + gmx_constr_t constr, gmx_localtop_t *top, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_int64_t count) + +{ + t_state *s1, *s2; + int i; + int start, end; + rvec *x1, *x2; + real dvdl_constr; + int nthreads gmx_unused; + + s1 = &ems1->s; + s2 = &ems2->s; + + if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count) + { + gmx_incons("state mismatch in do_em_step"); + } + + s2->flags = s1->flags; + + if (s2->nalloc != s1->nalloc) + { + s2->nalloc = s1->nalloc; + /* We need to allocate one element extra, since we might use + * (unaligned) 4-wide SIMD loads to access rvec entries. 
+ */
+ srenew(s2->x, s1->nalloc + 1);
+ srenew(ems2->f, s1->nalloc);
+ if (s2->flags & (1<<estCGP))
+ {
+ srenew(s2->cg_p, s1->nalloc + 1);
+ }
+ }
+
+ s2->natoms = s1->natoms;
+ copy_mat(s1->box, s2->box);
+ /* Copy free energy state */
+ for (i = 0; i < efptNR; i++)
+ {
+ s2->lambda[i] = s1->lambda[i];
+ }
+
+ start = 0;
+ end = md->homenr;
+
+ x1 = s1->x;
+ x2 = s2->x;
+
+ // cppcheck-suppress unreadVariable
+ nthreads = gmx_omp_nthreads_get(emntUpdate);
+#pragma omp parallel num_threads(nthreads)
+ {
+ int gf, i, m;
+
+ gf = 0;
+#pragma omp for schedule(static) nowait
+ for (i = start; i < end; i++)
+ {
+ try
+ {
+ if (md->cFREEZE)
+ {
+ gf = md->cFREEZE[i];
+ }
+ for (m = 0; m < DIM; m++)
+ {
+ if (ir->opts.nFreeze[gf][m])
+ {
+ x2[i][m] = x1[i][m];
+ }
+ else
+ {
+ x2[i][m] = x1[i][m] + a*f[i][m];
+ }
+ }
+ }
+ GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
+ }
+
+ if (s2->flags & (1<<estCGP))
+ {
+ /* Copy the CG p vector */
+ x1 = s1->cg_p;
+ x2 = s2->cg_p;
+#pragma omp for schedule(static) nowait
+ for (i = start; i < end; i++)
+ {
+ // Trivial OpenMP block that does not throw
+ copy_rvec(x1[i], x2[i]);
+ }
+ }
+
+ if (DOMAINDECOMP(cr))
+ {
+ s2->ddp_count = s1->ddp_count;
+ if (s2->cg_gl_nalloc < s1->cg_gl_nalloc)
+ {
+#pragma omp barrier
+ s2->cg_gl_nalloc = s1->cg_gl_nalloc;
+ try
+ {
+ /* We need to allocate one element extra, since we might use
+ * (unaligned) 4-wide SIMD loads to access rvec entries.
+ */
+ srenew(s2->cg_gl, s2->cg_gl_nalloc + 1);
+ }
+ GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
+#pragma omp barrier
+ }
+ s2->ncg_gl = s1->ncg_gl;
+#pragma omp for schedule(static) nowait
+ for (i = 0; i < s2->ncg_gl; i++)
+ {
+ s2->cg_gl[i] = s1->cg_gl[i];
+ }
+ s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
+ }
+ }
+
+ if (constr)
+ {
+ wallcycle_start(wcycle, ewcCONSTR);
+ dvdl_constr = 0;
+ constrain(NULL, TRUE, TRUE, constr, &top->idef,
+ ir, cr, count, 0, 1.0, md,
+ s1->x, s2->x, NULL, bMolPBC, s2->box,
+ s2->lambda[efptBONDED], &dvdl_constr,
+ NULL, NULL, nrnb, econqCoord);
+ wallcycle_stop(wcycle, ewcCONSTR);
+ }
+}
+
+//! Prepare EM for using domain decomposition parallelization
+static void em_dd_partition_system(FILE *fplog, int step, t_commrec *cr,
+ gmx_mtop_t *top_global, t_inputrec *ir,
+ em_state_t *ems, gmx_localtop_t *top,
+ t_mdatoms *mdatoms, t_forcerec *fr,
+ gmx_vsite_t *vsite, gmx_constr_t constr,
+ t_nrnb *nrnb, gmx_wallcycle_t wcycle)
+{
+ /* Repartition the domain decomposition */
+ dd_partition_system(fplog, step, cr, FALSE, 1,
+ NULL, top_global, ir,
+ &ems->s, &ems->f,
+ mdatoms, top, fr, vsite, constr,
+ nrnb, wcycle, FALSE);
+ dd_store_state(cr->dd, &ems->s);
+}
+
+//!
Do one energy evaluation
+static void evaluate_energy(FILE *fplog, t_commrec *cr,
+ gmx_mtop_t *top_global,
+ em_state_t *ems, gmx_localtop_t *top,
+ t_inputrec *inputrec,
+ t_nrnb *nrnb, gmx_wallcycle_t wcycle,
+ gmx_global_stat_t gstat,
+ gmx_vsite_t *vsite, gmx_constr_t constr,
+ t_fcdata *fcd,
+ t_graph *graph, t_mdatoms *mdatoms,
+ t_forcerec *fr, rvec mu_tot,
+ gmx_enerdata_t *enerd, tensor vir, tensor pres,
+ gmx_int64_t count, gmx_bool bFirst)
+{
+ real t;
+ gmx_bool bNS;
+ tensor force_vir, shake_vir, ekin;
+ real dvdl_constr, prescorr, enercorr, dvdlcorr;
+ real terminate = 0;
+
+ /* Set the time to the initial time, the time does not change during EM */
+ t = inputrec->init_t;
+
+ if (bFirst ||
+ (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count))
+ {
+ /* This is the first state or an old state used before the last ns */
+ bNS = TRUE;
+ }
+ else
+ {
+ bNS = FALSE;
+ if (inputrec->nstlist > 0)
+ {
+ bNS = TRUE;
+ }
+ }
+
+ if (vsite)
+ {
+ construct_vsites(vsite, ems->s.x, 1, NULL,
+ top->idef.iparams, top->idef.il,
+ fr->ePBC, fr->bMolPBC, cr, ems->s.box);
+ }
+
+ if (DOMAINDECOMP(cr) && bNS)
+ {
+ /* Repartition the domain decomposition */
+ em_dd_partition_system(fplog, count, cr, top_global, inputrec,
+ ems, top, mdatoms, fr, vsite, constr,
+ nrnb, wcycle);
+ }
+
+ /* Calc force & energy on new trial position */
+ /* do_force always puts the charge groups in the box and shifts again
+ * We do not unshift, so molecules are always whole in congrad.c
+ */
+ /* PLUMED */
+ int plumedNeedsEnergy=0;
+ matrix plumed_vir;
+ if(plumedswitch){
+ long int lstep=count; (*plumedcmd)(plumedmain,"setStepLong",&lstep);
+ (*plumedcmd) (plumedmain,"setPositions",&ems->s.x[0][0]);
+ (*plumedcmd) (plumedmain,"setMasses",&mdatoms->massT[0]);
+ (*plumedcmd) (plumedmain,"setCharges",&mdatoms->chargeA[0]);
+ (*plumedcmd) (plumedmain,"setBox",&ems->s.box[0][0]);
+ (*plumedcmd) (plumedmain,"prepareCalc",NULL);
+ (*plumedcmd) (plumedmain,"setForces",&ems->f[0][0]);
+ (*plumedcmd) (plumedmain,"isEnergyNeeded",&plumedNeedsEnergy);
+ clear_mat(plumed_vir);
+ (*plumedcmd) (plumedmain,"setVirial",&plumed_vir[0][0]);
+ }
+ /* END PLUMED */
+
+ do_force(fplog, cr, inputrec,
+ count, nrnb, wcycle, top, &top_global->groups,
+ ems->s.box, ems->s.x, &ems->s.hist,
+ ems->f, force_vir, mdatoms, enerd, fcd,
+ ems->s.lambda, graph, fr, vsite, mu_tot, t, NULL, NULL, TRUE,
+ GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
+ GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
+ (bNS ?
GMX_FORCE_NS : 0)); + /* PLUMED */ + if(plumedswitch){ + if(plumedNeedsEnergy) { + msmul(force_vir,2.0,plumed_vir); + (*plumedcmd) (plumedmain,"setEnergy",&enerd->term[F_EPOT]); + (*plumedcmd) (plumedmain,"performCalc",NULL); + msmul(plumed_vir,0.5,force_vir); + } else { + msmul(plumed_vir,0.5,plumed_vir); + m_add(force_vir,plumed_vir,force_vir); + } + } + /* END PLUMED */ + + /* Clear the unused shake virial and pressure */ + clear_mat(shake_vir); + clear_mat(pres); + + /* Communicate stuff when parallel */ + if (PAR(cr) && inputrec->eI != eiNM) + { + wallcycle_start(wcycle, ewcMoveE); + + global_stat(gstat, cr, enerd, force_vir, shake_vir, mu_tot, + inputrec, NULL, NULL, NULL, 1, &terminate, + NULL, FALSE, + CGLO_ENERGY | + CGLO_PRESSURE | + CGLO_CONSTRAINT); + + wallcycle_stop(wcycle, ewcMoveE); + } + + /* Calculate long range corrections to pressure and energy */ + calc_dispcorr(inputrec, fr, ems->s.box, ems->s.lambda[efptVDW], + pres, force_vir, &prescorr, &enercorr, &dvdlcorr); + enerd->term[F_DISPCORR] = enercorr; + enerd->term[F_EPOT] += enercorr; + enerd->term[F_PRES] += prescorr; + enerd->term[F_DVDL] += dvdlcorr; + + ems->epot = enerd->term[F_EPOT]; + + if (constr) + { + /* Project out the constraint components of the force */ + wallcycle_start(wcycle, ewcCONSTR); + dvdl_constr = 0; + constrain(NULL, FALSE, FALSE, constr, &top->idef, + inputrec, cr, count, 0, 1.0, mdatoms, + ems->s.x, ems->f, ems->f, fr->bMolPBC, ems->s.box, + ems->s.lambda[efptBONDED], &dvdl_constr, + NULL, &shake_vir, nrnb, econqForceDispl); + enerd->term[F_DVDL_CONSTR] += dvdl_constr; + m_add(force_vir, shake_vir, vir); + wallcycle_stop(wcycle, ewcCONSTR); + } + else + { + copy_mat(force_vir, vir); + } + + clear_mat(ekin); + enerd->term[F_PRES] = + calc_pres(fr->ePBC, inputrec->nwall, ems->s.box, ekin, vir, pres); + + sum_dhdl(enerd, ems->s.lambda, inputrec->fepvals); + + if (EI_ENERGY_MINIMIZATION(inputrec->eI)) + { + get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, ems); + } +} + +//! Parallel utility summing energies and forces +static double reorder_partsum(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms, + gmx_mtop_t *top_global, + em_state_t *s_min, em_state_t *s_b) +{ + rvec *fm, *fb, *fmg; + t_block *cgs_gl; + int ncg, *cg_gl, *index, c, cg, i, a0, a1, a, gf, m; + double partsum; + unsigned char *grpnrFREEZE; + + if (debug) + { + fprintf(debug, "Doing reorder_partsum\n"); + } + + fm = s_min->f; + fb = s_b->f; + + cgs_gl = dd_charge_groups_global(cr->dd); + index = cgs_gl->index; + + /* Collect fm in a global vector fmg. + * This conflicts with the spirit of domain decomposition, + * but to fully optimize this a much more complicated algorithm is required. 
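+ * What is accumulated here is the Polak-Ribiere numerator,
+ *   partsum = sum_i (f_b[i] - f_min[i]) . f_b[i],
+ * restricted to non-frozen degrees of freedom, with both force sets
+ * brought into the same global atom order first; pr_beta() then
+ * divides this by the squared norm of the forces in s_min.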
+ */ + snew(fmg, top_global->natoms); + + ncg = s_min->s.ncg_gl; + cg_gl = s_min->s.cg_gl; + i = 0; + for (c = 0; c < ncg; c++) + { + cg = cg_gl[c]; + a0 = index[cg]; + a1 = index[cg+1]; + for (a = a0; a < a1; a++) + { + copy_rvec(fm[i], fmg[a]); + i++; + } + } + gmx_sum(top_global->natoms*3, fmg[0], cr); + + /* Now we will determine the part of the sum for the cgs in state s_b */ + ncg = s_b->s.ncg_gl; + cg_gl = s_b->s.cg_gl; + partsum = 0; + i = 0; + gf = 0; + grpnrFREEZE = top_global->groups.grpnr[egcFREEZE]; + for (c = 0; c < ncg; c++) + { + cg = cg_gl[c]; + a0 = index[cg]; + a1 = index[cg+1]; + for (a = a0; a < a1; a++) + { + if (mdatoms->cFREEZE && grpnrFREEZE) + { + gf = grpnrFREEZE[i]; + } + for (m = 0; m < DIM; m++) + { + if (!opts->nFreeze[gf][m]) + { + partsum += (fb[i][m] - fmg[a][m])*fb[i][m]; + } + } + i++; + } + } + + sfree(fmg); + + return partsum; +} + +//! Print some stuff, like beta, whatever that means. +static real pr_beta(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms, + gmx_mtop_t *top_global, + em_state_t *s_min, em_state_t *s_b) +{ + rvec *fm, *fb; + double sum; + int gf, i, m; + + /* This is just the classical Polak-Ribiere calculation of beta; + * it looks a bit complicated since we take freeze groups into account, + * and might have to sum it in parallel runs. + */ + + if (!DOMAINDECOMP(cr) || + (s_min->s.ddp_count == cr->dd->ddp_count && + s_b->s.ddp_count == cr->dd->ddp_count)) + { + fm = s_min->f; + fb = s_b->f; + sum = 0; + gf = 0; + /* This part of code can be incorrect with DD, + * since the atom ordering in s_b and s_min might differ. + */ + for (i = 0; i < mdatoms->homenr; i++) + { + if (mdatoms->cFREEZE) + { + gf = mdatoms->cFREEZE[i]; + } + for (m = 0; m < DIM; m++) + { + if (!opts->nFreeze[gf][m]) + { + sum += (fb[i][m] - fm[i][m])*fb[i][m]; + } + } + } + } + else + { + /* We need to reorder cgs while summing */ + sum = reorder_partsum(cr, opts, mdatoms, top_global, s_min, s_b); + } + if (PAR(cr)) + { + gmx_sumd(1, &sum, cr); + } + + return sum/gmx::square(s_min->fnorm); +} + +namespace gmx +{ + +/*! 
\brief Do conjugate gradients minimization + \copydoc integrator_t (FILE *fplog, t_commrec *cr, + int nfile, const t_filenm fnm[], + const gmx_output_env_t *oenv, gmx_bool bVerbose, + int nstglobalcomm, + gmx_vsite_t *vsite, gmx_constr_t constr, + int stepout, + t_inputrec *inputrec, + gmx_mtop_t *top_global, t_fcdata *fcd, + t_state *state_global, + t_mdatoms *mdatoms, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_edsam_t ed, + t_forcerec *fr, + int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, + real cpt_period, real max_hours, + int imdport, + unsigned long Flags, + gmx_walltime_accounting_t walltime_accounting) + */ +double do_cg(FILE *fplog, t_commrec *cr, + int nfile, const t_filenm fnm[], + const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose, + int gmx_unused nstglobalcomm, + gmx_vsite_t *vsite, gmx_constr_t constr, + int gmx_unused stepout, + t_inputrec *inputrec, + gmx_mtop_t *top_global, t_fcdata *fcd, + t_state *state_global, + t_mdatoms *mdatoms, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_edsam_t gmx_unused ed, + t_forcerec *fr, + int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed, + real gmx_unused cpt_period, real gmx_unused max_hours, + int imdport, + unsigned long gmx_unused Flags, + gmx_walltime_accounting_t walltime_accounting) +{ + const char *CG = "Polak-Ribiere Conjugate Gradients"; + + em_state_t *s_min, *s_a, *s_b, *s_c; + gmx_localtop_t *top; + gmx_enerdata_t *enerd; + rvec *f; + gmx_global_stat_t gstat; + t_graph *graph; + rvec *p, *sf; + double gpa, gpb, gpc, tmp, minstep; + real fnormn; + real stepsize; + real a, b, c, beta = 0.0; + real epot_repl = 0; + real pnorm; + t_mdebin *mdebin; + gmx_bool converged, foundlower; + rvec mu_tot; + gmx_bool do_log = FALSE, do_ene = FALSE, do_x, do_f; + tensor vir, pres; + int number_steps, neval = 0, nstcg = inputrec->nstcgsteep; + gmx_mdoutf_t outf; + int i, m, gf, step, nminstep; + + step = 0; + + s_min = init_em_state(); + s_a = init_em_state(); + s_b = init_em_state(); + s_c = init_em_state(); + + /* Init em and store the local state in s_min */ + init_em(fplog, CG, cr, inputrec, + state_global, top_global, s_min, &top, &f, + nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, + nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle); + + /* Print to log file */ + print_em_start(fplog, cr, walltime_accounting, wcycle, CG); + + /* Max number of steps */ + number_steps = inputrec->nsteps; + + if (MASTER(cr)) + { + sp_header(stderr, CG, inputrec->em_tol, number_steps); + } + if (fplog) + { + sp_header(fplog, CG, inputrec->em_tol, number_steps); + } + + /* Call the force routine and some auxiliary (neighboursearching etc.) */ + /* do_force always puts the charge groups in the box and shifts again + * We do not unshift, so molecules are always whole in congrad.c + */ + evaluate_energy(fplog, cr, + top_global, s_min, top, + inputrec, nrnb, wcycle, gstat, + vsite, constr, fcd, graph, mdatoms, fr, + mu_tot, enerd, vir, pres, -1, TRUE); + where(); + + if (MASTER(cr)) + { + /* Copy stuff to the energy bin for easy printing etc. 
*/ + upd_mdebin(mdebin, FALSE, FALSE, (double)step, + mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box, + NULL, NULL, vir, pres, NULL, mu_tot, constr); + + print_ebin_header(fplog, step, step); + print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL, + mdebin, fcd, &(top_global->groups), &(inputrec->opts)); + } + where(); + + /* Estimate/guess the initial stepsize */ + stepsize = inputrec->em_stepsize/s_min->fnorm; + + if (MASTER(cr)) + { + double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms)); + fprintf(stderr, " F-max = %12.5e on atom %d\n", + s_min->fmax, s_min->a_fmax+1); + fprintf(stderr, " F-Norm = %12.5e\n", + s_min->fnorm/sqrtNumAtoms); + fprintf(stderr, "\n"); + /* and copy to the log file too... */ + fprintf(fplog, " F-max = %12.5e on atom %d\n", + s_min->fmax, s_min->a_fmax+1); + fprintf(fplog, " F-Norm = %12.5e\n", + s_min->fnorm/sqrtNumAtoms); + fprintf(fplog, "\n"); + } + /* Start the loop over CG steps. + * Each successful step is counted, and we continue until + * we either converge or reach the max number of steps. + */ + converged = FALSE; + for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++) + { + + /* start taking steps in a new direction + * First time we enter the routine, beta=0, and the direction is + * simply the negative gradient. + */ + + /* Calculate the new direction in p, and the gradient in this direction, gpa */ + p = s_min->s.cg_p; + sf = s_min->f; + gpa = 0; + gf = 0; + for (i = 0; i < mdatoms->homenr; i++) + { + if (mdatoms->cFREEZE) + { + gf = mdatoms->cFREEZE[i]; + } + for (m = 0; m < DIM; m++) + { + if (!inputrec->opts.nFreeze[gf][m]) + { + p[i][m] = sf[i][m] + beta*p[i][m]; + gpa -= p[i][m]*sf[i][m]; + /* f is negative gradient, thus the sign */ + } + else + { + p[i][m] = 0; + } + } + } + + /* Sum the gradient along the line across CPUs */ + if (PAR(cr)) + { + gmx_sumd(1, &gpa, cr); + } + + /* Calculate the norm of the search vector */ + get_f_norm_max(cr, &(inputrec->opts), mdatoms, p, &pnorm, NULL, NULL); + + /* Just in case stepsize reaches zero due to numerical precision... */ + if (stepsize <= 0) + { + stepsize = inputrec->em_stepsize/pnorm; + } + + /* + * Double check the value of the derivative in the search direction. + * If it is positive it must be due to the old information in the + * CG formula, so just remove that and start over with beta=0. + * This corresponds to a steepest descent step. + */ + if (gpa > 0) + { + beta = 0; + step--; /* Don't count this step since we are restarting */ + continue; /* Go back to the beginning of the big for-loop */ + } + + /* Calculate minimum allowed stepsize, before the average (norm) + * relative change in coordinate is smaller than precision + */ + minstep = 0; + for (i = 0; i < mdatoms->homenr; i++) + { + for (m = 0; m < DIM; m++) + { + tmp = fabs(s_min->s.x[i][m]); + if (tmp < 1.0) + { + tmp = 1.0; + } + tmp = p[i][m]/tmp; + minstep += tmp*tmp; + } + } + /* Add up from all CPUs */ + if (PAR(cr)) + { + gmx_sumd(1, &minstep, cr); + } + + minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms)); + + if (stepsize < minstep) + { + converged = TRUE; + break; + } + + /* Write coordinates if necessary */ + do_x = do_per_step(step, inputrec->nstxout); + do_f = do_per_step(step, inputrec->nstfout); + + write_em_traj(fplog, cr, outf, do_x, do_f, NULL, + top_global, inputrec, step, + s_min, state_global); + + /* Take a step downhill. 
+ * In theory, we should minimize the function along this direction.
+ * That is quite possible, but it turns out to take 5-10 function evaluations
+ * for each line. However, we don't really need to find the exact minimum -
+ * it is much better to start a new CG step in a modified direction as soon
+ * as we are close to it. This will save a lot of energy evaluations.
+ *
+ * In practice, we just try to take a single step.
+ * If it worked (i.e. lowered the energy), we increase the stepsize but
+ * then continue straight to the next CG step without trying to find any minimum.
+ * If it didn't work (higher energy), there must be a minimum somewhere between
+ * the old position and the new one.
+ *
+ * Due to the finite numerical accuracy, it turns out that it is a good idea
+ * to even accept a SMALL increase in energy, if the derivative is still downhill.
+ * This leads to lower final energies in the tests I've done. / Erik
+ */
+ s_a->epot = s_min->epot;
+ a = 0.0;
+ c = a + stepsize; /* reference position along line is zero */
+
+ if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count)
+ {
+ em_dd_partition_system(fplog, step, cr, top_global, inputrec,
+ s_min, top, mdatoms, fr, vsite, constr,
+ nrnb, wcycle);
+ }
+
+ /* Take a trial step (new coords in s_c) */
+ do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, c, s_min->s.cg_p, s_c,
+ constr, top, nrnb, wcycle, -1);
+
+ neval++;
+ /* Calculate energy for the trial step */
+ evaluate_energy(fplog, cr,
+ top_global, s_c, top,
+ inputrec, nrnb, wcycle, gstat,
+ vsite, constr, fcd, graph, mdatoms, fr,
+ mu_tot, enerd, vir, pres, -1, FALSE);
+
+ /* Calc derivative along line */
+ p = s_c->s.cg_p;
+ sf = s_c->f;
+ gpc = 0;
+ for (i = 0; i < mdatoms->homenr; i++)
+ {
+ for (m = 0; m < DIM; m++)
+ {
+ gpc -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
+ }
+ }
+ /* Sum the gradient along the line across CPUs */
+ if (PAR(cr))
+ {
+ gmx_sumd(1, &gpc, cr);
+ }
+
+ /* This is the max amount of increase in energy we tolerate */
+ tmp = sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
+
+ /* Accept the step if the energy is lower, or if it is not significantly higher
+ * and the line derivative is still negative.
+ */
+ if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp)))
+ {
+ foundlower = TRUE;
+ /* Great, we found a better energy. Increase step for next iteration
+ * if we are still going down, decrease it otherwise
+ */
+ if (gpc < 0)
+ {
+ stepsize *= 1.618034; /* The golden section */
+ }
+ else
+ {
+ stepsize *= 0.618034; /* 1/golden section */
+ }
+ }
+ else
+ {
+ /* New energy is the same or higher. We will have to do some work
+ * to find a smaller value in the interval. Take smaller step next time!
+ */
+ foundlower = FALSE;
+ stepsize *= 0.618034;
+ }
+
+ /* OK, if we didn't find a lower value we will have to locate one now - there must
+ * be one in the interval [a=0,c].
+ * The same thing is valid here, though: Don't spend dozens of iterations to find
+ * the line minimum. We try to interpolate based on the derivative at the endpoints,
+ * and only continue until we find a lower value. In most cases this means 1-2 iterations.
+ *
+ * I also have a safeguard for potentially really pathological functions so we never
+ * take more than 20 steps before we give up ...
+ *
+ * If we already found a lower value we just skip this step and continue to the update.
+ */
+ if (!foundlower)
+ {
+ nminstep = 0;
+
+ do
+ {
+ /* Select a new trial point.
+ * If the derivatives at points a & c have different sign we interpolate to zero, + * otherwise just do a bisection. + */ + if (gpa < 0 && gpc > 0) + { + b = a + gpa*(a-c)/(gpc-gpa); + } + else + { + b = 0.5*(a+c); + } + + /* safeguard if interpolation close to machine accuracy causes errors: + * never go outside the interval + */ + if (b <= a || b >= c) + { + b = 0.5*(a+c); + } + + if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) + { + /* Reload the old state */ + em_dd_partition_system(fplog, -1, cr, top_global, inputrec, + s_min, top, mdatoms, fr, vsite, constr, + nrnb, wcycle); + } + + /* Take a trial step to this new point - new coords in s_b */ + do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, b, s_min->s.cg_p, s_b, + constr, top, nrnb, wcycle, -1); + + neval++; + /* Calculate energy for the trial step */ + evaluate_energy(fplog, cr, + top_global, s_b, top, + inputrec, nrnb, wcycle, gstat, + vsite, constr, fcd, graph, mdatoms, fr, + mu_tot, enerd, vir, pres, -1, FALSE); + + /* p does not change within a step, but since the domain decomposition + * might change, we have to use cg_p of s_b here. + */ + p = s_b->s.cg_p; + sf = s_b->f; + gpb = 0; + for (i = 0; i < mdatoms->homenr; i++) + { + for (m = 0; m < DIM; m++) + { + gpb -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */ + } + } + /* Sum the gradient along the line across CPUs */ + if (PAR(cr)) + { + gmx_sumd(1, &gpb, cr); + } + + if (debug) + { + fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n", + s_a->epot, s_b->epot, s_c->epot, gpb); + } + + epot_repl = s_b->epot; + + /* Keep one of the intervals based on the value of the derivative at the new point */ + if (gpb > 0) + { + /* Replace c endpoint with b */ + swap_em_state(s_b, s_c); + c = b; + gpc = gpb; + } + else + { + /* Replace a endpoint with b */ + swap_em_state(s_b, s_a); + a = b; + gpa = gpb; + } + + /* + * Stop search as soon as we find a value smaller than the endpoints. + * Never run more than 20 steps, no matter what. + */ + nminstep++; + } + while ((epot_repl > s_a->epot || epot_repl > s_c->epot) && + (nminstep < 20)); + + if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS || + nminstep >= 20) + { + /* OK. We couldn't find a significantly lower energy. + * If beta==0 this was steepest descent, and then we give up. + * If not, set beta=0 and restart with steepest descent before quitting. + */ + if (beta == 0.0) + { + /* Converged */ + converged = TRUE; + break; + } + else + { + /* Reset memory before giving up */ + beta = 0.0; + continue; + } + } + + /* Select min energy state of A & C, put the best in B. + */ + if (s_c->epot < s_a->epot) + { + if (debug) + { + fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n", + s_c->epot, s_a->epot); + } + swap_em_state(s_b, s_c); + gpb = gpc; + } + else + { + if (debug) + { + fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n", + s_a->epot, s_c->epot); + } + swap_em_state(s_b, s_a); + gpb = gpa; + } + + } + else + { + if (debug) + { + fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n", + s_c->epot); + } + swap_em_state(s_b, s_c); + gpb = gpc; + } + + /* new search direction */ + /* beta = 0 means forget all memory and restart with steepest descents. */ + if (nstcg && ((step % nstcg) == 0)) + { + beta = 0.0; + } + else + { + /* s_min->fnorm cannot be zero, because then we would have converged + * and broken out. + */ + + /* Polak-Ribiere update. 
+ * Change to fnorm2/fnorm2_old for Fletcher-Reeves + */ + beta = pr_beta(cr, &inputrec->opts, mdatoms, top_global, s_min, s_b); + } + /* Limit beta to prevent oscillations */ + if (fabs(beta) > 5.0) + { + beta = 0.0; + } + + + /* update positions */ + swap_em_state(s_min, s_b); + gpa = gpb; + + /* Print it if necessary */ + if (MASTER(cr)) + { + if (bVerbose) + { + double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms)); + fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n", + step, s_min->epot, s_min->fnorm/sqrtNumAtoms, + s_min->fmax, s_min->a_fmax+1); + fflush(stderr); + } + /* Store the new (lower) energies */ + upd_mdebin(mdebin, FALSE, FALSE, (double)step, + mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box, + NULL, NULL, vir, pres, NULL, mu_tot, constr); + + do_log = do_per_step(step, inputrec->nstlog); + do_ene = do_per_step(step, inputrec->nstenergy); + + /* Prepare IMD energy record, if bIMD is TRUE. */ + IMD_fill_energy_record(inputrec->bIMD, inputrec->imd, enerd, step, TRUE); + + if (do_log) + { + print_ebin_header(fplog, step, step); + } + print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE, + do_log ? fplog : NULL, step, step, eprNORMAL, + mdebin, fcd, &(top_global->groups), &(inputrec->opts)); + } + + /* Send energies and positions to the IMD client if bIMD is TRUE. */ + if (do_IMD(inputrec->bIMD, step, cr, TRUE, state_global->box, state_global->x, inputrec, 0, wcycle) && MASTER(cr)) + { + IMD_send_positions(inputrec->imd); + } + + /* Stop when the maximum force lies below tolerance. + * If we have reached machine precision, converged is already set to true. + */ + converged = converged || (s_min->fmax < inputrec->em_tol); + + } /* End of the loop */ + + /* IMD cleanup, if bIMD is TRUE. */ + IMD_finalize(inputrec->bIMD, inputrec->imd); + + if (converged) + { + step--; /* we never took that last step in this case */ + + } + if (s_min->fmax > inputrec->em_tol) + { + if (MASTER(cr)) + { + warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE); + warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE); + } + converged = FALSE; + } + + if (MASTER(cr)) + { + /* If we printed energy and/or logfile last step (which was the last step) + * we don't have to do it again, but otherwise print the final values. + */ + if (!do_log) + { + /* Write final value to log since we didn't do anything the last step */ + print_ebin_header(fplog, step, step); + } + if (!do_ene || !do_log) + { + /* Write final energy file entries */ + print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE, + !do_log ? fplog : NULL, step, step, eprNORMAL, + mdebin, fcd, &(top_global->groups), &(inputrec->opts)); + } + } + + /* Print some stuff... */ + if (MASTER(cr)) + { + fprintf(stderr, "\nwriting lowest energy coordinates.\n"); + } + + /* IMPORTANT! + * For accurate normal mode calculation it is imperative that we + * store the last conformation into the full precision binary trajectory. + * + * However, we should only do it if we did NOT already write this step + * above (which we did if do_x or do_f was true). 
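+ * Hence the negated do_per_step() tests below: the final coordinates
+ * and forces are written now only if the last step did not already
+ * write them.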
+ */
+ do_x = !do_per_step(step, inputrec->nstxout);
+ do_f = (inputrec->nstfout > 0 && !do_per_step(step, inputrec->nstfout));
+
+ write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm),
+ top_global, inputrec, step,
+ s_min, state_global);
+
+
+ if (MASTER(cr))
+ {
+ double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
+ fnormn = s_min->fnorm/sqrtNumAtoms;
+ print_converged(stderr, CG, inputrec->em_tol, step, converged, number_steps,
+ s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
+ print_converged(fplog, CG, inputrec->em_tol, step, converged, number_steps,
+ s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
+
+ fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
+ }
+
+ finish_em(cr, outf, walltime_accounting, wcycle);
+
+ /* To print the actual number of steps we needed somewhere */
+ walltime_accounting_set_nsteps_done(walltime_accounting, step);
+
+ return 0;
+} /* That's all folks */
+
+
+/*! \brief Do L-BFGS minimization
+ \copydoc integrator_t (FILE *fplog, t_commrec *cr,
+ int nfile, const t_filenm fnm[],
+ const gmx_output_env_t *oenv, gmx_bool bVerbose,
+ int nstglobalcomm,
+ gmx_vsite_t *vsite, gmx_constr_t constr,
+ int stepout,
+ t_inputrec *inputrec,
+ gmx_mtop_t *top_global, t_fcdata *fcd,
+ t_state *state_global,
+ t_mdatoms *mdatoms,
+ t_nrnb *nrnb, gmx_wallcycle_t wcycle,
+ gmx_edsam_t ed,
+ t_forcerec *fr,
+ int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
+ real cpt_period, real max_hours,
+ int imdport,
+ unsigned long Flags,
+ gmx_walltime_accounting_t walltime_accounting)
+ */
+double do_lbfgs(FILE *fplog, t_commrec *cr,
+ int nfile, const t_filenm fnm[],
+ const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose,
+ int gmx_unused nstglobalcomm,
+ gmx_vsite_t *vsite, gmx_constr_t constr,
+ int gmx_unused stepout,
+ t_inputrec *inputrec,
+ gmx_mtop_t *top_global, t_fcdata *fcd,
+ t_state *state_global,
+ t_mdatoms *mdatoms,
+ t_nrnb *nrnb, gmx_wallcycle_t wcycle,
+ gmx_edsam_t gmx_unused ed,
+ t_forcerec *fr,
+ int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
+ real gmx_unused cpt_period, real gmx_unused max_hours,
+ int imdport,
+ unsigned long gmx_unused Flags,
+ gmx_walltime_accounting_t walltime_accounting)
+{
+ static const char *LBFGS = "Low-Memory BFGS Minimizer";
+ em_state_t ems;
+ gmx_localtop_t *top;
+ gmx_enerdata_t *enerd;
+ rvec *f;
+ gmx_global_stat_t gstat;
+ t_graph *graph;
+ int ncorr, nmaxcorr, point, cp, neval, nminstep;
+ double stepsize, step_taken, gpa, gpb, gpc, tmp, minstep;
+ real *rho, *alpha, *ff, *xx, *p, *s, *lastx, *lastf, **dx, **dg;
+ real *xa, *xb, *xc, *fa, *fb, *fc, *xtmp, *ftmp;
+ real a, b, c, maxdelta, delta;
+ real diag, Epot0, Epot, EpotA, EpotB, EpotC;
+ real dgdx, dgdg, sq, yr, beta;
+ t_mdebin *mdebin;
+ gmx_bool converged;
+ rvec mu_tot;
+ real fnorm, fmax;
+ gmx_bool do_log, do_ene, do_x, do_f, foundlower, *frozen;
+ tensor vir, pres;
+ int start, end, number_steps;
+ gmx_mdoutf_t outf;
+ int i, k, m, n, nfmax, gf, step;
+ int mdof_flags;
+
+ if (PAR(cr))
+ {
+ gmx_fatal(FARGS, "Cannot do parallel L-BFGS Minimization - yet.\n");
+ }
+
+ if (NULL != constr)
+ {
+ gmx_fatal(FARGS, "The combination of constraints and L-BFGS minimization is not implemented. Either do not use constraints, or use another minimizer (e.g.
steepest descent)."); + } + + n = 3*state_global->natoms; + nmaxcorr = inputrec->nbfgscorr; + + /* Allocate memory */ + /* Use pointers to real so we dont have to loop over both atoms and + * dimensions all the time... + * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real + * that point to the same memory. + */ + snew(xa, n); + snew(xb, n); + snew(xc, n); + snew(fa, n); + snew(fb, n); + snew(fc, n); + snew(frozen, n); + + snew(p, n); + snew(lastx, n); + snew(lastf, n); + snew(rho, nmaxcorr); + snew(alpha, nmaxcorr); + + snew(dx, nmaxcorr); + for (i = 0; i < nmaxcorr; i++) + { + snew(dx[i], n); + } + + snew(dg, nmaxcorr); + for (i = 0; i < nmaxcorr; i++) + { + snew(dg[i], n); + } + + step = 0; + neval = 0; + + /* Init em */ + init_em(fplog, LBFGS, cr, inputrec, + state_global, top_global, &ems, &top, &f, + nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, + nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle); + /* Do_lbfgs is not completely updated like do_steep and do_cg, + * so we free some memory again. + */ + sfree(ems.s.x); + sfree(ems.f); + + xx = (real *)state_global->x; + ff = (real *)f; + + start = 0; + end = mdatoms->homenr; + + /* Print to log file */ + print_em_start(fplog, cr, walltime_accounting, wcycle, LBFGS); + + do_log = do_ene = do_x = do_f = TRUE; + + /* Max number of steps */ + number_steps = inputrec->nsteps; + + /* Create a 3*natoms index to tell whether each degree of freedom is frozen */ + gf = 0; + for (i = start; i < end; i++) + { + if (mdatoms->cFREEZE) + { + gf = mdatoms->cFREEZE[i]; + } + for (m = 0; m < DIM; m++) + { + frozen[3*i+m] = inputrec->opts.nFreeze[gf][m]; + } + } + if (MASTER(cr)) + { + sp_header(stderr, LBFGS, inputrec->em_tol, number_steps); + } + if (fplog) + { + sp_header(fplog, LBFGS, inputrec->em_tol, number_steps); + } + + if (vsite) + { + construct_vsites(vsite, state_global->x, 1, NULL, + top->idef.iparams, top->idef.il, + fr->ePBC, fr->bMolPBC, cr, state_global->box); + } + + /* Call the force routine and some auxiliary (neighboursearching etc.) */ + /* do_force always puts the charge groups in the box and shifts again + * We do not unshift, so molecules are always whole + */ + neval++; + ems.s.x = state_global->x; + ems.f = f; + evaluate_energy(fplog, cr, + top_global, &ems, top, + inputrec, nrnb, wcycle, gstat, + vsite, constr, fcd, graph, mdatoms, fr, + mu_tot, enerd, vir, pres, -1, TRUE); + where(); + + if (MASTER(cr)) + { + /* Copy stuff to the energy bin for easy printing etc. */ + upd_mdebin(mdebin, FALSE, FALSE, (double)step, + mdatoms->tmass, enerd, state_global, inputrec->fepvals, inputrec->expandedvals, state_global->box, + NULL, NULL, vir, pres, NULL, mu_tot, constr); + + print_ebin_header(fplog, step, step); + print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL, + mdebin, fcd, &(top_global->groups), &(inputrec->opts)); + } + where(); + + /* This is the starting energy */ + Epot = enerd->term[F_EPOT]; + + fnorm = ems.fnorm; + fmax = ems.fmax; + nfmax = ems.a_fmax; + + /* Set the initial step. + * since it will be multiplied by the non-normalized search direction + * vector (force vector the first time), we scale it by the + * norm of the force. 
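+ * In other words, the code below starts from stepsize = 1.0/fnorm,
+ * so the first trial displacement has roughly the same size no
+ * matter how steep the potential happens to be.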
+ */ + + if (MASTER(cr)) + { + double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms)); + fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr); + fprintf(stderr, "   F-max             = %12.5e on atom %d\n", fmax, nfmax+1); + fprintf(stderr, "   F-Norm            = %12.5e\n", fnorm/sqrtNumAtoms); + fprintf(stderr, "\n"); + /* and copy to the log file too... */ + fprintf(fplog, "Using %d BFGS correction steps.\n\n", nmaxcorr); + fprintf(fplog, "   F-max             = %12.5e on atom %d\n", fmax, nfmax+1); + fprintf(fplog, "   F-Norm            = %12.5e\n", fnorm/sqrtNumAtoms); + fprintf(fplog, "\n"); + } + + // Point is an index to the memory of search directions, where 0 is the first one. + point = 0; + + // Set initial search direction to the force (-gradient), or 0 for frozen particles. + for (i = 0; i < n; i++) + { + if (!frozen[i]) + { + dx[point][i] = ff[i]; /* Initial search direction */ + } + else + { + dx[point][i] = 0; + } + } + + // Stepsize will be modified during the search, and actually it is not critical + // (the main efficiency in the algorithm comes from changing directions), but + // we still need an initial value, so we estimate it as the inverse of the norm; + // that way we take small steps where the potential fluctuates a lot. + stepsize = 1.0/fnorm; + + /* Start the loop over BFGS steps. + * Each successful step is counted, and we continue until + * we either converge or reach the max number of steps. + */ + + ncorr = 0; + + /* Set the gradient from the force */ + converged = FALSE; + for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++) + { + + /* Write coordinates if necessary */ + do_x = do_per_step(step, inputrec->nstxout); + do_f = do_per_step(step, inputrec->nstfout); + + mdof_flags = 0; + if (do_x) + { + mdof_flags |= MDOF_X; + } + + if (do_f) + { + mdof_flags |= MDOF_F; + } + + if (inputrec->bIMD) + { + mdof_flags |= MDOF_IMD; + } + + mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, + top_global, step, (real)step, state_global, state_global, f); + + /* Do the linesearching in the direction dx[point][0..(n-1)] */ + + /* make s a pointer to current search direction - point=0 first time we get here */ + s = dx[point]; + + // calculate line gradient in position A + for (gpa = 0, i = 0; i < n; i++) + { + gpa -= s[i]*ff[i]; + } + + /* Calculate minimum allowed stepsize along the line, before the average (norm) + * relative change in coordinate is smaller than precision + */ + for (minstep = 0, i = 0; i < n; i++) + { + tmp = fabs(xx[i]); + if (tmp < 1.0) + { + tmp = 1.0; + } + tmp = s[i]/tmp; + minstep += tmp*tmp; + } + minstep = GMX_REAL_EPS/sqrt(minstep/n); + + if (stepsize < minstep) + { + converged = TRUE; + break; + } + + // Before taking any steps along the line, store the old position + for (i = 0; i < n; i++) + { + lastx[i] = xx[i]; + lastf[i] = ff[i]; + } + Epot0 = Epot; + + for (i = 0; i < n; i++) + { + xa[i] = xx[i]; + } + + /* Take a step downhill. + * In theory, we should find the actual minimum of the function in this + * direction, somewhere along the line. + * That is quite possible, but it turns out to take 5-10 function evaluations + * for each line. However, we don't really need to find the exact minimum - + * it is much better to start a new BFGS step in a modified direction as soon + * as we are close to it. This will save a lot of energy evaluations. + * + * In practice, we just try to take a single step. + * If it worked (i.e. 
lowered the energy), we increase the stepsize but + continue straight to the next BFGS step without trying to find any minimum, + i.e. we change the search direction too. If the line was smooth, it is + likely we are in a smooth region, and then it makes sense to take longer + steps in the modified search direction too. + + If it didn't work (higher energy), there must be a minimum somewhere between + the old position and the new one. Then we need to start by finding a lower + value before we change search direction. Since the energy was apparently + quite rough, we need to decrease the step size. + + Due to the finite numerical accuracy, it turns out that it is a good idea + to accept a SMALL increase in energy, if the derivative is still downhill. + This leads to lower final energies in the tests I've done. / Erik + */ + + // State "A" is the first position along the line. + // reference position along line is initially zero + EpotA = Epot0; + a = 0.0; + + // Check stepsize first. We do not allow displacements + // larger than emstep. + // + do + { + // Pick a new position C by adding stepsize to A. + c = a + stepsize; + + // Calculate what the largest change in any individual coordinate + // would be (translation along line * gradient along line) + maxdelta = 0; + for (i = 0; i < n; i++) + { + delta = c*s[i]; + if (delta > maxdelta) + { + maxdelta = delta; + } + } + // If any displacement is larger than the stepsize limit, reduce the step + if (maxdelta > inputrec->em_stepsize) + { + stepsize *= 0.1; + } + } + while (maxdelta > inputrec->em_stepsize); + + // Take a trial step and move the coordinate array xc[] to position C + for (i = 0; i < n; i++) + { + xc[i] = lastx[i] + c*s[i]; + } + + neval++; + // Calculate energy for the trial step in position C + ems.s.x = (rvec *)xc; + ems.f = (rvec *)fc; + evaluate_energy(fplog, cr, + top_global, &ems, top, + inputrec, nrnb, wcycle, gstat, + vsite, constr, fcd, graph, mdatoms, fr, + mu_tot, enerd, vir, pres, step, FALSE); + EpotC = ems.epot; + + // Calc line gradient in position C + for (gpc = 0, i = 0; i < n; i++) + { + gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */ + } + /* Sum the gradient along the line across CPUs */ + if (PAR(cr)) + { + gmx_sumd(1, &gpc, cr); + } + + // This is the max amount of increase in energy we tolerate. + // By allowing VERY small changes (close to numerical precision) we + // frequently find even better (lower) final energies. + tmp = sqrt(GMX_REAL_EPS)*fabs(EpotA); + + // Accept the step if the energy is lower in the new position C (compared to A), + // or if it is not significantly higher and the line derivative is still negative. + if (EpotC < EpotA || (gpc < 0 && EpotC < (EpotA+tmp))) + { + // Great, we found a better energy. We no longer try to alter the + // stepsize, but simply accept this new better position. Then we select a new + // search direction instead, which will be much more efficient than continuing + // to take smaller steps along a line. Set fnorm based on the new C position, + // which will be used to update the stepsize to 1/fnorm further down. + foundlower = TRUE; + fnorm = ems.fnorm; + } + else + { + // If we got here, the energy is NOT lower in point C, i.e. it will be the same + // or higher than in point A. In this case it is pointless to move to point C, + // so we will have to do more iterations along the same line to find a smaller + // value in the interval [A=0.0,C]. 
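+ // (Intuitively, the line derivative gpa at A is negative, so the energy + // first decreases along the line; since C is not lower than A, a local + // minimum should be bracketed inside [A,C].)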
+ // Here, A is still 0.0, but that will change when we do a search in the interval + // [0.0,C] below. That search we will do by interpolation or bisection rather + // than with the stepsize, so no need to modify it. For the next search direction + // it will be reset to 1/fnorm anyway. + foundlower = FALSE; + } + + if (!foundlower) + { + // OK, if we didn't find a lower value we will have to locate one now - there must + // be one in the interval [a,c]. + // The same thing is valid here, though: Don't spend dozens of iterations to find + // the line minimum. We try to interpolate based on the derivative at the endpoints, + // and only continue until we find a lower value. In most cases this means 1-2 iterations. + // I also have a safeguard for potentially really pathological functions so we never + // take more than 20 steps before we give up. + // If we already found a lower value we just skip this step and continue to the update. + nminstep = 0; + do + { + // Select a new trial point B in the interval [A,C]. + // If the derivatives at points a & c have different sign we interpolate to zero, + // otherwise just do a bisection since there might be multiple minima/maxima + // inside the interval. + if (gpa < 0 && gpc > 0) + { + b = a + gpa*(a-c)/(gpc-gpa); + } + else + { + b = 0.5*(a+c); + } + + /* safeguard if interpolation close to machine accuracy causes errors: + * never go outside the interval + */ + if (b <= a || b >= c) + { + b = 0.5*(a+c); + } + + // Take a trial step to point B + for (i = 0; i < n; i++) + { + xb[i] = lastx[i] + b*s[i]; + } + + neval++; + // Calculate energy for the trial step in point B + ems.s.x = (rvec *)xb; + ems.f = (rvec *)fb; + evaluate_energy(fplog, cr, + top_global, &ems, top, + inputrec, nrnb, wcycle, gstat, + vsite, constr, fcd, graph, mdatoms, fr, + mu_tot, enerd, vir, pres, step, FALSE); + EpotB = ems.epot; + fnorm = ems.fnorm; + + // Calculate gradient in point B + for (gpb = 0, i = 0; i < n; i++) + { + gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */ + + } + /* Sum the gradient along the line across CPUs */ + if (PAR(cr)) + { + gmx_sumd(1, &gpb, cr); + } + + // Keep one of the intervals [A,B] or [B,C] based on the value of the derivative + // at the new point B, and rename the endpoints of this new interval A and C. + if (gpb > 0) + { + /* Replace c endpoint with b */ + EpotC = EpotB; + c = b; + gpc = gpb; + /* swap coord pointers b/c */ + xtmp = xb; + ftmp = fb; + xb = xc; + fb = fc; + xc = xtmp; + fc = ftmp; + } + else + { + /* Replace a endpoint with b */ + EpotA = EpotB; + a = b; + gpa = gpb; + /* swap coord pointers a/b */ + xtmp = xb; + ftmp = fb; + xb = xa; + fb = fa; + xa = xtmp; + fa = ftmp; + } + + /* + * Stop search as soon as we find a value smaller than the endpoints, + * or if the tolerance is below machine precision. + * Never run more than 20 steps, no matter what. + */ + nminstep++; + } + while ((EpotB > EpotA || EpotB > EpotC) && (nminstep < 20)); + + if (fabs(EpotB-Epot0) < GMX_REAL_EPS || nminstep >= 20) + { + /* OK. We couldn't find a significantly lower energy. + * If ncorr==0 this was steepest descent, and then we give up. + * If not, reset memory to restart as steepest descent before quitting. 
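+ * (GMX_REAL_EPS is the machine epsilon of the real type: roughly 1.2e-7 in + * single precision and 2.2e-16 in double precision.)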
+ */ + if (ncorr == 0) + { + /* Converged */ + converged = TRUE; + break; + } + else + { + /* Reset memory */ + ncorr = 0; + /* Search in gradient direction */ + for (i = 0; i < n; i++) + { + dx[point][i] = ff[i]; + } + /* Reset stepsize */ + stepsize = 1.0/fnorm; + continue; + } + } + + /* Select min energy state of A & C, put the best in xx/ff/Epot + */ + if (EpotC < EpotA) + { + Epot = EpotC; + /* Use state C */ + for (i = 0; i < n; i++) + { + xx[i] = xc[i]; + ff[i] = fc[i]; + } + step_taken = c; + } + else + { + Epot = EpotA; + /* Use state A */ + for (i = 0; i < n; i++) + { + xx[i] = xa[i]; + ff[i] = fa[i]; + } + step_taken = a; + } + + } + else + { + /* found lower */ + Epot = EpotC; + /* Use state C */ + for (i = 0; i < n; i++) + { + xx[i] = xc[i]; + ff[i] = fc[i]; + } + step_taken = c; + } + + /* Update the memory information, and calculate a new + * approximation of the inverse hessian + */ + + /* Have new data in Epot, xx, ff */ + if (ncorr < nmaxcorr) + { + ncorr++; + } + + for (i = 0; i < n; i++) + { + dg[point][i] = lastf[i]-ff[i]; + dx[point][i] *= step_taken; + } + + dgdg = 0; + dgdx = 0; + for (i = 0; i < n; i++) + { + dgdg += dg[point][i]*dg[point][i]; + dgdx += dg[point][i]*dx[point][i]; + } + + diag = dgdx/dgdg; + + rho[point] = 1.0/dgdx; + point++; + + if (point >= nmaxcorr) + { + point = 0; + } + + /* Update */ + for (i = 0; i < n; i++) + { + p[i] = ff[i]; + } + + cp = point; + + /* Recursive update. First go back over the memory points */ + for (k = 0; k < ncorr; k++) + { + cp--; + if (cp < 0) + { + cp = ncorr-1; + } + + sq = 0; + for (i = 0; i < n; i++) + { + sq += dx[cp][i]*p[i]; + } + + alpha[cp] = rho[cp]*sq; + + for (i = 0; i < n; i++) + { + p[i] -= alpha[cp]*dg[cp][i]; + } + } + + for (i = 0; i < n; i++) + { + p[i] *= diag; + } + + /* And then go forward again */ + for (k = 0; k < ncorr; k++) + { + yr = 0; + for (i = 0; i < n; i++) + { + yr += p[i]*dg[cp][i]; + } + + beta = rho[cp]*yr; + beta = alpha[cp]-beta; + + for (i = 0; i < n; i++) + { + p[i] += beta*dx[cp][i]; + } + + cp++; + if (cp >= ncorr) + { + cp = 0; + } + } + + for (i = 0; i < n; i++) + { + if (!frozen[i]) + { + dx[point][i] = p[i]; + } + else + { + dx[point][i] = 0; + } + } + + /* Test whether the convergence criterion is met */ + get_f_norm_max(cr, &(inputrec->opts), mdatoms, f, &fnorm, &fmax, &nfmax); + + /* Print it if necessary */ + if (MASTER(cr)) + { + if (bVerbose) + { + double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms)); + fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n", + step, Epot, fnorm/sqrtNumAtoms, fmax, nfmax+1); + fflush(stderr); + } + /* Store the new (lower) energies */ + upd_mdebin(mdebin, FALSE, FALSE, (double)step, + mdatoms->tmass, enerd, state_global, inputrec->fepvals, inputrec->expandedvals, state_global->box, + NULL, NULL, vir, pres, NULL, mu_tot, constr); + do_log = do_per_step(step, inputrec->nstlog); + do_ene = do_per_step(step, inputrec->nstenergy); + if (do_log) + { + print_ebin_header(fplog, step, step); + } + print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE, + do_log ? fplog : NULL, step, step, eprNORMAL, + mdebin, fcd, &(top_global->groups), &(inputrec->opts)); + } + + /* Send x and E to IMD client, if bIMD is TRUE. 
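+ * IMD (interactive molecular dynamics) lets a viewer such as VMD, attached + * over a socket, monitor the run live.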
*/ + if (do_IMD(inputrec->bIMD, step, cr, TRUE, state_global->box, state_global->x, inputrec, 0, wcycle) && MASTER(cr)) + { + IMD_send_positions(inputrec->imd); + } + + // Reset stepsize if we are doing more iterations + stepsize = 1.0/fnorm; + + /* Stop when the maximum force lies below tolerance. + * If we have reached machine precision, converged is already set to true. + */ + converged = converged || (fmax < inputrec->em_tol); + + } /* End of the loop */ + + /* IMD cleanup, if bIMD is TRUE. */ + IMD_finalize(inputrec->bIMD, inputrec->imd); + + if (converged) + { + step--; /* we never took that last step in this case */ + + } + if (fmax > inputrec->em_tol) + { + if (MASTER(cr)) + { + warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE); + warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE); + } + converged = FALSE; + } + + /* If we printed energy and/or logfile last step (which was the last step) + * we don't have to do it again, but otherwise print the final values. + */ + if (!do_log) /* Write final value to log since we didn't do anything last step */ + { + print_ebin_header(fplog, step, step); + } + if (!do_ene || !do_log) /* Write final energy file entries */ + { + print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE, + !do_log ? fplog : NULL, step, step, eprNORMAL, + mdebin, fcd, &(top_global->groups), &(inputrec->opts)); + } + + /* Print some stuff... */ + if (MASTER(cr)) + { + fprintf(stderr, "\nwriting lowest energy coordinates.\n"); + } + + /* IMPORTANT! + * For accurate normal mode calculation it is imperative that we + * store the last conformation into the full precision binary trajectory. + * + * However, we should only do it if we did NOT already write this step + * above (which we did if do_x or do_f was true). + */ + do_x = !do_per_step(step, inputrec->nstxout); + do_f = !do_per_step(step, inputrec->nstfout); + write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), + top_global, inputrec, step, + &ems, state_global); + + if (MASTER(cr)) + { + double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms)); + print_converged(stderr, LBFGS, inputrec->em_tol, step, converged, + number_steps, Epot, fmax, nfmax, fnorm/sqrtNumAtoms); + print_converged(fplog, LBFGS, inputrec->em_tol, step, converged, + number_steps, Epot, fmax, nfmax, fnorm/sqrtNumAtoms); + + fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval); + } + + finish_em(cr, outf, walltime_accounting, wcycle); + + /* To print the actual number of steps we needed somewhere */ + walltime_accounting_set_nsteps_done(walltime_accounting, step); + + return 0; +} /* That's all folks */ + +/*! 
\brief Do steepest descents minimization + \copydoc integrator_t (FILE *fplog, t_commrec *cr, + int nfile, const t_filenm fnm[], + const gmx_output_env_t *oenv, gmx_bool bVerbose, + int nstglobalcomm, + gmx_vsite_t *vsite, gmx_constr_t constr, + int stepout, + t_inputrec *inputrec, + gmx_mtop_t *top_global, t_fcdata *fcd, + t_state *state_global, + t_mdatoms *mdatoms, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_edsam_t ed, + t_forcerec *fr, + int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, + real cpt_period, real max_hours, + int imdport, + unsigned long Flags, + gmx_walltime_accounting_t walltime_accounting) + */ +double do_steep(FILE *fplog, t_commrec *cr, + int nfile, const t_filenm fnm[], + const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose, + int gmx_unused nstglobalcomm, + gmx_vsite_t *vsite, gmx_constr_t constr, + int gmx_unused stepout, + t_inputrec *inputrec, + gmx_mtop_t *top_global, t_fcdata *fcd, + t_state *state_global, + t_mdatoms *mdatoms, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_edsam_t gmx_unused ed, + t_forcerec *fr, + int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed, + real gmx_unused cpt_period, real gmx_unused max_hours, + int imdport, + unsigned long gmx_unused Flags, + gmx_walltime_accounting_t walltime_accounting) +{ + const char *SD = "Steepest Descents"; + em_state_t *s_min, *s_try; + gmx_localtop_t *top; + gmx_enerdata_t *enerd; + rvec *f; + gmx_global_stat_t gstat; + t_graph *graph; + real stepsize; + real ustep, fnormn; + gmx_mdoutf_t outf; + t_mdebin *mdebin; + gmx_bool bDone, bAbort, do_x, do_f; + tensor vir, pres; + rvec mu_tot; + int nsteps; + int count = 0; + int steps_accepted = 0; + + s_min = init_em_state(); + s_try = init_em_state(); + + /* Init em and store the local state in s_try */ + init_em(fplog, SD, cr, inputrec, + state_global, top_global, s_try, &top, &f, + nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, + nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle); + + /* Print to log file */ + print_em_start(fplog, cr, walltime_accounting, wcycle, SD); + + /* Set variables for stepsize (in nm). This is the largest + * step that we are going to make in any direction. 
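+ * Since a step below is stepsize*f with stepsize = ustep/fmax, the atom + * feeling the largest force moves by exactly ustep and every other atom + * by less.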
+ */ + ustep = inputrec->em_stepsize; + stepsize = 0; + + /* Max number of steps */ + nsteps = inputrec->nsteps; + + if (MASTER(cr)) + { + /* Print to the screen */ + sp_header(stderr, SD, inputrec->em_tol, nsteps); + } + if (fplog) + { + sp_header(fplog, SD, inputrec->em_tol, nsteps); + } + + /**** HERE STARTS THE LOOP **** + * count is the counter for the number of steps + * bDone will be TRUE when the minimization has converged + * bAbort will be TRUE when nsteps steps have been performed or when + * the stepsize becomes smaller than is reasonable for machine precision + */ + count = 0; + bDone = FALSE; + bAbort = FALSE; + while (!bDone && !bAbort) + { + bAbort = (nsteps >= 0) && (count == nsteps); + + /* set new coordinates, except for first step */ + if (count > 0) + { + do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, + s_min, stepsize, s_min->f, s_try, + constr, top, nrnb, wcycle, count); + } + + evaluate_energy(fplog, cr, + top_global, s_try, top, + inputrec, nrnb, wcycle, gstat, + vsite, constr, fcd, graph, mdatoms, fr, + mu_tot, enerd, vir, pres, count, count == 0); + + if (MASTER(cr)) + { + print_ebin_header(fplog, count, count); + } + + if (count == 0) + { + s_min->epot = s_try->epot; + } + + /* Print it if necessary */ + if (MASTER(cr)) + { + if (bVerbose) + { + fprintf(stderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c", + count, ustep, s_try->epot, s_try->fmax, s_try->a_fmax+1, + ( (count == 0) || (s_try->epot < s_min->epot) ) ? '\n' : '\r'); + fflush(stderr); + } + + if ( (count == 0) || (s_try->epot < s_min->epot) ) + { + /* Store the new (lower) energies */ + upd_mdebin(mdebin, FALSE, FALSE, (double)count, + mdatoms->tmass, enerd, &s_try->s, inputrec->fepvals, inputrec->expandedvals, + s_try->s.box, NULL, NULL, vir, pres, NULL, mu_tot, constr); + + /* Prepare IMD energy record, if bIMD is TRUE. */ + IMD_fill_energy_record(inputrec->bIMD, inputrec->imd, enerd, count, TRUE); + + print_ebin(mdoutf_get_fp_ene(outf), TRUE, + do_per_step(steps_accepted, inputrec->nstdisreout), + do_per_step(steps_accepted, inputrec->nstorireout), + fplog, count, count, eprNORMAL, + mdebin, fcd, &(top_global->groups), &(inputrec->opts)); + fflush(fplog); + } + } + + /* Now if the new energy is smaller than the previous... + * or if this is the first step! + * or if we did random steps! + */ + + if ( (count == 0) || (s_try->epot < s_min->epot) ) + { + steps_accepted++; + + /* Test whether the convergence criterion is met... */ + bDone = (s_try->fmax < inputrec->em_tol); + + /* Copy the arrays for force, positions and energy */ + /* The 'Min' array always holds the coords and forces of the minimal + sampled energy */ + swap_em_state(s_min, s_try); + if (count > 0) + { + ustep *= 1.2; + } + + /* Write to trn, if necessary */ + do_x = do_per_step(steps_accepted, inputrec->nstxout); + do_f = do_per_step(steps_accepted, inputrec->nstfout); + write_em_traj(fplog, cr, outf, do_x, do_f, NULL, + top_global, inputrec, count, + s_min, state_global); + } + else + { + /* If energy is not smaller make the step smaller... 
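+ * (ustep is halved below; each accepted step above instead grows it by a + * factor of 1.2.)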
*/ + ustep *= 0.5; + + if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) + { + /* Reload the old state */ + em_dd_partition_system(fplog, count, cr, top_global, inputrec, + s_min, top, mdatoms, fr, vsite, constr, + nrnb, wcycle); + } + } + + /* Determine new step */ + stepsize = ustep/s_min->fmax; + + /* Check if stepsize is too small, with 1 nm as a characteristic length */ +#if GMX_DOUBLE + if (count == nsteps || ustep < 1e-12) +#else + if (count == nsteps || ustep < 1e-6) +#endif + { + if (MASTER(cr)) + { + warn_step(stderr, inputrec->em_tol, count == nsteps, constr != NULL); + warn_step(fplog, inputrec->em_tol, count == nsteps, constr != NULL); + } + bAbort = TRUE; + } + + /* Send IMD energies and positions, if bIMD is TRUE. */ + if (do_IMD(inputrec->bIMD, count, cr, TRUE, state_global->box, state_global->x, inputrec, 0, wcycle) && MASTER(cr)) + { + IMD_send_positions(inputrec->imd); + } + + count++; + } /* End of the loop */ + + /* IMD cleanup, if bIMD is TRUE. */ + IMD_finalize(inputrec->bIMD, inputrec->imd); + + /* Print some data... */ + if (MASTER(cr)) + { + fprintf(stderr, "\nwriting lowest energy coordinates.\n"); + } + write_em_traj(fplog, cr, outf, TRUE, inputrec->nstfout, ftp2fn(efSTO, nfile, fnm), + top_global, inputrec, count, + s_min, state_global); + + if (MASTER(cr)) + { + double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms)); + fnormn = s_min->fnorm/sqrtNumAtoms; + + print_converged(stderr, SD, inputrec->em_tol, count, bDone, nsteps, + s_min->epot, s_min->fmax, s_min->a_fmax, fnormn); + print_converged(fplog, SD, inputrec->em_tol, count, bDone, nsteps, + s_min->epot, s_min->fmax, s_min->a_fmax, fnormn); + } + + finish_em(cr, outf, walltime_accounting, wcycle); + + /* To print the actual number of steps we needed somewhere */ + inputrec->nsteps = count; + + walltime_accounting_set_nsteps_done(walltime_accounting, count); + + return 0; +} /* That's all folks */ + +/*! 
\brief Do normal modes analysis + \copydoc integrator_t (FILE *fplog, t_commrec *cr, + int nfile, const t_filenm fnm[], + const gmx_output_env_t *oenv, gmx_bool bVerbose, + int nstglobalcomm, + gmx_vsite_t *vsite, gmx_constr_t constr, + int stepout, + t_inputrec *inputrec, + gmx_mtop_t *top_global, t_fcdata *fcd, + t_state *state_global, + t_mdatoms *mdatoms, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_edsam_t ed, + t_forcerec *fr, + int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, + real cpt_period, real max_hours, + int imdport, + unsigned long Flags, + gmx_walltime_accounting_t walltime_accounting) + */ +double do_nm(FILE *fplog, t_commrec *cr, + int nfile, const t_filenm fnm[], + const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose, + int gmx_unused nstglobalcomm, + gmx_vsite_t *vsite, gmx_constr_t constr, + int gmx_unused stepout, + t_inputrec *inputrec, + gmx_mtop_t *top_global, t_fcdata *fcd, + t_state *state_global, + t_mdatoms *mdatoms, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_edsam_t gmx_unused ed, + t_forcerec *fr, + int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed, + real gmx_unused cpt_period, real gmx_unused max_hours, + int imdport, + unsigned long gmx_unused Flags, + gmx_walltime_accounting_t walltime_accounting) +{ + const char *NM = "Normal Mode Analysis"; + gmx_mdoutf_t outf; + int nnodes, node; + gmx_localtop_t *top; + gmx_enerdata_t *enerd; + rvec *f; + gmx_global_stat_t gstat; + t_graph *graph; + tensor vir, pres; + rvec mu_tot; + rvec *fneg, *dfdx; + gmx_bool bSparse; /* use sparse matrix storage format */ + size_t sz; + gmx_sparsematrix_t * sparse_matrix = NULL; + real * full_matrix = NULL; + em_state_t * state_work; + + /* added with respect to mdrun */ + int row, col; + real der_range = 10.0*sqrt(GMX_REAL_EPS); + real x_min; + bool bIsMaster = MASTER(cr); + + if (constr != NULL) + { + gmx_fatal(FARGS, "Constraints present with Normal Mode Analysis, this combination is not supported"); + } + + state_work = init_em_state(); + + /* Init em and store the local state in state_minimum */ + init_em(fplog, NM, cr, inputrec, + state_global, top_global, state_work, &top, + &f, + nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, + nfile, fnm, &outf, NULL, imdport, Flags, wcycle); + + gmx_shellfc_t *shellfc = init_shell_flexcon(stdout, + top_global, + n_flexible_constraints(constr), + inputrec->nstcalcenergy, + DOMAINDECOMP(cr)); + + if (shellfc) + { + make_local_shells(cr, mdatoms, shellfc); + } + std::vector<size_t> atom_index = get_atom_index(top_global); + snew(fneg, atom_index.size()); + snew(dfdx, atom_index.size()); + +#if !GMX_DOUBLE + if (bIsMaster) + { + fprintf(stderr, + "NOTE: This version of GROMACS has been compiled in single precision,\n" + " which MIGHT not be accurate enough for normal mode analysis.\n" + " GROMACS now uses sparse matrix storage, so the memory requirements\n" + " are fairly modest even if you recompile in double precision.\n\n"); + } +#endif + + /* Check if we can/should use sparse storage format. + * + * Sparse format is only useful when the Hessian itself is sparse, which it + * will be when we use a cutoff. + * For small systems (n<1000) it is easier to always use full matrix format, though. 
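+ * As a rough estimate, a full Hessian holds (3N)^2 reals, so N = 10000 atoms + * already requires 9e8 reals, about 3.6 GB in single precision, whereas the + * sparse format only stores the non-zero elements.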
+ */ + if (EEL_FULL(fr->eeltype) || fr->rlist == 0.0) + { + md_print_info(cr, fplog, "Non-cutoff electrostatics used, forcing full Hessian format.\n"); + bSparse = FALSE; + } + else if (atom_index.size() < 1000) + { + md_print_info(cr, fplog, "Small system size (N=%d), using full Hessian format.\n", atom_index.size()); + bSparse = FALSE; + } + else + { + md_print_info(cr, fplog, "Using compressed symmetric sparse Hessian format.\n"); + bSparse = TRUE; + } + + /* Number of dimensions, based on real atoms, that is not vsites or shell */ + sz = DIM*atom_index.size(); + + fprintf(stderr, "Allocating Hessian memory...\n\n"); + + if (bSparse) + { + sparse_matrix = gmx_sparsematrix_init(sz); + sparse_matrix->compressed_symmetric = TRUE; + } + else + { + snew(full_matrix, sz*sz); + } + + init_nrnb(nrnb); + + where(); + + /* Write start time and temperature */ + print_em_start(fplog, cr, walltime_accounting, wcycle, NM); + + /* fudge nr of steps to nr of atoms */ + inputrec->nsteps = atom_index.size()*2; + + if (bIsMaster) + { + fprintf(stderr, "starting normal mode calculation '%s'\n%d steps.\n\n", + *(top_global->name), (int)inputrec->nsteps); + } + + nnodes = cr->nnodes; + + /* Make evaluate_energy do a single node force calculation */ + cr->nnodes = 1; + evaluate_energy(fplog, cr, + top_global, state_work, top, + inputrec, nrnb, wcycle, gstat, + vsite, constr, fcd, graph, mdatoms, fr, + mu_tot, enerd, vir, pres, -1, TRUE); + cr->nnodes = nnodes; + + /* if forces are not small, warn user */ + get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, state_work); + + md_print_info(cr, fplog, "Maximum force:%12.5e\n", state_work->fmax); + if (state_work->fmax > 1.0e-3) + { + md_print_info(cr, fplog, + "The force is probably not small enough to " + "ensure that you are at a minimum.\n" + "Be aware that negative eigenvalues may occur\n" + "when the resulting matrix is diagonalized.\n\n"); + } + + /*********************************************************** + * + * Loop over all pairs in matrix + * + * do_force called twice. 
Once with positive and + * once with negative displacement + * + ************************************************************/ + + /* Steps are divided one by one over the nodes */ + bool bNS = true; + for (unsigned int aid = cr->nodeid; aid < atom_index.size(); aid += nnodes) + { + size_t atom = atom_index[aid]; + for (size_t d = 0; d < DIM; d++) + { + gmx_bool bBornRadii = FALSE; + gmx_int64_t step = 0; + int force_flags = GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES; + double t = 0; + + x_min = state_work->s.x[atom][d]; + + for (unsigned int dx = 0; (dx < 2); dx++) + { + if (dx == 0) + { + state_work->s.x[atom][d] = x_min - der_range; + } + else + { + state_work->s.x[atom][d] = x_min + der_range; + } + + /* Make evaluate_energy do a single node force calculation */ + cr->nnodes = 1; + if (shellfc) + { + /* Now is the time to relax the shells */ + (void) relax_shell_flexcon(fplog, cr, bVerbose, step, + inputrec, bNS, force_flags, + top, + constr, enerd, fcd, + &state_work->s, state_work->f, vir, mdatoms, + nrnb, wcycle, graph, &top_global->groups, + shellfc, fr, bBornRadii, t, mu_tot, + vsite, NULL); + bNS = false; + step++; + } + else + { + evaluate_energy(fplog, cr, + top_global, state_work, top, + inputrec, nrnb, wcycle, gstat, + vsite, constr, fcd, graph, mdatoms, fr, + mu_tot, enerd, vir, pres, atom*2+dx, FALSE); + } + + cr->nnodes = nnodes; + + if (dx == 0) + { + for (size_t i = 0; i < atom_index.size(); i++) + { + copy_rvec(state_work->f[atom_index[i]], fneg[i]); + } + } + } + + /* x is restored to original */ + state_work->s.x[atom][d] = x_min; + + for (size_t j = 0; j < atom_index.size(); j++) + { + for (size_t k = 0; (k < DIM); k++) + { + dfdx[j][k] = + -(state_work->f[atom_index[j]][k] - fneg[j][k])/(2*der_range); + } + } + + if (!bIsMaster) + { +#if GMX_MPI +#define mpi_type GMX_MPI_REAL + MPI_Send(dfdx[0], atom_index.size()*DIM, mpi_type, MASTER(cr), + cr->nodeid, cr->mpi_comm_mygroup); +#endif + } + else + { + for (node = 0; (node < nnodes && atom+node < atom_index.size()); node++) + { + if (node > 0) + { +#if GMX_MPI + MPI_Status stat; + MPI_Recv(dfdx[0], atom_index.size()*DIM, mpi_type, node, node, + cr->mpi_comm_mygroup, &stat); +#undef mpi_type +#endif + } + + row = (atom + node)*DIM + d; + + for (size_t j = 0; j < atom_index.size(); j++) + { + for (size_t k = 0; k < DIM; k++) + { + col = j*DIM + k; + + if (bSparse) + { + if (col >= row && dfdx[j][k] != 0.0) + { + gmx_sparsematrix_increment_value(sparse_matrix, + row, col, dfdx[j][k]); + } + } + else + { + full_matrix[row*sz+col] = dfdx[j][k]; + } + } + } + } + } + + if (bVerbose && fplog) + { + fflush(fplog); + } + } + /* write progress */ + if (bIsMaster && bVerbose) + { + fprintf(stderr, "\rFinished step %d out of %d", + static_cast<int>(std::min(atom+nnodes, atom_index.size())), + static_cast<int>(atom_index.size())); + fflush(stderr); + } + } + + if (bIsMaster) + { + fprintf(stderr, "\n\nWriting Hessian...\n"); + gmx_mtxio_write(ftp2fn(efMTX, nfile, fnm), sz, sz, full_matrix, sparse_matrix); + } + + finish_em(cr, outf, walltime_accounting, wcycle); + + walltime_accounting_set_nsteps_done(walltime_accounting, atom_index.size()*2); + + return 0; +} + +} // namespace gmx diff --git a/patches/gromacs-2016-beta1.diff/src/gromacs/mdlib/minimize.cpp.preplumed b/patches/gromacs-2016-beta1.diff/src/gromacs/mdlib/minimize.cpp.preplumed new file mode 100644 index 0000000000000000000000000000000000000000..5a205694090705e66fb3b3a07da9748f185c8bd7 --- /dev/null +++ 
b/patches/gromacs-2016-beta1.diff/src/gromacs/mdlib/minimize.cpp.preplumed @@ -0,0 +1,3054 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! 
\internal \file + * + * \brief This file defines integrators for energy minimization + * + * \author Berk Hess <hess@kth.se> + * \author Erik Lindahl <erik@kth.se> + * \ingroup module_mdlib + */ +#include "gmxpre.h" + +#include "minimize.h" + +#include "config.h" + +#include <cmath> +#include <cstring> +#include <ctime> + +#include <algorithm> +#include <vector> + +#include "gromacs/commandline/filenm.h" +#include "gromacs/domdec/domdec.h" +#include "gromacs/domdec/domdec_struct.h" +#include "gromacs/ewald/pme.h" +#include "gromacs/fileio/confio.h" +#include "gromacs/fileio/mtxio.h" +#include "gromacs/gmxlib/md_logging.h" +#include "gromacs/gmxlib/network.h" +#include "gromacs/gmxlib/nrnb.h" +#include "gromacs/imd/imd.h" +#include "gromacs/linearalgebra/sparsematrix.h" +#include "gromacs/listed-forces/manage-threading.h" +#include "gromacs/math/functions.h" +#include "gromacs/math/vec.h" +#include "gromacs/mdlib/constr.h" +#include "gromacs/mdlib/force.h" +#include "gromacs/mdlib/forcerec.h" +#include "gromacs/mdlib/gmx_omp_nthreads.h" +#include "gromacs/mdlib/md_support.h" +#include "gromacs/mdlib/mdatoms.h" +#include "gromacs/mdlib/mdebin.h" +#include "gromacs/mdlib/mdrun.h" +#include "gromacs/mdlib/ns.h" +#include "gromacs/mdlib/shellfc.h" +#include "gromacs/mdlib/sim_util.h" +#include "gromacs/mdlib/tgroup.h" +#include "gromacs/mdlib/trajectory_writing.h" +#include "gromacs/mdlib/update.h" +#include "gromacs/mdlib/vsite.h" +#include "gromacs/mdtypes/commrec.h" +#include "gromacs/mdtypes/inputrec.h" +#include "gromacs/mdtypes/md_enums.h" +#include "gromacs/pbcutil/mshift.h" +#include "gromacs/pbcutil/pbc.h" +#include "gromacs/timing/wallcycle.h" +#include "gromacs/timing/walltime_accounting.h" +#include "gromacs/topology/mtop_util.h" +#include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/exceptions.h" +#include "gromacs/utility/fatalerror.h" +#include "gromacs/utility/smalloc.h" + +//! Utility structure for manipulating states during EM +typedef struct { + //! Copy of the global state + t_state s; + //! Force array + rvec *f; + //! Potential energy + real epot; + //! Norm of the force + real fnorm; + //! Maximum force + real fmax; + //! Atom where the maximum force occurs + int a_fmax; +} em_state_t; + +//! Initialize an em_state_t structure and return a pointer to it +static em_state_t *init_em_state() +{ + em_state_t *ems; + + snew(ems, 1); + + /* does this need to be here? Should the array be declared differently (statically) in the state definition? */ + snew(ems->s.lambda, efptNR); + + return ems; +} + +//! Print the EM starting conditions +static void print_em_start(FILE *fplog, + t_commrec *cr, + gmx_walltime_accounting_t walltime_accounting, + gmx_wallcycle_t wcycle, + const char *name) +{ + walltime_accounting_start(walltime_accounting); + wallcycle_start(wcycle, ewcRUN); + print_start(fplog, cr, walltime_accounting, name); +} + +//! Stop counting time for EM +static void em_time_end(gmx_walltime_accounting_t walltime_accounting, + gmx_wallcycle_t wcycle) +{ + wallcycle_stop(wcycle, ewcRUN); + + walltime_accounting_end(walltime_accounting); +} + +//! Print a log file and console header +static void sp_header(FILE *out, const char *minimizer, real ftol, int nsteps) +{ + fprintf(out, "\n"); + fprintf(out, "%s:\n", minimizer); + fprintf(out, "   Tolerance (Fmax)   = %12.5e\n", ftol); + fprintf(out, "   Number of steps    = %12d\n", nsteps); +} + +//! 
Print warning message +static void warn_step(FILE *fp, real ftol, gmx_bool bLastStep, gmx_bool bConstrain) +{ + char buffer[2048]; + if (bLastStep) + { + sprintf(buffer, + "\nEnergy minimization reached the maximum number " + "of steps before the forces reached the requested " + "precision Fmax < %g.\n", ftol); + } + else + { + sprintf(buffer, + "\nEnergy minimization has stopped, but the forces have " + "not converged to the requested precision Fmax < %g (which " + "may not be possible for your system). It stopped " + "because the algorithm tried to make a new step whose size " + "was too small, or there was no change in the energy since " + "last step. Either way, we regard the minimization as " + "converged to within the available machine precision, " + "given your starting configuration and EM parameters.\n%s%s", + ftol, + sizeof(real) < sizeof(double) ? + "\nDouble precision normally gives you higher accuracy, but " + "this is often not needed for preparing to run molecular " + "dynamics.\n" : + "", + bConstrain ? + "You might need to increase your constraint accuracy, or turn\n" + "off constraints altogether (set constraints = none in mdp file)\n" : + ""); + } + fputs(wrap_lines(buffer, 78, 0, FALSE), fp); +} + +//! Print message about convergence of the EM +static void print_converged(FILE *fp, const char *alg, real ftol, + gmx_int64_t count, gmx_bool bDone, gmx_int64_t nsteps, + real epot, real fmax, int nfmax, real fnorm) +{ + char buf[STEPSTRSIZE]; + + if (bDone) + { + fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n", + alg, ftol, gmx_step_str(count, buf)); + } + else if (count < nsteps) + { + fprintf(fp, "\n%s converged to machine precision in %s steps,\n" + "but did not reach the requested Fmax < %g.\n", + alg, gmx_step_str(count, buf), ftol); + } + else + { + fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n", + alg, ftol, gmx_step_str(count, buf)); + } + +#if GMX_DOUBLE + fprintf(fp, "Potential Energy = %21.14e\n", epot); + fprintf(fp, "Maximum force = %21.14e on atom %d\n", fmax, nfmax+1); + fprintf(fp, "Norm of force = %21.14e\n", fnorm); +#else + fprintf(fp, "Potential Energy = %14.7e\n", epot); + fprintf(fp, "Maximum force = %14.7e on atom %d\n", fmax, nfmax+1); + fprintf(fp, "Norm of force = %14.7e\n", fnorm); +#endif +} + +//! Compute the norm and max of the force array in parallel +static void get_f_norm_max(t_commrec *cr, + t_grpopts *opts, t_mdatoms *mdatoms, rvec *f, + real *fnorm, real *fmax, int *a_fmax) +{ + double fnorm2, *sum; + real fmax2, fam; + int la_max, a_max, start, end, i, m, gf; + + /* This routine finds the largest force and returns it. + * On parallel machines the global max is taken. 
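+ * Degrees of freedom in frozen dimensions are skipped, so they contribute + * to neither the norm nor the maximum.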
+ */ + fnorm2 = 0; + fmax2 = 0; + la_max = -1; + start = 0; + end = mdatoms->homenr; + if (mdatoms->cFREEZE) + { + for (i = start; i < end; i++) + { + gf = mdatoms->cFREEZE[i]; + fam = 0; + for (m = 0; m < DIM; m++) + { + if (!opts->nFreeze[gf][m]) + { + fam += gmx::square(f[i][m]); + } + } + fnorm2 += fam; + if (fam > fmax2) + { + fmax2 = fam; + la_max = i; + } + } + } + else + { + for (i = start; i < end; i++) + { + fam = norm2(f[i]); + fnorm2 += fam; + if (fam > fmax2) + { + fmax2 = fam; + la_max = i; + } + } + } + + if (la_max >= 0 && DOMAINDECOMP(cr)) + { + a_max = cr->dd->gatindex[la_max]; + } + else + { + a_max = la_max; + } + if (PAR(cr)) + { + snew(sum, 2*cr->nnodes+1); + sum[2*cr->nodeid] = fmax2; + sum[2*cr->nodeid+1] = a_max; + sum[2*cr->nnodes] = fnorm2; + gmx_sumd(2*cr->nnodes+1, sum, cr); + fnorm2 = sum[2*cr->nnodes]; + /* Determine the global maximum */ + for (i = 0; i < cr->nnodes; i++) + { + if (sum[2*i] > fmax2) + { + fmax2 = sum[2*i]; + a_max = (int)(sum[2*i+1] + 0.5); + } + } + sfree(sum); + } + + if (fnorm) + { + *fnorm = sqrt(fnorm2); + } + if (fmax) + { + *fmax = sqrt(fmax2); + } + if (a_fmax) + { + *a_fmax = a_max; + } +} + +//! Compute the norm of the force +static void get_state_f_norm_max(t_commrec *cr, + t_grpopts *opts, t_mdatoms *mdatoms, + em_state_t *ems) +{ + get_f_norm_max(cr, opts, mdatoms, ems->f, &ems->fnorm, &ems->fmax, &ems->a_fmax); +} + +//! Initialize the energy minimization +void init_em(FILE *fplog, const char *title, + t_commrec *cr, t_inputrec *ir, + t_state *state_global, gmx_mtop_t *top_global, + em_state_t *ems, gmx_localtop_t **top, + rvec **f, + t_nrnb *nrnb, rvec mu_tot, + t_forcerec *fr, gmx_enerdata_t **enerd, + t_graph **graph, t_mdatoms *mdatoms, gmx_global_stat_t *gstat, + gmx_vsite_t *vsite, gmx_constr_t constr, + int nfile, const t_filenm fnm[], + gmx_mdoutf_t *outf, t_mdebin **mdebin, + int imdport, unsigned long gmx_unused Flags, + gmx_wallcycle_t wcycle) +{ + int i; + real dvdl_constr; + + if (fplog) + { + fprintf(fplog, "Initiating %s\n", title); + } + + state_global->ngtc = 0; + + /* Initialize lambda variables */ + initialize_lambdas(fplog, ir, &(state_global->fep_state), state_global->lambda, NULL); + + init_nrnb(nrnb); + + /* Interactive molecular dynamics */ + init_IMD(ir, cr, top_global, fplog, 1, state_global->x, + nfile, fnm, NULL, imdport, Flags); + + if (DOMAINDECOMP(cr)) + { + *top = dd_init_local_top(top_global); + + dd_init_local_state(cr->dd, state_global, &ems->s); + + *f = NULL; + + /* Distribute the charge groups over the nodes from the master node */ + dd_partition_system(fplog, ir->init_step, cr, TRUE, 1, + state_global, top_global, ir, + &ems->s, &ems->f, mdatoms, *top, + fr, vsite, constr, + nrnb, NULL, FALSE); + dd_store_state(cr->dd, &ems->s); + + *graph = NULL; + } + else + { + snew(*f, top_global->natoms); + + /* Just copy the state */ + ems->s = *state_global; + /* We need to allocate one element extra, since we might use + * (unaligned) 4-wide SIMD loads to access rvec entries. 
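+ * (A 4-wide load that starts at the last rvec would otherwise read one real + * beyond the end of the allocation.)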
+ */ + snew(ems->s.x, ems->s.nalloc + 1); + snew(ems->f, ems->s.nalloc+1); + snew(ems->s.v, ems->s.nalloc+1); + for (i = 0; i < state_global->natoms; i++) + { + copy_rvec(state_global->x[i], ems->s.x[i]); + } + copy_mat(state_global->box, ems->s.box); + + *top = gmx_mtop_generate_local_top(top_global, ir->efep != efepNO); + + forcerec_set_excl_load(fr, *top); + + setup_bonded_threading(fr, &(*top)->idef); + + if (ir->ePBC != epbcNONE && !fr->bMolPBC) + { + *graph = mk_graph(fplog, &((*top)->idef), 0, top_global->natoms, FALSE, FALSE); + } + else + { + *graph = NULL; + } + + atoms2md(top_global, ir, 0, NULL, top_global->natoms, mdatoms); + update_mdatoms(mdatoms, state_global->lambda[efptFEP]); + + if (vsite) + { + set_vsite_top(vsite, *top, mdatoms, cr); + } + } + + if (constr) + { + if (ir->eConstrAlg == econtSHAKE && + gmx_mtop_ftype_count(top_global, F_CONSTR) > 0) + { + gmx_fatal(FARGS, "Can not do energy minimization with %s, use %s\n", + econstr_names[econtSHAKE], econstr_names[econtLINCS]); + } + + if (!DOMAINDECOMP(cr)) + { + set_constraints(constr, *top, ir, mdatoms, cr); + } + + if (!ir->bContinuation) + { + /* Constrain the starting coordinates */ + dvdl_constr = 0; + constrain(PAR(cr) ? NULL : fplog, TRUE, TRUE, constr, &(*top)->idef, + ir, cr, -1, 0, 1.0, mdatoms, + ems->s.x, ems->s.x, NULL, fr->bMolPBC, ems->s.box, + ems->s.lambda[efptFEP], &dvdl_constr, + NULL, NULL, nrnb, econqCoord); + } + } + + if (PAR(cr)) + { + *gstat = global_stat_init(ir); + } + else + { + *gstat = NULL; + } + + *outf = init_mdoutf(fplog, nfile, fnm, 0, cr, ir, top_global, NULL, wcycle); + + snew(*enerd, 1); + init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda, + *enerd); + + if (mdebin != NULL) + { + /* Init bin for energy stuff */ + *mdebin = init_mdebin(mdoutf_get_fp_ene(*outf), top_global, ir, NULL); + } + + clear_rvec(mu_tot); + calc_shifts(ems->s.box, fr->shift_vec); +} + +//! Finalize the minimization +static void finish_em(t_commrec *cr, gmx_mdoutf_t outf, + gmx_walltime_accounting_t walltime_accounting, + gmx_wallcycle_t wcycle) +{ + if (!(cr->duty & DUTY_PME)) + { + /* Tell the PME only node to finish */ + gmx_pme_send_finish(cr); + } + + done_mdoutf(outf); + + em_time_end(walltime_accounting, wcycle); +} + +//! Swap two different EM states during minimization +static void swap_em_state(em_state_t *ems1, em_state_t *ems2) +{ + em_state_t tmp; + + tmp = *ems1; + *ems1 = *ems2; + *ems2 = tmp; +} + +//! Copy coordinate from an EM state to a "normal" state structure +static void copy_em_coords(em_state_t *ems, t_state *state) +{ + int i; + + for (i = 0; (i < state->natoms); i++) + { + copy_rvec(ems->s.x[i], state->x[i]); + } +} + +//! Save the EM trajectory +static void write_em_traj(FILE *fplog, t_commrec *cr, + gmx_mdoutf_t outf, + gmx_bool bX, gmx_bool bF, const char *confout, + gmx_mtop_t *top_global, + t_inputrec *ir, gmx_int64_t step, + em_state_t *state, + t_state *state_global) +{ + int mdof_flags; + gmx_bool bIMDout = FALSE; + + + /* Shall we do IMD output? 
*/ + if (ir->bIMD) + { + bIMDout = do_per_step(step, IMD_get_step(ir->imd->setup)); + } + + if ((bX || bF || bIMDout || confout != NULL) && !DOMAINDECOMP(cr)) + { + copy_em_coords(state, state_global); + } + + mdof_flags = 0; + if (bX) + { + mdof_flags |= MDOF_X; + } + if (bF) + { + mdof_flags |= MDOF_F; + } + + /* If we want IMD output, set appropriate MDOF flag */ + if (ir->bIMD) + { + mdof_flags |= MDOF_IMD; + } + + mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, + top_global, step, (double)step, + &state->s, state_global, state->f); + + if (confout != NULL && MASTER(cr)) + { + if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr)) + { + /* Make molecules whole only for confout writing */ + do_pbc_mtop(fplog, ir->ePBC, state_global->box, top_global, + state_global->x); + } + + write_sto_conf_mtop(confout, + *top_global->name, top_global, + state_global->x, NULL, ir->ePBC, state_global->box); + } +} + +//! Do one minimization step +static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md, + gmx_bool bMolPBC, + em_state_t *ems1, real a, rvec *f, em_state_t *ems2, + gmx_constr_t constr, gmx_localtop_t *top, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_int64_t count) + +{ + t_state *s1, *s2; + int i; + int start, end; + rvec *x1, *x2; + real dvdl_constr; + int nthreads gmx_unused; + + s1 = &ems1->s; + s2 = &ems2->s; + + if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count) + { + gmx_incons("state mismatch in do_em_step"); + } + + s2->flags = s1->flags; + + if (s2->nalloc != s1->nalloc) + { + s2->nalloc = s1->nalloc; + /* We need to allocate one element extra, since we might use + * (unaligned) 4-wide SIMD loads to access rvec entries. + */ + srenew(s2->x, s1->nalloc + 1); + srenew(ems2->f, s1->nalloc); + if (s2->flags & (1<<estCGP)) + { + srenew(s2->cg_p, s1->nalloc + 1); + } + } + + s2->natoms = s1->natoms; + copy_mat(s1->box, s2->box); + /* Copy free energy state */ + for (i = 0; i < efptNR; i++) + { + s2->lambda[i] = s1->lambda[i]; + } + copy_mat(s1->box, s2->box); + + start = 0; + end = md->homenr; + + x1 = s1->x; + x2 = s2->x; + + // cppcheck-suppress unreadVariable + nthreads = gmx_omp_nthreads_get(emntUpdate); +#pragma omp parallel num_threads(nthreads) + { + int gf, i, m; + + gf = 0; +#pragma omp for schedule(static) nowait + for (i = start; i < end; i++) + { + try + { + if (md->cFREEZE) + { + gf = md->cFREEZE[i]; + } + for (m = 0; m < DIM; m++) + { + if (ir->opts.nFreeze[gf][m]) + { + x2[i][m] = x1[i][m]; + } + else + { + x2[i][m] = x1[i][m] + a*f[i][m]; + } + } + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; + } + + if (s2->flags & (1<<estCGP)) + { + /* Copy the CG p vector */ + x1 = s1->cg_p; + x2 = s2->cg_p; +#pragma omp for schedule(static) nowait + for (i = start; i < end; i++) + { + // Trivial OpenMP block that does not throw + copy_rvec(x1[i], x2[i]); + } + } + + if (DOMAINDECOMP(cr)) + { + s2->ddp_count = s1->ddp_count; + if (s2->cg_gl_nalloc < s1->cg_gl_nalloc) + { +#pragma omp barrier + s2->cg_gl_nalloc = s1->cg_gl_nalloc; + try + { + /* We need to allocate one element extra, since we might use + * (unaligned) 4-wide SIMD loads to access rvec entries. 
+ */ + srenew(s2->cg_gl, s2->cg_gl_nalloc + 1); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; +#pragma omp barrier + } + s2->ncg_gl = s1->ncg_gl; +#pragma omp for schedule(static) nowait + for (i = 0; i < s2->ncg_gl; i++) + { + s2->cg_gl[i] = s1->cg_gl[i]; + } + s2->ddp_count_cg_gl = s1->ddp_count_cg_gl; + } + } + + if (constr) + { + wallcycle_start(wcycle, ewcCONSTR); + dvdl_constr = 0; + constrain(NULL, TRUE, TRUE, constr, &top->idef, + ir, cr, count, 0, 1.0, md, + s1->x, s2->x, NULL, bMolPBC, s2->box, + s2->lambda[efptBONDED], &dvdl_constr, + NULL, NULL, nrnb, econqCoord); + wallcycle_stop(wcycle, ewcCONSTR); + } +} + +//! Prepare EM for using domain decomposition parallelization +static void em_dd_partition_system(FILE *fplog, int step, t_commrec *cr, + gmx_mtop_t *top_global, t_inputrec *ir, + em_state_t *ems, gmx_localtop_t *top, + t_mdatoms *mdatoms, t_forcerec *fr, + gmx_vsite_t *vsite, gmx_constr_t constr, + t_nrnb *nrnb, gmx_wallcycle_t wcycle) +{ + /* Repartition the domain decomposition */ + dd_partition_system(fplog, step, cr, FALSE, 1, + NULL, top_global, ir, + &ems->s, &ems->f, + mdatoms, top, fr, vsite, constr, + nrnb, wcycle, FALSE); + dd_store_state(cr->dd, &ems->s); +} + +//! Do one energy evaluation +static void evaluate_energy(FILE *fplog, t_commrec *cr, + gmx_mtop_t *top_global, + em_state_t *ems, gmx_localtop_t *top, + t_inputrec *inputrec, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_global_stat_t gstat, + gmx_vsite_t *vsite, gmx_constr_t constr, + t_fcdata *fcd, + t_graph *graph, t_mdatoms *mdatoms, + t_forcerec *fr, rvec mu_tot, + gmx_enerdata_t *enerd, tensor vir, tensor pres, + gmx_int64_t count, gmx_bool bFirst) +{ + real t; + gmx_bool bNS; + tensor force_vir, shake_vir, ekin; + real dvdl_constr, prescorr, enercorr, dvdlcorr; + real terminate = 0; + + /* Set the time to the initial time, the time does not change during EM */ + t = inputrec->init_t; + + if (bFirst || + (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) + { + /* This is the first state or an old state used before the last ns */ + bNS = TRUE; + } + else + { + bNS = FALSE; + if (inputrec->nstlist > 0) + { + bNS = TRUE; + } + } + + if (vsite) + { + construct_vsites(vsite, ems->s.x, 1, NULL, + top->idef.iparams, top->idef.il, + fr->ePBC, fr->bMolPBC, cr, ems->s.box); + } + + if (DOMAINDECOMP(cr) && bNS) + { + /* Repartition the domain decomposition */ + em_dd_partition_system(fplog, count, cr, top_global, inputrec, + ems, top, mdatoms, fr, vsite, constr, + nrnb, wcycle); + } + + /* Calc force & energy on new trial position */ + /* do_force always puts the charge groups in the box and shifts again + * We do not unshift, so molecules are always whole in congrad.c + */ + do_force(fplog, cr, inputrec, + count, nrnb, wcycle, top, &top_global->groups, + ems->s.box, ems->s.x, &ems->s.hist, + ems->f, force_vir, mdatoms, enerd, fcd, + ems->s.lambda, graph, fr, vsite, mu_tot, t, NULL, NULL, TRUE, + GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | + GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY | + (bNS ? 
GMX_FORCE_NS : 0)); + + /* Clear the unused shake virial and pressure */ + clear_mat(shake_vir); + clear_mat(pres); + + /* Communicate stuff when parallel */ + if (PAR(cr) && inputrec->eI != eiNM) + { + wallcycle_start(wcycle, ewcMoveE); + + global_stat(gstat, cr, enerd, force_vir, shake_vir, mu_tot, + inputrec, NULL, NULL, NULL, 1, &terminate, + NULL, FALSE, + CGLO_ENERGY | + CGLO_PRESSURE | + CGLO_CONSTRAINT); + + wallcycle_stop(wcycle, ewcMoveE); + } + + /* Calculate long range corrections to pressure and energy */ + calc_dispcorr(inputrec, fr, ems->s.box, ems->s.lambda[efptVDW], + pres, force_vir, &prescorr, &enercorr, &dvdlcorr); + enerd->term[F_DISPCORR] = enercorr; + enerd->term[F_EPOT] += enercorr; + enerd->term[F_PRES] += prescorr; + enerd->term[F_DVDL] += dvdlcorr; + + ems->epot = enerd->term[F_EPOT]; + + if (constr) + { + /* Project out the constraint components of the force */ + wallcycle_start(wcycle, ewcCONSTR); + dvdl_constr = 0; + constrain(NULL, FALSE, FALSE, constr, &top->idef, + inputrec, cr, count, 0, 1.0, mdatoms, + ems->s.x, ems->f, ems->f, fr->bMolPBC, ems->s.box, + ems->s.lambda[efptBONDED], &dvdl_constr, + NULL, &shake_vir, nrnb, econqForceDispl); + enerd->term[F_DVDL_CONSTR] += dvdl_constr; + m_add(force_vir, shake_vir, vir); + wallcycle_stop(wcycle, ewcCONSTR); + } + else + { + copy_mat(force_vir, vir); + } + + clear_mat(ekin); + enerd->term[F_PRES] = + calc_pres(fr->ePBC, inputrec->nwall, ems->s.box, ekin, vir, pres); + + sum_dhdl(enerd, ems->s.lambda, inputrec->fepvals); + + if (EI_ENERGY_MINIMIZATION(inputrec->eI)) + { + get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, ems); + } +} + +//! Parallel utility summing energies and forces +static double reorder_partsum(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms, + gmx_mtop_t *top_global, + em_state_t *s_min, em_state_t *s_b) +{ + rvec *fm, *fb, *fmg; + t_block *cgs_gl; + int ncg, *cg_gl, *index, c, cg, i, a0, a1, a, gf, m; + double partsum; + unsigned char *grpnrFREEZE; + + if (debug) + { + fprintf(debug, "Doing reorder_partsum\n"); + } + + fm = s_min->f; + fb = s_b->f; + + cgs_gl = dd_charge_groups_global(cr->dd); + index = cgs_gl->index; + + /* Collect fm in a global vector fmg. + * This conflicts with the spirit of domain decomposition, + * but to fully optimize this a much more complicated algorithm is required. + */ + snew(fmg, top_global->natoms); + + ncg = s_min->s.ncg_gl; + cg_gl = s_min->s.cg_gl; + i = 0; + for (c = 0; c < ncg; c++) + { + cg = cg_gl[c]; + a0 = index[cg]; + a1 = index[cg+1]; + for (a = a0; a < a1; a++) + { + copy_rvec(fm[i], fmg[a]); + i++; + } + } + gmx_sum(top_global->natoms*3, fmg[0], cr); + + /* Now we will determine the part of the sum for the cgs in state s_b */ + ncg = s_b->s.ncg_gl; + cg_gl = s_b->s.cg_gl; + partsum = 0; + i = 0; + gf = 0; + grpnrFREEZE = top_global->groups.grpnr[egcFREEZE]; + for (c = 0; c < ncg; c++) + { + cg = cg_gl[c]; + a0 = index[cg]; + a1 = index[cg+1]; + for (a = a0; a < a1; a++) + { + if (mdatoms->cFREEZE && grpnrFREEZE) + { + gf = grpnrFREEZE[i]; + } + for (m = 0; m < DIM; m++) + { + if (!opts->nFreeze[gf][m]) + { + partsum += (fb[i][m] - fmg[a][m])*fb[i][m]; + } + } + i++; + } + } + + sfree(fmg); + + return partsum; +} + +//! Compute beta, the Polak-Ribiere conjugate-gradient coefficient. 
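+//! With f = -grad E this is beta_PR = sum((f_b - f_min).f_b) / |f_min|^2, with +//! frozen degrees of freedom excluded from the sums.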
+static real pr_beta(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms, + gmx_mtop_t *top_global, + em_state_t *s_min, em_state_t *s_b) +{ + rvec *fm, *fb; + double sum; + int gf, i, m; + + /* This is just the classical Polak-Ribiere calculation of beta; + * it looks a bit complicated since we take freeze groups into account, + * and might have to sum it in parallel runs. + */ + + if (!DOMAINDECOMP(cr) || + (s_min->s.ddp_count == cr->dd->ddp_count && + s_b->s.ddp_count == cr->dd->ddp_count)) + { + fm = s_min->f; + fb = s_b->f; + sum = 0; + gf = 0; + /* This part of code can be incorrect with DD, + * since the atom ordering in s_b and s_min might differ. + */ + for (i = 0; i < mdatoms->homenr; i++) + { + if (mdatoms->cFREEZE) + { + gf = mdatoms->cFREEZE[i]; + } + for (m = 0; m < DIM; m++) + { + if (!opts->nFreeze[gf][m]) + { + sum += (fb[i][m] - fm[i][m])*fb[i][m]; + } + } + } + } + else + { + /* We need to reorder cgs while summing */ + sum = reorder_partsum(cr, opts, mdatoms, top_global, s_min, s_b); + } + if (PAR(cr)) + { + gmx_sumd(1, &sum, cr); + } + + return sum/gmx::square(s_min->fnorm); +} + +namespace gmx +{ + +/*! \brief Do conjugate gradients minimization + \copydoc integrator_t (FILE *fplog, t_commrec *cr, + int nfile, const t_filenm fnm[], + const gmx_output_env_t *oenv, gmx_bool bVerbose, + int nstglobalcomm, + gmx_vsite_t *vsite, gmx_constr_t constr, + int stepout, + t_inputrec *inputrec, + gmx_mtop_t *top_global, t_fcdata *fcd, + t_state *state_global, + t_mdatoms *mdatoms, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_edsam_t ed, + t_forcerec *fr, + int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, + real cpt_period, real max_hours, + int imdport, + unsigned long Flags, + gmx_walltime_accounting_t walltime_accounting) + */ +double do_cg(FILE *fplog, t_commrec *cr, + int nfile, const t_filenm fnm[], + const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose, + int gmx_unused nstglobalcomm, + gmx_vsite_t *vsite, gmx_constr_t constr, + int gmx_unused stepout, + t_inputrec *inputrec, + gmx_mtop_t *top_global, t_fcdata *fcd, + t_state *state_global, + t_mdatoms *mdatoms, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_edsam_t gmx_unused ed, + t_forcerec *fr, + int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed, + real gmx_unused cpt_period, real gmx_unused max_hours, + int imdport, + unsigned long gmx_unused Flags, + gmx_walltime_accounting_t walltime_accounting) +{ + const char *CG = "Polak-Ribiere Conjugate Gradients"; + + em_state_t *s_min, *s_a, *s_b, *s_c; + gmx_localtop_t *top; + gmx_enerdata_t *enerd; + rvec *f; + gmx_global_stat_t gstat; + t_graph *graph; + rvec *p, *sf; + double gpa, gpb, gpc, tmp, minstep; + real fnormn; + real stepsize; + real a, b, c, beta = 0.0; + real epot_repl = 0; + real pnorm; + t_mdebin *mdebin; + gmx_bool converged, foundlower; + rvec mu_tot; + gmx_bool do_log = FALSE, do_ene = FALSE, do_x, do_f; + tensor vir, pres; + int number_steps, neval = 0, nstcg = inputrec->nstcgsteep; + gmx_mdoutf_t outf; + int i, m, gf, step, nminstep; + + step = 0; + + s_min = init_em_state(); + s_a = init_em_state(); + s_b = init_em_state(); + s_c = init_em_state(); + + /* Init em and store the local state in s_min */ + init_em(fplog, CG, cr, inputrec, + state_global, top_global, s_min, &top, &f, + nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, + nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle); + + /* Print to log file */ + print_em_start(fplog, cr, walltime_accounting, wcycle, CG); + + /* Max number of 
steps */ + number_steps = inputrec->nsteps; + + if (MASTER(cr)) + { + sp_header(stderr, CG, inputrec->em_tol, number_steps); + } + if (fplog) + { + sp_header(fplog, CG, inputrec->em_tol, number_steps); + } + + /* Call the force routine and some auxiliary (neighboursearching etc.) */ + /* do_force always puts the charge groups in the box and shifts again + * We do not unshift, so molecules are always whole in congrad.c + */ + evaluate_energy(fplog, cr, + top_global, s_min, top, + inputrec, nrnb, wcycle, gstat, + vsite, constr, fcd, graph, mdatoms, fr, + mu_tot, enerd, vir, pres, -1, TRUE); + where(); + + if (MASTER(cr)) + { + /* Copy stuff to the energy bin for easy printing etc. */ + upd_mdebin(mdebin, FALSE, FALSE, (double)step, + mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box, + NULL, NULL, vir, pres, NULL, mu_tot, constr); + + print_ebin_header(fplog, step, step); + print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL, + mdebin, fcd, &(top_global->groups), &(inputrec->opts)); + } + where(); + + /* Estimate/guess the initial stepsize */ + stepsize = inputrec->em_stepsize/s_min->fnorm; + + if (MASTER(cr)) + { + double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms)); + fprintf(stderr, " F-max = %12.5e on atom %d\n", + s_min->fmax, s_min->a_fmax+1); + fprintf(stderr, " F-Norm = %12.5e\n", + s_min->fnorm/sqrtNumAtoms); + fprintf(stderr, "\n"); + /* and copy to the log file too... */ + fprintf(fplog, " F-max = %12.5e on atom %d\n", + s_min->fmax, s_min->a_fmax+1); + fprintf(fplog, " F-Norm = %12.5e\n", + s_min->fnorm/sqrtNumAtoms); + fprintf(fplog, "\n"); + } + /* Start the loop over CG steps. + * Each successful step is counted, and we continue until + * we either converge or reach the max number of steps. + */ + converged = FALSE; + for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++) + { + + /* start taking steps in a new direction + * First time we enter the routine, beta=0, and the direction is + * simply the negative gradient. + */ + + /* Calculate the new direction in p, and the gradient in this direction, gpa */ + p = s_min->s.cg_p; + sf = s_min->f; + gpa = 0; + gf = 0; + for (i = 0; i < mdatoms->homenr; i++) + { + if (mdatoms->cFREEZE) + { + gf = mdatoms->cFREEZE[i]; + } + for (m = 0; m < DIM; m++) + { + if (!inputrec->opts.nFreeze[gf][m]) + { + p[i][m] = sf[i][m] + beta*p[i][m]; + gpa -= p[i][m]*sf[i][m]; + /* f is negative gradient, thus the sign */ + } + else + { + p[i][m] = 0; + } + } + } + + /* Sum the gradient along the line across CPUs */ + if (PAR(cr)) + { + gmx_sumd(1, &gpa, cr); + } + + /* Calculate the norm of the search vector */ + get_f_norm_max(cr, &(inputrec->opts), mdatoms, p, &pnorm, NULL, NULL); + + /* Just in case stepsize reaches zero due to numerical precision... */ + if (stepsize <= 0) + { + stepsize = inputrec->em_stepsize/pnorm; + } + + /* + * Double check the value of the derivative in the search direction. + * If it is positive it must be due to the old information in the + * CG formula, so just remove that and start over with beta=0. + * This corresponds to a steepest descent step. 
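+ * In symbols: gpa = -sum_i p[i].f[i] is the directional derivative dE/dalpha
+ * along p (f is the negative gradient), so gpa > 0 means the proposed
+ * direction points uphill and the old CG information is stale.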
+ */ + if (gpa > 0) + { + beta = 0; + step--; /* Don't count this step since we are restarting */ + continue; /* Go back to the beginning of the big for-loop */ + } + + /* Calculate minimum allowed stepsize, before the average (norm) + * relative change in coordinate is smaller than precision + */ + minstep = 0; + for (i = 0; i < mdatoms->homenr; i++) + { + for (m = 0; m < DIM; m++) + { + tmp = fabs(s_min->s.x[i][m]); + if (tmp < 1.0) + { + tmp = 1.0; + } + tmp = p[i][m]/tmp; + minstep += tmp*tmp; + } + } + /* Add up from all CPUs */ + if (PAR(cr)) + { + gmx_sumd(1, &minstep, cr); + } + + minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms)); + + if (stepsize < minstep) + { + converged = TRUE; + break; + } + + /* Write coordinates if necessary */ + do_x = do_per_step(step, inputrec->nstxout); + do_f = do_per_step(step, inputrec->nstfout); + + write_em_traj(fplog, cr, outf, do_x, do_f, NULL, + top_global, inputrec, step, + s_min, state_global); + + /* Take a step downhill. + * In theory, we should minimize the function along this direction. + * That is quite possible, but it turns out to take 5-10 function evaluations + * for each line. However, we dont really need to find the exact minimum - + * it is much better to start a new CG step in a modified direction as soon + * as we are close to it. This will save a lot of energy evaluations. + * + * In practice, we just try to take a single step. + * If it worked (i.e. lowered the energy), we increase the stepsize but + * the continue straight to the next CG step without trying to find any minimum. + * If it didn't work (higher energy), there must be a minimum somewhere between + * the old position and the new one. + * + * Due to the finite numerical accuracy, it turns out that it is a good idea + * to even accept a SMALL increase in energy, if the derivative is still downhill. + * This leads to lower final energies in the tests I've done. / Erik + */ + s_a->epot = s_min->epot; + a = 0.0; + c = a + stepsize; /* reference position along line is zero */ + + if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) + { + em_dd_partition_system(fplog, step, cr, top_global, inputrec, + s_min, top, mdatoms, fr, vsite, constr, + nrnb, wcycle); + } + + /* Take a trial step (new coords in s_c) */ + do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, c, s_min->s.cg_p, s_c, + constr, top, nrnb, wcycle, -1); + + neval++; + /* Calculate energy for the trial step */ + evaluate_energy(fplog, cr, + top_global, s_c, top, + inputrec, nrnb, wcycle, gstat, + vsite, constr, fcd, graph, mdatoms, fr, + mu_tot, enerd, vir, pres, -1, FALSE); + + /* Calc derivative along line */ + p = s_c->s.cg_p; + sf = s_c->f; + gpc = 0; + for (i = 0; i < mdatoms->homenr; i++) + { + for (m = 0; m < DIM; m++) + { + gpc -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */ + } + } + /* Sum the gradient along the line across CPUs */ + if (PAR(cr)) + { + gmx_sumd(1, &gpc, cr); + } + + /* This is the max amount of increase in energy we tolerate */ + tmp = sqrt(GMX_REAL_EPS)*fabs(s_a->epot); + + /* Accept the step if the energy is lower, or if it is not significantly higher + * and the line derivative is still negative. + */ + if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) + { + foundlower = TRUE; + /* Great, we found a better energy. 
Increase step for next iteration + * if we are still going down, decrease it otherwise + */ + if (gpc < 0) + { + stepsize *= 1.618034; /* The golden section */ + } + else + { + stepsize *= 0.618034; /* 1/golden section */ + } + } + else + { + /* New energy is the same or higher. We will have to do some work + * to find a smaller value in the interval. Take smaller step next time! + */ + foundlower = FALSE; + stepsize *= 0.618034; + } + + + + + /* OK, if we didn't find a lower value we will have to locate one now - there must + * be one in the interval [a=0,c]. + * The same thing is valid here, though: Don't spend dozens of iterations to find + * the line minimum. We try to interpolate based on the derivative at the endpoints, + * and only continue until we find a lower value. In most cases this means 1-2 iterations. + * + * I also have a safeguard for potentially really pathological functions so we never + * take more than 20 steps before we give up ... + * + * If we already found a lower value we just skip this step and continue to the update. + */ + if (!foundlower) + { + nminstep = 0; + + do + { + /* Select a new trial point. + * If the derivatives at points a & c have different sign we interpolate to zero, + * otherwise just do a bisection. + */ + if (gpa < 0 && gpc > 0) + { + b = a + gpa*(a-c)/(gpc-gpa); + } + else + { + b = 0.5*(a+c); + } + + /* safeguard if interpolation close to machine accuracy causes errors: + * never go outside the interval + */ + if (b <= a || b >= c) + { + b = 0.5*(a+c); + } + + if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) + { + /* Reload the old state */ + em_dd_partition_system(fplog, -1, cr, top_global, inputrec, + s_min, top, mdatoms, fr, vsite, constr, + nrnb, wcycle); + } + + /* Take a trial step to this new point - new coords in s_b */ + do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, b, s_min->s.cg_p, s_b, + constr, top, nrnb, wcycle, -1); + + neval++; + /* Calculate energy for the trial step */ + evaluate_energy(fplog, cr, + top_global, s_b, top, + inputrec, nrnb, wcycle, gstat, + vsite, constr, fcd, graph, mdatoms, fr, + mu_tot, enerd, vir, pres, -1, FALSE); + + /* p does not change within a step, but since the domain decomposition + * might change, we have to use cg_p of s_b here. + */ + p = s_b->s.cg_p; + sf = s_b->f; + gpb = 0; + for (i = 0; i < mdatoms->homenr; i++) + { + for (m = 0; m < DIM; m++) + { + gpb -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */ + } + } + /* Sum the gradient along the line across CPUs */ + if (PAR(cr)) + { + gmx_sumd(1, &gpb, cr); + } + + if (debug) + { + fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n", + s_a->epot, s_b->epot, s_c->epot, gpb); + } + + epot_repl = s_b->epot; + + /* Keep one of the intervals based on the value of the derivative at the new point */ + if (gpb > 0) + { + /* Replace c endpoint with b */ + swap_em_state(s_b, s_c); + c = b; + gpc = gpb; + } + else + { + /* Replace a endpoint with b */ + swap_em_state(s_b, s_a); + a = b; + gpa = gpb; + } + + /* + * Stop search as soon as we find a value smaller than the endpoints. + * Never run more than 20 steps, no matter what. + */ + nminstep++; + } + while ((epot_repl > s_a->epot || epot_repl > s_c->epot) && + (nminstep < 20)); + + if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS || + nminstep >= 20) + { + /* OK. We couldn't find a significantly lower energy. + * If beta==0 this was steepest descent, and then we give up. 
+ * If not, set beta=0 and restart with steepest descent before quitting. + */ + if (beta == 0.0) + { + /* Converged */ + converged = TRUE; + break; + } + else + { + /* Reset memory before giving up */ + beta = 0.0; + continue; + } + } + + /* Select min energy state of A & C, put the best in B. + */ + if (s_c->epot < s_a->epot) + { + if (debug) + { + fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n", + s_c->epot, s_a->epot); + } + swap_em_state(s_b, s_c); + gpb = gpc; + } + else + { + if (debug) + { + fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n", + s_a->epot, s_c->epot); + } + swap_em_state(s_b, s_a); + gpb = gpa; + } + + } + else + { + if (debug) + { + fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n", + s_c->epot); + } + swap_em_state(s_b, s_c); + gpb = gpc; + } + + /* new search direction */ + /* beta = 0 means forget all memory and restart with steepest descents. */ + if (nstcg && ((step % nstcg) == 0)) + { + beta = 0.0; + } + else + { + /* s_min->fnorm cannot be zero, because then we would have converged + * and broken out. + */ + + /* Polak-Ribiere update. + * Change to fnorm2/fnorm2_old for Fletcher-Reeves + */ + beta = pr_beta(cr, &inputrec->opts, mdatoms, top_global, s_min, s_b); + } + /* Limit beta to prevent oscillations */ + if (fabs(beta) > 5.0) + { + beta = 0.0; + } + + + /* update positions */ + swap_em_state(s_min, s_b); + gpa = gpb; + + /* Print it if necessary */ + if (MASTER(cr)) + { + if (bVerbose) + { + double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms)); + fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n", + step, s_min->epot, s_min->fnorm/sqrtNumAtoms, + s_min->fmax, s_min->a_fmax+1); + fflush(stderr); + } + /* Store the new (lower) energies */ + upd_mdebin(mdebin, FALSE, FALSE, (double)step, + mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box, + NULL, NULL, vir, pres, NULL, mu_tot, constr); + + do_log = do_per_step(step, inputrec->nstlog); + do_ene = do_per_step(step, inputrec->nstenergy); + + /* Prepare IMD energy record, if bIMD is TRUE. */ + IMD_fill_energy_record(inputrec->bIMD, inputrec->imd, enerd, step, TRUE); + + if (do_log) + { + print_ebin_header(fplog, step, step); + } + print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE, + do_log ? fplog : NULL, step, step, eprNORMAL, + mdebin, fcd, &(top_global->groups), &(inputrec->opts)); + } + + /* Send energies and positions to the IMD client if bIMD is TRUE. */ + if (do_IMD(inputrec->bIMD, step, cr, TRUE, state_global->box, state_global->x, inputrec, 0, wcycle) && MASTER(cr)) + { + IMD_send_positions(inputrec->imd); + } + + /* Stop when the maximum force lies below tolerance. + * If we have reached machine precision, converged is already set to true. + */ + converged = converged || (s_min->fmax < inputrec->em_tol); + + } /* End of the loop */ + + /* IMD cleanup, if bIMD is TRUE. */ + IMD_finalize(inputrec->bIMD, inputrec->imd); + + if (converged) + { + step--; /* we never took that last step in this case */ + + } + if (s_min->fmax > inputrec->em_tol) + { + if (MASTER(cr)) + { + warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE); + warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE); + } + converged = FALSE; + } + + if (MASTER(cr)) + { + /* If we printed energy and/or logfile last step (which was the last step) + * we don't have to do it again, but otherwise print the final values. 
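+ * Note that do_log and do_ene still hold their values from the last pass
+ * through the minimization loop, which is what makes this check valid.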
+ */ + if (!do_log) + { + /* Write final value to log since we didn't do anything the last step */ + print_ebin_header(fplog, step, step); + } + if (!do_ene || !do_log) + { + /* Write final energy file entries */ + print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE, + !do_log ? fplog : NULL, step, step, eprNORMAL, + mdebin, fcd, &(top_global->groups), &(inputrec->opts)); + } + } + + /* Print some stuff... */ + if (MASTER(cr)) + { + fprintf(stderr, "\nwriting lowest energy coordinates.\n"); + } + + /* IMPORTANT! + * For accurate normal mode calculation it is imperative that we + * store the last conformation into the full precision binary trajectory. + * + * However, we should only do it if we did NOT already write this step + * above (which we did if do_x or do_f was true). + */ + do_x = !do_per_step(step, inputrec->nstxout); + do_f = (inputrec->nstfout > 0 && !do_per_step(step, inputrec->nstfout)); + + write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), + top_global, inputrec, step, + s_min, state_global); + + + if (MASTER(cr)) + { + double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms)); + fnormn = s_min->fnorm/sqrtNumAtoms; + print_converged(stderr, CG, inputrec->em_tol, step, converged, number_steps, + s_min->epot, s_min->fmax, s_min->a_fmax, fnormn); + print_converged(fplog, CG, inputrec->em_tol, step, converged, number_steps, + s_min->epot, s_min->fmax, s_min->a_fmax, fnormn); + + fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval); + } + + finish_em(cr, outf, walltime_accounting, wcycle); + + /* To print the actual number of steps we needed somewhere */ + walltime_accounting_set_nsteps_done(walltime_accounting, step); + + return 0; +} /* That's all folks */ + + +/*! \brief Do L-BFGS conjugate gradients minimization + \copydoc integrator_t (FILE *fplog, t_commrec *cr, + int nfile, const t_filenm fnm[], + const gmx_output_env_t *oenv, gmx_bool bVerbose, + int nstglobalcomm, + gmx_vsite_t *vsite, gmx_constr_t constr, + int stepout, + t_inputrec *inputrec, + gmx_mtop_t *top_global, t_fcdata *fcd, + t_state *state_global, + t_mdatoms *mdatoms, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_edsam_t ed, + t_forcerec *fr, + int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, + real cpt_period, real max_hours, + int imdport, + unsigned long Flags, + gmx_walltime_accounting_t walltime_accounting) + */ +double do_lbfgs(FILE *fplog, t_commrec *cr, + int nfile, const t_filenm fnm[], + const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose, + int gmx_unused nstglobalcomm, + gmx_vsite_t *vsite, gmx_constr_t constr, + int gmx_unused stepout, + t_inputrec *inputrec, + gmx_mtop_t *top_global, t_fcdata *fcd, + t_state *state_global, + t_mdatoms *mdatoms, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_edsam_t gmx_unused ed, + t_forcerec *fr, + int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed, + real gmx_unused cpt_period, real gmx_unused max_hours, + int imdport, + unsigned long gmx_unused Flags, + gmx_walltime_accounting_t walltime_accounting) +{ + static const char *LBFGS = "Low-Memory BFGS Minimizer"; + em_state_t ems; + gmx_localtop_t *top; + gmx_enerdata_t *enerd; + rvec *f; + gmx_global_stat_t gstat; + t_graph *graph; + int ncorr, nmaxcorr, point, cp, neval, nminstep; + double stepsize, step_taken, gpa, gpb, gpc, tmp, minstep; + real *rho, *alpha, *ff, *xx, *p, *s, *lastx, *lastf, **dx, **dg; + real *xa, *xb, *xc, *fa, *fb, *fc, *xtmp, *ftmp; + real a, b, c, maxdelta, delta; + real diag, Epot0, Epot, 
EpotA, EpotB, EpotC; + real dgdx, dgdg, sq, yr, beta; + t_mdebin *mdebin; + gmx_bool converged; + rvec mu_tot; + real fnorm, fmax; + gmx_bool do_log, do_ene, do_x, do_f, foundlower, *frozen; + tensor vir, pres; + int start, end, number_steps; + gmx_mdoutf_t outf; + int i, k, m, n, nfmax, gf, step; + int mdof_flags; + + if (PAR(cr)) + { + gmx_fatal(FARGS, "Cannot do parallel L-BFGS Minimization - yet.\n"); + } + + if (NULL != constr) + { + gmx_fatal(FARGS, "The combination of constraints and L-BFGS minimization is not implemented. Either do not use constraints, or use another minimizer (e.g. steepest descent)."); + } + + n = 3*state_global->natoms; + nmaxcorr = inputrec->nbfgscorr; + + /* Allocate memory */ + /* Use pointers to real so we dont have to loop over both atoms and + * dimensions all the time... + * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real + * that point to the same memory. + */ + snew(xa, n); + snew(xb, n); + snew(xc, n); + snew(fa, n); + snew(fb, n); + snew(fc, n); + snew(frozen, n); + + snew(p, n); + snew(lastx, n); + snew(lastf, n); + snew(rho, nmaxcorr); + snew(alpha, nmaxcorr); + + snew(dx, nmaxcorr); + for (i = 0; i < nmaxcorr; i++) + { + snew(dx[i], n); + } + + snew(dg, nmaxcorr); + for (i = 0; i < nmaxcorr; i++) + { + snew(dg[i], n); + } + + step = 0; + neval = 0; + + /* Init em */ + init_em(fplog, LBFGS, cr, inputrec, + state_global, top_global, &ems, &top, &f, + nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, + nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle); + /* Do_lbfgs is not completely updated like do_steep and do_cg, + * so we free some memory again. + */ + sfree(ems.s.x); + sfree(ems.f); + + xx = (real *)state_global->x; + ff = (real *)f; + + start = 0; + end = mdatoms->homenr; + + /* Print to log file */ + print_em_start(fplog, cr, walltime_accounting, wcycle, LBFGS); + + do_log = do_ene = do_x = do_f = TRUE; + + /* Max number of steps */ + number_steps = inputrec->nsteps; + + /* Create a 3*natoms index to tell whether each degree of freedom is frozen */ + gf = 0; + for (i = start; i < end; i++) + { + if (mdatoms->cFREEZE) + { + gf = mdatoms->cFREEZE[i]; + } + for (m = 0; m < DIM; m++) + { + frozen[3*i+m] = inputrec->opts.nFreeze[gf][m]; + } + } + if (MASTER(cr)) + { + sp_header(stderr, LBFGS, inputrec->em_tol, number_steps); + } + if (fplog) + { + sp_header(fplog, LBFGS, inputrec->em_tol, number_steps); + } + + if (vsite) + { + construct_vsites(vsite, state_global->x, 1, NULL, + top->idef.iparams, top->idef.il, + fr->ePBC, fr->bMolPBC, cr, state_global->box); + } + + /* Call the force routine and some auxiliary (neighboursearching etc.) */ + /* do_force always puts the charge groups in the box and shifts again + * We do not unshift, so molecules are always whole + */ + neval++; + ems.s.x = state_global->x; + ems.f = f; + evaluate_energy(fplog, cr, + top_global, &ems, top, + inputrec, nrnb, wcycle, gstat, + vsite, constr, fcd, graph, mdatoms, fr, + mu_tot, enerd, vir, pres, -1, TRUE); + where(); + + if (MASTER(cr)) + { + /* Copy stuff to the energy bin for easy printing etc. 
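+ * (upd_mdebin caches this step's energies; print_ebin then writes the header
+ * and a normal-format record to the energy file and the log.)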
*/ + upd_mdebin(mdebin, FALSE, FALSE, (double)step, + mdatoms->tmass, enerd, state_global, inputrec->fepvals, inputrec->expandedvals, state_global->box, + NULL, NULL, vir, pres, NULL, mu_tot, constr); + + print_ebin_header(fplog, step, step); + print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL, + mdebin, fcd, &(top_global->groups), &(inputrec->opts)); + } + where(); + + /* This is the starting energy */ + Epot = enerd->term[F_EPOT]; + + fnorm = ems.fnorm; + fmax = ems.fmax; + nfmax = ems.a_fmax; + + /* Set the initial step. + * since it will be multiplied by the non-normalized search direction + * vector (force vector the first time), we scale it by the + * norm of the force. + */ + + if (MASTER(cr)) + { + double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms)); + fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr); + fprintf(stderr, " F-max = %12.5e on atom %d\n", fmax, nfmax+1); + fprintf(stderr, " F-Norm = %12.5e\n", fnorm/sqrtNumAtoms); + fprintf(stderr, "\n"); + /* and copy to the log file too... */ + fprintf(fplog, "Using %d BFGS correction steps.\n\n", nmaxcorr); + fprintf(fplog, " F-max = %12.5e on atom %d\n", fmax, nfmax+1); + fprintf(fplog, " F-Norm = %12.5e\n", fnorm/sqrtNumAtoms); + fprintf(fplog, "\n"); + } + + // Point is an index to the memory of search directions, where 0 is the first one. + point = 0; + + // Set initial search direction to the force (-gradient), or 0 for frozen particles. + for (i = 0; i < n; i++) + { + if (!frozen[i]) + { + dx[point][i] = ff[i]; /* Initial search direction */ + } + else + { + dx[point][i] = 0; + } + } + + // Stepsize will be modified during the search, and actually it is not critical + // (the main efficiency in the algorithm comes from changing directions), but + // we still need an initial value, so estimate it as the inverse of the norm + // so we take small steps where the potential fluctuates a lot. + stepsize = 1.0/fnorm; + + /* Start the loop over BFGS steps. + * Each successful step is counted, and we continue until + * we either converge or reach the max number of steps. 
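+ * Roughly, each iteration does: (1) a line search along dx[point] that accepts
+ * the first sufficiently good point, (2) storage of the resulting (dx, dg)
+ * correction pair, and (3) a two-loop recursion over the stored pairs to build
+ * the next quasi-Newton search direction.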
+ */ + + ncorr = 0; + + /* Set the gradient from the force */ + converged = FALSE; + for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++) + { + + /* Write coordinates if necessary */ + do_x = do_per_step(step, inputrec->nstxout); + do_f = do_per_step(step, inputrec->nstfout); + + mdof_flags = 0; + if (do_x) + { + mdof_flags |= MDOF_X; + } + + if (do_f) + { + mdof_flags |= MDOF_F; + } + + if (inputrec->bIMD) + { + mdof_flags |= MDOF_IMD; + } + + mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, + top_global, step, (real)step, state_global, state_global, f); + + /* Do the linesearching in the direction dx[point][0..(n-1)] */ + + /* make s a pointer to current search direction - point=0 first time we get here */ + s = dx[point]; + + // calculate line gradient in position A + for (gpa = 0, i = 0; i < n; i++) + { + gpa -= s[i]*ff[i]; + } + + /* Calculate minimum allowed stepsize along the line, before the average (norm) + * relative change in coordinate is smaller than precision + */ + for (minstep = 0, i = 0; i < n; i++) + { + tmp = fabs(xx[i]); + if (tmp < 1.0) + { + tmp = 1.0; + } + tmp = s[i]/tmp; + minstep += tmp*tmp; + } + minstep = GMX_REAL_EPS/sqrt(minstep/n); + + if (stepsize < minstep) + { + converged = TRUE; + break; + } + + // Before taking any steps along the line, store the old position + for (i = 0; i < n; i++) + { + lastx[i] = xx[i]; + lastf[i] = ff[i]; + } + Epot0 = Epot; + + for (i = 0; i < n; i++) + { + xa[i] = xx[i]; + } + + /* Take a step downhill. + * In theory, we should find the actual minimum of the function in this + * direction, somewhere along the line. + * That is quite possible, but it turns out to take 5-10 function evaluations + * for each line. However, we dont really need to find the exact minimum - + * it is much better to start a new BFGS step in a modified direction as soon + * as we are close to it. This will save a lot of energy evaluations. + * + * In practice, we just try to take a single step. + * If it worked (i.e. lowered the energy), we increase the stepsize but + * continue straight to the next BFGS step without trying to find any minimum, + * i.e. we change the search direction too. If the line was smooth, it is + * likely we are in a smooth region, and then it makes sense to take longer + * steps in the modified search direction too. + * + * If it didn't work (higher energy), there must be a minimum somewhere between + * the old position and the new one. Then we need to start by finding a lower + * value before we change search direction. Since the energy was apparently + * quite rough, we need to decrease the step size. + * + * Due to the finite numerical accuracy, it turns out that it is a good idea + * to accept a SMALL increase in energy, if the derivative is still downhill. + * This leads to lower final energies in the tests I've done. / Erik + */ + + // State "A" is the first position along the line. + // reference position along line is initially zero + EpotA = Epot0; + a = 0.0; + + // Check stepsize first. We do not allow displacements + // larger than emstep. + // + do + { + // Pick a new position C by adding stepsize to A. 
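+ // (Each pass of this loop computes the largest single-coordinate move c*s[i];
+ // if it exceeds inputrec->em_stepsize, stepsize is cut by a factor of 10 and
+ // position C is picked again.)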
+ c = a + stepsize; + + // Calculate what the largest change in any individual coordinate + // would be (translation along line * gradient along line) + maxdelta = 0; + for (i = 0; i < n; i++) + { + delta = c*s[i]; + if (delta > maxdelta) + { + maxdelta = delta; + } + } + // If any displacement is larger than the stepsize limit, reduce the step + if (maxdelta > inputrec->em_stepsize) + { + stepsize *= 0.1; + } + } + while (maxdelta > inputrec->em_stepsize); + + // Take a trial step and move the coordinate array xc[] to position C + for (i = 0; i < n; i++) + { + xc[i] = lastx[i] + c*s[i]; + } + + neval++; + // Calculate energy for the trial step in position C + ems.s.x = (rvec *)xc; + ems.f = (rvec *)fc; + evaluate_energy(fplog, cr, + top_global, &ems, top, + inputrec, nrnb, wcycle, gstat, + vsite, constr, fcd, graph, mdatoms, fr, + mu_tot, enerd, vir, pres, step, FALSE); + EpotC = ems.epot; + + // Calc line gradient in position C + for (gpc = 0, i = 0; i < n; i++) + { + gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */ + } + /* Sum the gradient along the line across CPUs */ + if (PAR(cr)) + { + gmx_sumd(1, &gpc, cr); + } + + // This is the max amount of increase in energy we tolerate. + // By allowing VERY small changes (close to numerical precision) we + // frequently find even better (lower) final energies. + tmp = sqrt(GMX_REAL_EPS)*fabs(EpotA); + + // Accept the step if the energy is lower in the new position C (compared to A), + // or if it is not significantly higher and the line derivative is still negative. + if (EpotC < EpotA || (gpc < 0 && EpotC < (EpotA+tmp))) + { + // Great, we found a better energy. We no longer try to alter the + // stepsize, but simply accept this new better position. The we select a new + // search direction instead, which will be much more efficient than continuing + // to take smaller steps along a line. Set fnorm based on the new C position, + // which will be used to update the stepsize to 1/fnorm further down. + foundlower = TRUE; + fnorm = ems.fnorm; + } + else + { + // If we got here, the energy is NOT lower in point C, i.e. it will be the same + // or higher than in point A. In this case it is pointless to move to point C, + // so we will have to do more iterations along the same line to find a smaller + // value in the interval [A=0.0,C]. + // Here, A is still 0.0, but that will change when we do a search in the interval + // [0.0,C] below. That search we will do by interpolation or bisection rather + // than with the stepsize, so no need to modify it. For the next search direction + // it will be reset to 1/fnorm anyway. + foundlower = FALSE; + } + + if (!foundlower) + { + // OK, if we didn't find a lower value we will have to locate one now - there must + // be one in the interval [a,c]. + // The same thing is valid here, though: Don't spend dozens of iterations to find + // the line minimum. We try to interpolate based on the derivative at the endpoints, + // and only continue until we find a lower value. In most cases this means 1-2 iterations. + // I also have a safeguard for potentially really pathological functions so we never + // take more than 20 steps before we give up. + // If we already found a lower value we just skip this step and continue to the update. + nminstep = 0; + do + { + // Select a new trial point B in the interval [A,C]. + // If the derivatives at points a & c have different sign we interpolate to zero, + // otherwise just do a bisection since there might be multiple minima/maxima + // inside the interval. 
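+ // Sketch of the interpolation: modelling the line derivative as linear
+ // between (a, gpa) and (c, gpc), it crosses zero at
+ //     b = a + gpa*(a-c)/(gpc-gpa),
+ // i.e. a false-position (regula falsi) step on the derivative.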
+ if (gpa < 0 && gpc > 0) + { + b = a + gpa*(a-c)/(gpc-gpa); + } + else + { + b = 0.5*(a+c); + } + + /* safeguard if interpolation close to machine accuracy causes errors: + * never go outside the interval + */ + if (b <= a || b >= c) + { + b = 0.5*(a+c); + } + + // Take a trial step to point B + for (i = 0; i < n; i++) + { + xb[i] = lastx[i] + b*s[i]; + } + + neval++; + // Calculate energy for the trial step in point B + ems.s.x = (rvec *)xb; + ems.f = (rvec *)fb; + evaluate_energy(fplog, cr, + top_global, &ems, top, + inputrec, nrnb, wcycle, gstat, + vsite, constr, fcd, graph, mdatoms, fr, + mu_tot, enerd, vir, pres, step, FALSE); + EpotB = ems.epot; + fnorm = ems.fnorm; + + // Calculate gradient in point B + for (gpb = 0, i = 0; i < n; i++) + { + gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */ + + } + /* Sum the gradient along the line across CPUs */ + if (PAR(cr)) + { + gmx_sumd(1, &gpb, cr); + } + + // Keep one of the intervals [A,B] or [B,C] based on the value of the derivative + // at the new point B, and rename the endpoints of this new interval A and C. + if (gpb > 0) + { + /* Replace c endpoint with b */ + EpotC = EpotB; + c = b; + gpc = gpb; + /* swap coord pointers b/c */ + xtmp = xb; + ftmp = fb; + xb = xc; + fb = fc; + xc = xtmp; + fc = ftmp; + } + else + { + /* Replace a endpoint with b */ + EpotA = EpotB; + a = b; + gpa = gpb; + /* swap coord pointers a/b */ + xtmp = xb; + ftmp = fb; + xb = xa; + fb = fa; + xa = xtmp; + fa = ftmp; + } + + /* + * Stop search as soon as we find a value smaller than the endpoints, + * or if the tolerance is below machine precision. + * Never run more than 20 steps, no matter what. + */ + nminstep++; + } + while ((EpotB > EpotA || EpotB > EpotC) && (nminstep < 20)); + + if (fabs(EpotB-Epot0) < GMX_REAL_EPS || nminstep >= 20) + { + /* OK. We couldn't find a significantly lower energy. + * If ncorr==0 this was steepest descent, and then we give up. + * If not, reset memory to restart as steepest descent before quitting. + */ + if (ncorr == 0) + { + /* Converged */ + converged = TRUE; + break; + } + else + { + /* Reset memory */ + ncorr = 0; + /* Search in gradient direction */ + for (i = 0; i < n; i++) + { + dx[point][i] = ff[i]; + } + /* Reset stepsize */ + stepsize = 1.0/fnorm; + continue; + } + } + + /* Select min energy state of A & C, put the best in xx/ff/Epot + */ + if (EpotC < EpotA) + { + Epot = EpotC; + /* Use state C */ + for (i = 0; i < n; i++) + { + xx[i] = xc[i]; + ff[i] = fc[i]; + } + step_taken = c; + } + else + { + Epot = EpotA; + /* Use state A */ + for (i = 0; i < n; i++) + { + xx[i] = xa[i]; + ff[i] = fa[i]; + } + step_taken = a; + } + + } + else + { + /* found lower */ + Epot = EpotC; + /* Use state C */ + for (i = 0; i < n; i++) + { + xx[i] = xc[i]; + ff[i] = fc[i]; + } + step_taken = c; + } + + /* Update the memory information, and calculate a new + * approximation of the inverse hessian + */ + + /* Have new data in Epot, xx, ff */ + if (ncorr < nmaxcorr) + { + ncorr++; + } + + for (i = 0; i < n; i++) + { + dg[point][i] = lastf[i]-ff[i]; + dx[point][i] *= step_taken; + } + + dgdg = 0; + dgdx = 0; + for (i = 0; i < n; i++) + { + dgdg += dg[point][i]*dg[point][i]; + dgdx += dg[point][i]*dx[point][i]; + } + + diag = dgdx/dgdg; + + rho[point] = 1.0/dgdx; + point++; + + if (point >= nmaxcorr) + { + point = 0; + } + + /* Update */ + for (i = 0; i < n; i++) + { + p[i] = ff[i]; + } + + cp = point; + + /* Recursive update. 
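+ * (What follows is the standard L-BFGS two-loop recursion: the backward pass
+ * computes alpha[cp] = rho[cp]*(dx[cp].p) and subtracts alpha[cp]*dg[cp] from p;
+ * p is then scaled by diag = (dx.dg)/(dg.dg); the forward pass adds
+ * (alpha[cp] - rho[cp]*(dg[cp].p))*dx[cp] back in.)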
First go back over the memory points */ + for (k = 0; k < ncorr; k++) + { + cp--; + if (cp < 0) + { + cp = ncorr-1; + } + + sq = 0; + for (i = 0; i < n; i++) + { + sq += dx[cp][i]*p[i]; + } + + alpha[cp] = rho[cp]*sq; + + for (i = 0; i < n; i++) + { + p[i] -= alpha[cp]*dg[cp][i]; + } + } + + for (i = 0; i < n; i++) + { + p[i] *= diag; + } + + /* And then go forward again */ + for (k = 0; k < ncorr; k++) + { + yr = 0; + for (i = 0; i < n; i++) + { + yr += p[i]*dg[cp][i]; + } + + beta = rho[cp]*yr; + beta = alpha[cp]-beta; + + for (i = 0; i < n; i++) + { + p[i] += beta*dx[cp][i]; + } + + cp++; + if (cp >= ncorr) + { + cp = 0; + } + } + + for (i = 0; i < n; i++) + { + if (!frozen[i]) + { + dx[point][i] = p[i]; + } + else + { + dx[point][i] = 0; + } + } + + /* Test whether the convergence criterion is met */ + get_f_norm_max(cr, &(inputrec->opts), mdatoms, f, &fnorm, &fmax, &nfmax); + + /* Print it if necessary */ + if (MASTER(cr)) + { + if (bVerbose) + { + double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms)); + fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n", + step, Epot, fnorm/sqrtNumAtoms, fmax, nfmax+1); + fflush(stderr); + } + /* Store the new (lower) energies */ + upd_mdebin(mdebin, FALSE, FALSE, (double)step, + mdatoms->tmass, enerd, state_global, inputrec->fepvals, inputrec->expandedvals, state_global->box, + NULL, NULL, vir, pres, NULL, mu_tot, constr); + do_log = do_per_step(step, inputrec->nstlog); + do_ene = do_per_step(step, inputrec->nstenergy); + if (do_log) + { + print_ebin_header(fplog, step, step); + } + print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE, + do_log ? fplog : NULL, step, step, eprNORMAL, + mdebin, fcd, &(top_global->groups), &(inputrec->opts)); + } + + /* Send x and E to IMD client, if bIMD is TRUE. */ + if (do_IMD(inputrec->bIMD, step, cr, TRUE, state_global->box, state_global->x, inputrec, 0, wcycle) && MASTER(cr)) + { + IMD_send_positions(inputrec->imd); + } + + // Reset stepsize in we are doing more iterations + stepsize = 1.0/fnorm; + + /* Stop when the maximum force lies below tolerance. + * If we have reached machine precision, converged is already set to true. + */ + converged = converged || (fmax < inputrec->em_tol); + + } /* End of the loop */ + + /* IMD cleanup, if bIMD is TRUE. */ + IMD_finalize(inputrec->bIMD, inputrec->imd); + + if (converged) + { + step--; /* we never took that last step in this case */ + + } + if (fmax > inputrec->em_tol) + { + if (MASTER(cr)) + { + warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE); + warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE); + } + converged = FALSE; + } + + /* If we printed energy and/or logfile last step (which was the last step) + * we don't have to do it again, but otherwise print the final values. + */ + if (!do_log) /* Write final value to log since we didn't do anythin last step */ + { + print_ebin_header(fplog, step, step); + } + if (!do_ene || !do_log) /* Write final energy file entries */ + { + print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE, + !do_log ? fplog : NULL, step, step, eprNORMAL, + mdebin, fcd, &(top_global->groups), &(inputrec->opts)); + } + + /* Print some stuff... */ + if (MASTER(cr)) + { + fprintf(stderr, "\nwriting lowest energy coordinates.\n"); + } + + /* IMPORTANT! + * For accurate normal mode calculation it is imperative that we + * store the last conformation into the full precision binary trajectory. 
+ * + * However, we should only do it if we did NOT already write this step + * above (which we did if do_x or do_f was true). + */ + do_x = !do_per_step(step, inputrec->nstxout); + do_f = !do_per_step(step, inputrec->nstfout); + write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), + top_global, inputrec, step, + &ems, state_global); + + if (MASTER(cr)) + { + double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms)); + print_converged(stderr, LBFGS, inputrec->em_tol, step, converged, + number_steps, Epot, fmax, nfmax, fnorm/sqrtNumAtoms); + print_converged(fplog, LBFGS, inputrec->em_tol, step, converged, + number_steps, Epot, fmax, nfmax, fnorm/sqrtNumAtoms); + + fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval); + } + + finish_em(cr, outf, walltime_accounting, wcycle); + + /* To print the actual number of steps we needed somewhere */ + walltime_accounting_set_nsteps_done(walltime_accounting, step); + + return 0; +} /* That's all folks */ + +/*! \brief Do steepest descents minimization + \copydoc integrator_t (FILE *fplog, t_commrec *cr, + int nfile, const t_filenm fnm[], + const gmx_output_env_t *oenv, gmx_bool bVerbose, + int nstglobalcomm, + gmx_vsite_t *vsite, gmx_constr_t constr, + int stepout, + t_inputrec *inputrec, + gmx_mtop_t *top_global, t_fcdata *fcd, + t_state *state_global, + t_mdatoms *mdatoms, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_edsam_t ed, + t_forcerec *fr, + int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, + real cpt_period, real max_hours, + int imdport, + unsigned long Flags, + gmx_walltime_accounting_t walltime_accounting) + */ +double do_steep(FILE *fplog, t_commrec *cr, + int nfile, const t_filenm fnm[], + const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose, + int gmx_unused nstglobalcomm, + gmx_vsite_t *vsite, gmx_constr_t constr, + int gmx_unused stepout, + t_inputrec *inputrec, + gmx_mtop_t *top_global, t_fcdata *fcd, + t_state *state_global, + t_mdatoms *mdatoms, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_edsam_t gmx_unused ed, + t_forcerec *fr, + int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed, + real gmx_unused cpt_period, real gmx_unused max_hours, + int imdport, + unsigned long gmx_unused Flags, + gmx_walltime_accounting_t walltime_accounting) +{ + const char *SD = "Steepest Descents"; + em_state_t *s_min, *s_try; + gmx_localtop_t *top; + gmx_enerdata_t *enerd; + rvec *f; + gmx_global_stat_t gstat; + t_graph *graph; + real stepsize; + real ustep, fnormn; + gmx_mdoutf_t outf; + t_mdebin *mdebin; + gmx_bool bDone, bAbort, do_x, do_f; + tensor vir, pres; + rvec mu_tot; + int nsteps; + int count = 0; + int steps_accepted = 0; + + s_min = init_em_state(); + s_try = init_em_state(); + + /* Init em and store the local state in s_try */ + init_em(fplog, SD, cr, inputrec, + state_global, top_global, s_try, &top, &f, + nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, + nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle); + + /* Print to log file */ + print_em_start(fplog, cr, walltime_accounting, wcycle, SD); + + /* Set variables for stepsize (in nm). This is the largest + * step that we are going to make in any direction. 
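+ * (ustep caps the displacement of the largest force component: further down,
+ * stepsize = ustep/fmax, so x += stepsize*f moves the worst atom by at
+ * most ustep along any dimension.)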
+ */ + ustep = inputrec->em_stepsize; + stepsize = 0; + + /* Max number of steps */ + nsteps = inputrec->nsteps; + + if (MASTER(cr)) + { + /* Print to the screen */ + sp_header(stderr, SD, inputrec->em_tol, nsteps); + } + if (fplog) + { + sp_header(fplog, SD, inputrec->em_tol, nsteps); + } + + /**** HERE STARTS THE LOOP **** + * count is the counter for the number of steps + * bDone will be TRUE when the minimization has converged + * bAbort will be TRUE when nsteps steps have been performed or when + * the stepsize becomes smaller than is reasonable for machine precision + */ + count = 0; + bDone = FALSE; + bAbort = FALSE; + while (!bDone && !bAbort) + { + bAbort = (nsteps >= 0) && (count == nsteps); + + /* set new coordinates, except for first step */ + if (count > 0) + { + do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, + s_min, stepsize, s_min->f, s_try, + constr, top, nrnb, wcycle, count); + } + + evaluate_energy(fplog, cr, + top_global, s_try, top, + inputrec, nrnb, wcycle, gstat, + vsite, constr, fcd, graph, mdatoms, fr, + mu_tot, enerd, vir, pres, count, count == 0); + + if (MASTER(cr)) + { + print_ebin_header(fplog, count, count); + } + + if (count == 0) + { + s_min->epot = s_try->epot; + } + + /* Print it if necessary */ + if (MASTER(cr)) + { + if (bVerbose) + { + fprintf(stderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c", + count, ustep, s_try->epot, s_try->fmax, s_try->a_fmax+1, + ( (count == 0) || (s_try->epot < s_min->epot) ) ? '\n' : '\r'); + fflush(stderr); + } + + if ( (count == 0) || (s_try->epot < s_min->epot) ) + { + /* Store the new (lower) energies */ + upd_mdebin(mdebin, FALSE, FALSE, (double)count, + mdatoms->tmass, enerd, &s_try->s, inputrec->fepvals, inputrec->expandedvals, + s_try->s.box, NULL, NULL, vir, pres, NULL, mu_tot, constr); + + /* Prepare IMD energy record, if bIMD is TRUE. */ + IMD_fill_energy_record(inputrec->bIMD, inputrec->imd, enerd, count, TRUE); + + print_ebin(mdoutf_get_fp_ene(outf), TRUE, + do_per_step(steps_accepted, inputrec->nstdisreout), + do_per_step(steps_accepted, inputrec->nstorireout), + fplog, count, count, eprNORMAL, + mdebin, fcd, &(top_global->groups), &(inputrec->opts)); + fflush(fplog); + } + } + + /* Now if the new energy is smaller than the previous... + * or if this is the first step! + * or if we did random steps! + */ + + if ( (count == 0) || (s_try->epot < s_min->epot) ) + { + steps_accepted++; + + /* Test whether the convergence criterion is met... */ + bDone = (s_try->fmax < inputrec->em_tol); + + /* Copy the arrays for force, positions and energy */ + /* The 'Min' array always holds the coords and forces of the minimal + sampled energy */ + swap_em_state(s_min, s_try); + if (count > 0) + { + ustep *= 1.2; + } + + /* Write to trn, if necessary */ + do_x = do_per_step(steps_accepted, inputrec->nstxout); + do_f = do_per_step(steps_accepted, inputrec->nstfout); + write_em_traj(fplog, cr, outf, do_x, do_f, NULL, + top_global, inputrec, count, + s_min, state_global); + } + else + { + /* If energy is not smaller make the step smaller... 
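+ * (An accepted step above grows ustep by a factor of 1.2; a rejected one
+ * halves it and, under domain decomposition, repartitions back to the old
+ * s_min state before retrying.)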
*/ + ustep *= 0.5; + + if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) + { + /* Reload the old state */ + em_dd_partition_system(fplog, count, cr, top_global, inputrec, + s_min, top, mdatoms, fr, vsite, constr, + nrnb, wcycle); + } + } + + /* Determine new step */ + stepsize = ustep/s_min->fmax; + + /* Check if stepsize is too small, with 1 nm as a characteristic length */ +#if GMX_DOUBLE + if (count == nsteps || ustep < 1e-12) +#else + if (count == nsteps || ustep < 1e-6) +#endif + { + if (MASTER(cr)) + { + warn_step(stderr, inputrec->em_tol, count == nsteps, constr != NULL); + warn_step(fplog, inputrec->em_tol, count == nsteps, constr != NULL); + } + bAbort = TRUE; + } + + /* Send IMD energies and positions, if bIMD is TRUE. */ + if (do_IMD(inputrec->bIMD, count, cr, TRUE, state_global->box, state_global->x, inputrec, 0, wcycle) && MASTER(cr)) + { + IMD_send_positions(inputrec->imd); + } + + count++; + } /* End of the loop */ + + /* IMD cleanup, if bIMD is TRUE. */ + IMD_finalize(inputrec->bIMD, inputrec->imd); + + /* Print some data... */ + if (MASTER(cr)) + { + fprintf(stderr, "\nwriting lowest energy coordinates.\n"); + } + write_em_traj(fplog, cr, outf, TRUE, inputrec->nstfout, ftp2fn(efSTO, nfile, fnm), + top_global, inputrec, count, + s_min, state_global); + + if (MASTER(cr)) + { + double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms)); + fnormn = s_min->fnorm/sqrtNumAtoms; + + print_converged(stderr, SD, inputrec->em_tol, count, bDone, nsteps, + s_min->epot, s_min->fmax, s_min->a_fmax, fnormn); + print_converged(fplog, SD, inputrec->em_tol, count, bDone, nsteps, + s_min->epot, s_min->fmax, s_min->a_fmax, fnormn); + } + + finish_em(cr, outf, walltime_accounting, wcycle); + + /* To print the actual number of steps we needed somewhere */ + inputrec->nsteps = count; + + walltime_accounting_set_nsteps_done(walltime_accounting, count); + + return 0; +} /* That's all folks */ + +/*! 
\brief Do normal modes analysis + \copydoc integrator_t (FILE *fplog, t_commrec *cr, + int nfile, const t_filenm fnm[], + const gmx_output_env_t *oenv, gmx_bool bVerbose, + int nstglobalcomm, + gmx_vsite_t *vsite, gmx_constr_t constr, + int stepout, + t_inputrec *inputrec, + gmx_mtop_t *top_global, t_fcdata *fcd, + t_state *state_global, + t_mdatoms *mdatoms, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_edsam_t ed, + t_forcerec *fr, + int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, + real cpt_period, real max_hours, + int imdport, + unsigned long Flags, + gmx_walltime_accounting_t walltime_accounting) + */ +double do_nm(FILE *fplog, t_commrec *cr, + int nfile, const t_filenm fnm[], + const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose, + int gmx_unused nstglobalcomm, + gmx_vsite_t *vsite, gmx_constr_t constr, + int gmx_unused stepout, + t_inputrec *inputrec, + gmx_mtop_t *top_global, t_fcdata *fcd, + t_state *state_global, + t_mdatoms *mdatoms, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_edsam_t gmx_unused ed, + t_forcerec *fr, + int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed, + real gmx_unused cpt_period, real gmx_unused max_hours, + int imdport, + unsigned long gmx_unused Flags, + gmx_walltime_accounting_t walltime_accounting) +{ + const char *NM = "Normal Mode Analysis"; + gmx_mdoutf_t outf; + int nnodes, node; + gmx_localtop_t *top; + gmx_enerdata_t *enerd; + rvec *f; + gmx_global_stat_t gstat; + t_graph *graph; + tensor vir, pres; + rvec mu_tot; + rvec *fneg, *dfdx; + gmx_bool bSparse; /* use sparse matrix storage format */ + size_t sz; + gmx_sparsematrix_t * sparse_matrix = NULL; + real * full_matrix = NULL; + em_state_t * state_work; + + /* added with respect to mdrun */ + int row, col; + real der_range = 10.0*sqrt(GMX_REAL_EPS); + real x_min; + bool bIsMaster = MASTER(cr); + + if (constr != NULL) + { + gmx_fatal(FARGS, "Constraints present with Normal Mode Analysis, this combination is not supported"); + } + + state_work = init_em_state(); + + /* Init em and store the local state in state_minimum */ + init_em(fplog, NM, cr, inputrec, + state_global, top_global, state_work, &top, + &f, + nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr, + nfile, fnm, &outf, NULL, imdport, Flags, wcycle); + + gmx_shellfc_t *shellfc = init_shell_flexcon(stdout, + top_global, + n_flexible_constraints(constr), + inputrec->nstcalcenergy, + DOMAINDECOMP(cr)); + + if (shellfc) + { + make_local_shells(cr, mdatoms, shellfc); + } + std::vector<size_t> atom_index = get_atom_index(top_global); + snew(fneg, atom_index.size()); + snew(dfdx, atom_index.size()); + +#if !GMX_DOUBLE + if (bIsMaster) + { + fprintf(stderr, + "NOTE: This version of GROMACS has been compiled in single precision,\n" + " which MIGHT not be accurate enough for normal mode analysis.\n" + " GROMACS now uses sparse matrix storage, so the memory requirements\n" + " are fairly modest even if you recompile in double precision.\n\n"); + } +#endif + + /* Check if we can/should use sparse storage format. + * + * Sparse format is only useful when the Hessian itself is sparse, which it + * will be when we use a cutoff. + * For small systems (n<1000) it is easier to always use full matrix format, though. 
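+ * (With compressed symmetric storage only the elements with col >= row are
+ * kept, so memory scales with the number of interacting pairs rather than
+ * the full (3N)^2 matrix.)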
+ */ + if (EEL_FULL(fr->eeltype) || fr->rlist == 0.0) + { + md_print_info(cr, fplog, "Non-cutoff electrostatics used, forcing full Hessian format.\n"); + bSparse = FALSE; + } + else if (atom_index.size() < 1000) + { + md_print_info(cr, fplog, "Small system size (N=%d), using full Hessian format.\n", atom_index.size()); + bSparse = FALSE; + } + else + { + md_print_info(cr, fplog, "Using compressed symmetric sparse Hessian format.\n"); + bSparse = TRUE; + } + + /* Number of dimensions, based on real atoms, that is not vsites or shell */ + sz = DIM*atom_index.size(); + + fprintf(stderr, "Allocating Hessian memory...\n\n"); + + if (bSparse) + { + sparse_matrix = gmx_sparsematrix_init(sz); + sparse_matrix->compressed_symmetric = TRUE; + } + else + { + snew(full_matrix, sz*sz); + } + + init_nrnb(nrnb); + + where(); + + /* Write start time and temperature */ + print_em_start(fplog, cr, walltime_accounting, wcycle, NM); + + /* fudge nr of steps to nr of atoms */ + inputrec->nsteps = atom_index.size()*2; + + if (bIsMaster) + { + fprintf(stderr, "starting normal mode calculation '%s'\n%d steps.\n\n", + *(top_global->name), (int)inputrec->nsteps); + } + + nnodes = cr->nnodes; + + /* Make evaluate_energy do a single node force calculation */ + cr->nnodes = 1; + evaluate_energy(fplog, cr, + top_global, state_work, top, + inputrec, nrnb, wcycle, gstat, + vsite, constr, fcd, graph, mdatoms, fr, + mu_tot, enerd, vir, pres, -1, TRUE); + cr->nnodes = nnodes; + + /* if forces are not small, warn user */ + get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, state_work); + + md_print_info(cr, fplog, "Maximum force:%12.5e\n", state_work->fmax); + if (state_work->fmax > 1.0e-3) + { + md_print_info(cr, fplog, + "The force is probably not small enough to " + "ensure that you are at a minimum.\n" + "Be aware that negative eigenvalues may occur\n" + "when the resulting matrix is diagonalized.\n\n"); + } + + /*********************************************************** + * + * Loop over all pairs in matrix + * + * do_force called twice. 
Once with positive and + * once with negative displacement + * + ************************************************************/ + + /* Steps are divided one by one over the nodes */ + bool bNS = true; + for (unsigned int aid = cr->nodeid; aid < atom_index.size(); aid += nnodes) + { + size_t atom = atom_index[aid]; + for (size_t d = 0; d < DIM; d++) + { + gmx_bool bBornRadii = FALSE; + gmx_int64_t step = 0; + int force_flags = GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES; + double t = 0; + + x_min = state_work->s.x[atom][d]; + + for (unsigned int dx = 0; (dx < 2); dx++) + { + if (dx == 0) + { + state_work->s.x[atom][d] = x_min - der_range; + } + else + { + state_work->s.x[atom][d] = x_min + der_range; + } + + /* Make evaluate_energy do a single node force calculation */ + cr->nnodes = 1; + if (shellfc) + { + /* Now is the time to relax the shells */ + (void) relax_shell_flexcon(fplog, cr, bVerbose, step, + inputrec, bNS, force_flags, + top, + constr, enerd, fcd, + &state_work->s, state_work->f, vir, mdatoms, + nrnb, wcycle, graph, &top_global->groups, + shellfc, fr, bBornRadii, t, mu_tot, + vsite, NULL); + bNS = false; + step++; + } + else + { + evaluate_energy(fplog, cr, + top_global, state_work, top, + inputrec, nrnb, wcycle, gstat, + vsite, constr, fcd, graph, mdatoms, fr, + mu_tot, enerd, vir, pres, atom*2+dx, FALSE); + } + + cr->nnodes = nnodes; + + if (dx == 0) + { + for (size_t i = 0; i < atom_index.size(); i++) + { + copy_rvec(state_work->f[atom_index[i]], fneg[i]); + } + } + } + + /* x is restored to original */ + state_work->s.x[atom][d] = x_min; + + for (size_t j = 0; j < atom_index.size(); j++) + { + for (size_t k = 0; (k < DIM); k++) + { + dfdx[j][k] = + -(state_work->f[atom_index[j]][k] - fneg[j][k])/(2*der_range); + } + } + + if (!bIsMaster) + { +#if GMX_MPI +#define mpi_type GMX_MPI_REAL + MPI_Send(dfdx[0], atom_index.size()*DIM, mpi_type, MASTER(cr), + cr->nodeid, cr->mpi_comm_mygroup); +#endif + } + else + { + for (node = 0; (node < nnodes && atom+node < atom_index.size()); node++) + { + if (node > 0) + { +#if GMX_MPI + MPI_Status stat; + MPI_Recv(dfdx[0], atom_index.size()*DIM, mpi_type, node, node, + cr->mpi_comm_mygroup, &stat); +#undef mpi_type +#endif + } + + row = (atom + node)*DIM + d; + + for (size_t j = 0; j < atom_index.size(); j++) + { + for (size_t k = 0; k < DIM; k++) + { + col = j*DIM + k; + + if (bSparse) + { + if (col >= row && dfdx[j][k] != 0.0) + { + gmx_sparsematrix_increment_value(sparse_matrix, + row, col, dfdx[j][k]); + } + } + else + { + full_matrix[row*sz+col] = dfdx[j][k]; + } + } + } + } + } + + if (bVerbose && fplog) + { + fflush(fplog); + } + } + /* write progress */ + if (bIsMaster && bVerbose) + { + fprintf(stderr, "\rFinished step %d out of %d", + static_cast<int>(std::min(atom+nnodes, atom_index.size())), + static_cast<int>(atom_index.size())); + fflush(stderr); + } + } + + if (bIsMaster) + { + fprintf(stderr, "\n\nWriting Hessian...\n"); + gmx_mtxio_write(ftp2fn(efMTX, nfile, fnm), sz, sz, full_matrix, sparse_matrix); + } + + finish_em(cr, outf, walltime_accounting, wcycle); + + walltime_accounting_set_nsteps_done(walltime_accounting, atom_index.size()*2); + + return 0; +} + +} // namespace gmx diff --git a/patches/gromacs-2016-beta1.diff/src/programs/mdrun/md.cpp b/patches/gromacs-2016-beta1.diff/src/programs/mdrun/md.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2bd872565f10b34eaa07f80101ae3691a30be7ca --- /dev/null +++ b/patches/gromacs-2016-beta1.diff/src/programs/mdrun/md.cpp @@ -0,0 +1,1940 @@ +/* + * This 
file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2011,2012,2013,2014,2015,2016, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. 
+ */ +#include "gmxpre.h" + +#include "md.h" + +#include "config.h" + +#include <math.h> +#include <stdio.h> +#include <stdlib.h> + +#include <algorithm> + +#include "thread_mpi/threads.h" + +#include "gromacs/commandline/filenm.h" +#include "gromacs/domdec/domdec.h" +#include "gromacs/domdec/domdec_network.h" +#include "gromacs/domdec/domdec_struct.h" +#include "gromacs/ewald/pme.h" +#include "gromacs/ewald/pme-load-balancing.h" +#include "gromacs/fileio/trxio.h" +#include "gromacs/gmxlib/md_logging.h" +#include "gromacs/gmxlib/network.h" +#include "gromacs/gmxlib/nrnb.h" +#include "gromacs/gpu_utils/gpu_utils.h" +#include "gromacs/imd/imd.h" +#include "gromacs/listed-forces/manage-threading.h" +#include "gromacs/math/functions.h" +#include "gromacs/math/units.h" +#include "gromacs/math/utilities.h" +#include "gromacs/math/vec.h" +#include "gromacs/math/vectypes.h" +#include "gromacs/mdlib/compute_io.h" +#include "gromacs/mdlib/constr.h" +#include "gromacs/mdlib/ebin.h" +#include "gromacs/mdlib/force.h" +#include "gromacs/mdlib/force_flags.h" +#include "gromacs/mdlib/forcerec.h" +#include "gromacs/mdlib/md_support.h" +#include "gromacs/mdlib/mdatoms.h" +#include "gromacs/mdlib/mdebin.h" +#include "gromacs/mdlib/mdoutf.h" +#include "gromacs/mdlib/mdrun.h" +#include "gromacs/mdlib/mdrun_signalling.h" +#include "gromacs/mdlib/nb_verlet.h" +#include "gromacs/mdlib/nbnxn_gpu_data_mgmt.h" +#include "gromacs/mdlib/ns.h" +#include "gromacs/mdlib/shellfc.h" +#include "gromacs/mdlib/sighandler.h" +#include "gromacs/mdlib/sim_util.h" +#include "gromacs/mdlib/tgroup.h" +#include "gromacs/mdlib/trajectory_writing.h" +#include "gromacs/mdlib/update.h" +#include "gromacs/mdlib/vcm.h" +#include "gromacs/mdlib/vsite.h" +#include "gromacs/mdtypes/commrec.h" +#include "gromacs/mdtypes/df_history.h" +#include "gromacs/mdtypes/energyhistory.h" +#include "gromacs/mdtypes/fcdata.h" +#include "gromacs/mdtypes/forcerec.h" +#include "gromacs/mdtypes/group.h" +#include "gromacs/mdtypes/inputrec.h" +#include "gromacs/mdtypes/interaction_const.h" +#include "gromacs/mdtypes/md_enums.h" +#include "gromacs/mdtypes/mdatom.h" +#include "gromacs/mdtypes/state.h" +#include "gromacs/pbcutil/mshift.h" +#include "gromacs/pbcutil/pbc.h" +#include "gromacs/pulling/pull.h" +#include "gromacs/swap/swapcoords.h" +#include "gromacs/timing/wallcycle.h" +#include "gromacs/timing/walltime_accounting.h" +#include "gromacs/topology/atoms.h" +#include "gromacs/topology/idef.h" +#include "gromacs/topology/mtop_util.h" +#include "gromacs/topology/topology.h" +#include "gromacs/trajectory/trajectoryframe.h" +#include "gromacs/utility/basedefinitions.h" +#include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/fatalerror.h" +#include "gromacs/utility/real.h" +#include "gromacs/utility/smalloc.h" + +#include "deform.h" +#include "membed.h" +#include "repl_ex.h" + +/* PLUMED */ +#include "../../../Plumed.h" +extern int plumedswitch; +extern plumed plumedmain; +/* END PLUMED */ + +#ifdef GMX_FAHCORE +#include "corewrap.h" +#endif + +/*! 
\brief Check whether bonded interactions are missing, if appropriate + * + * \param[in] fplog Log file pointer + * \param[in] cr Communication object + * \param[in] totalNumberOfBondedInteractions Result of the global reduction over the number of bonds treated in each domain + * \param[in] top_global Global topology for the error message + * \param[in] top_local Local topology for the error message + * \param[in] state Global state for the error message + * \param[inout] shouldCheckNumberOfBondedInteractions Whether we should do the check. + * + * \return Nothing, except that shouldCheckNumberOfBondedInteractions + * is always set to false after exit. + */ +static void checkNumberOfBondedInteractions(FILE *fplog, t_commrec *cr, int totalNumberOfBondedInteractions, + gmx_mtop_t *top_global, gmx_localtop_t *top_local, t_state *state, + bool *shouldCheckNumberOfBondedInteractions) +{ + if (*shouldCheckNumberOfBondedInteractions) + { + if (totalNumberOfBondedInteractions != cr->dd->nbonded_global) + { + dd_print_missing_interactions(fplog, cr, totalNumberOfBondedInteractions, top_global, top_local, state); // Does not return + } + *shouldCheckNumberOfBondedInteractions = false; + } +} + +static void reset_all_counters(FILE *fplog, t_commrec *cr, + gmx_int64_t step, + gmx_int64_t *step_rel, t_inputrec *ir, + gmx_wallcycle_t wcycle, t_nrnb *nrnb, + gmx_walltime_accounting_t walltime_accounting, + struct nonbonded_verlet_t *nbv) +{ + char sbuf[STEPSTRSIZE]; + + /* Reset all the counters related to performance over the run */ + md_print_warn(cr, fplog, "step %s: resetting all time and cycle counters\n", + gmx_step_str(step, sbuf)); + + if (use_GPU(nbv)) + { + nbnxn_gpu_reset_timings(nbv); + resetGpuProfiler(); + } + + wallcycle_stop(wcycle, ewcRUN); + wallcycle_reset_all(wcycle); + if (DOMAINDECOMP(cr)) + { + reset_dd_statistics_counters(cr->dd); + } + init_nrnb(nrnb); + ir->init_step += *step_rel; + ir->nsteps -= *step_rel; + *step_rel = 0; + wallcycle_start(wcycle, ewcRUN); + walltime_accounting_start(walltime_accounting); + print_date_and_time(fplog, cr->nodeid, "Restarted time", gmx_gettime()); +} + +/*! 
\libinternal + \copydoc integrator_t (FILE *fplog, t_commrec *cr, + int nfile, const t_filenm fnm[], + const gmx_output_env_t *oenv, gmx_bool bVerbose, + int nstglobalcomm, + gmx_vsite_t *vsite, gmx_constr_t constr, + int stepout, + t_inputrec *inputrec, + gmx_mtop_t *top_global, t_fcdata *fcd, + t_state *state_global, + t_mdatoms *mdatoms, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_edsam_t ed, + t_forcerec *fr, + int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, + real cpt_period, real max_hours, + int imdport, + unsigned long Flags, + gmx_walltime_accounting_t walltime_accounting) + */ +double gmx::do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], + const gmx_output_env_t *oenv, gmx_bool bVerbose, + int nstglobalcomm, + gmx_vsite_t *vsite, gmx_constr_t constr, + int stepout, t_inputrec *ir, + gmx_mtop_t *top_global, + t_fcdata *fcd, + t_state *state_global, + t_mdatoms *mdatoms, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_edsam_t ed, t_forcerec *fr, + int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, + real cpt_period, real max_hours, + int imdport, + unsigned long Flags, + gmx_walltime_accounting_t walltime_accounting) +{ + gmx_mdoutf_t outf = NULL; + gmx_int64_t step, step_rel; + double elapsed_time; + double t, t0, lam0[efptNR]; + gmx_bool bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner; + gmx_bool bNS, bNStList, bSimAnn, bStopCM, bRerunMD, + bFirstStep, startingFromCheckpoint, bInitStep, bLastStep = FALSE, + bBornRadii; + gmx_bool bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE; + gmx_bool do_ene, do_log, do_verbose, bRerunWarnNoV = TRUE, + bForceUpdate = FALSE, bCPT; + gmx_bool bMasterState; + int force_flags, cglo_flags; + tensor force_vir, shake_vir, total_vir, tmp_vir, pres; + int i, m; + t_trxstatus *status; + rvec mu_tot; + t_vcm *vcm; + matrix pcoupl_mu, M; + t_trxframe rerun_fr; + gmx_repl_ex_t repl_ex = NULL; + int nchkpt = 1; + gmx_localtop_t *top; + t_mdebin *mdebin = NULL; + t_state *state = NULL; + gmx_enerdata_t *enerd; + rvec *f = NULL; + gmx_global_stat_t gstat; + gmx_update_t *upd = NULL; + t_graph *graph = NULL; + gmx_signalling_t gs; + gmx_groups_t *groups; + gmx_ekindata_t *ekind; + gmx_shellfc_t *shellfc; + gmx_bool bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition; + gmx_bool bResetCountersHalfMaxH = FALSE; + gmx_bool bTemp, bPres, bTrotter; + real dvdl_constr; + rvec *cbuf = NULL; + int cbuf_nalloc = 0; + matrix lastbox; + int lamnew = 0; + /* for FEP */ + int nstfep = 0; + double cycles; + real saved_conserved_quantity = 0; + real last_ekin = 0; + t_extmass MassQ; + int **trotter_seq; + char sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE]; + int handled_stop_condition = gmx_stop_cond_none; /* compare to get_stop_condition*/ + gmx_int64_t multisim_nsteps = -1; /* number of steps to do before first multisim + simulation stops. If equal to zero, don't + communicate any more between multisims.*/ + /* PME load balancing data for GPU kernels */ + pme_load_balancing_t *pme_loadbal = NULL; + gmx_bool bPMETune = FALSE; + gmx_bool bPMETunePrinting = FALSE; + + /* Interactive MD */ + gmx_bool bIMDstep = FALSE; + gmx_membed_t *membed = NULL; + + /* PLUMED */ + int plumedNeedsEnergy=0; + int plumedWantsToStop=0; + matrix plumed_vir; + /* END PLUMED */ + +#ifdef GMX_FAHCORE + /* Temporary addition for FAHCORE checkpointing */ + int chkpt_ret; +#endif + /* Domain decomposition could incorrectly miss a bonded + interaction, but checking for that requires a global + communication stage, which does not otherwise happen in DD + code. 
So we do that alongside the first global energy reduction + after a new DD is made. These variables handle whether the + check happens, and the result it returns. */ + bool shouldCheckNumberOfBondedInteractions = false; + int totalNumberOfBondedInteractions = -1; + + /* Check for special mdrun options */ + bRerunMD = (Flags & MD_RERUN); + if (Flags & MD_RESETCOUNTERSHALFWAY) + { + if (ir->nsteps > 0) + { + /* Signal to reset the counters after half of the simulation steps. */ + wcycle_set_reset_counters(wcycle, ir->nsteps/2); + } + /* Signal to reset the counters halfway through the simulation time. */ + bResetCountersHalfMaxH = (max_hours > 0); + } + + /* md-vv uses averaged full step velocities for T-control + md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control) + md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */ + bTrotter = (EI_VV(ir->eI) && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir))); + + if (bRerunMD) + { + /* Since we don't know if the frames read are related in any way, + * rebuild the neighborlist at every step. + */ + ir->nstlist = 1; + ir->nstcalcenergy = 1; + nstglobalcomm = 1; + } + + nstglobalcomm = check_nstglobalcomm(fplog, cr, nstglobalcomm, ir); + bGStatEveryStep = (nstglobalcomm == 1); + + if (bRerunMD) + { + ir->nstxout_compressed = 0; + } + groups = &top_global->groups; + + if (opt2bSet("-membed", nfile, fnm)) + { + if (MASTER(cr)) + { + fprintf(stderr, "Initializing membed"); + } + /* Note that membed cannot work in parallel because mtop is + * changed here. Fix this if we ever want to make it run with + * multiple ranks. */ + membed = init_membed(fplog, nfile, fnm, top_global, ir, state_global, cr, &cpt_period); + } + + if (ir->eSwapCoords != eswapNO) + { + /* Initialize ion swapping code */ + init_swapcoords(fplog, bVerbose, ir, opt2fn_master("-swap", nfile, fnm, cr), + top_global, state_global->x, state_global->box, &state_global->swapstate, cr, oenv, Flags); + } + + /* Initial values */ + init_md(fplog, cr, ir, oenv, &t, &t0, state_global->lambda, + &(state_global->fep_state), lam0, + nrnb, top_global, &upd, + nfile, fnm, &outf, &mdebin, + force_vir, shake_vir, mu_tot, &bSimAnn, &vcm, Flags, wcycle); + + clear_mat(total_vir); + clear_mat(pres); + /* Energy terms and groups */ + snew(enerd, 1); + init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda, + enerd); + if (DOMAINDECOMP(cr)) + { + f = NULL; + } + else + { + snew(f, top_global->natoms); + } + + /* Kinetic energy data */ + snew(ekind, 1); + init_ekindata(fplog, top_global, &(ir->opts), ekind); + /* Copy the cos acceleration to the groups struct */ + ekind->cosacc.cos_accel = ir->cos_accel; + + gstat = global_stat_init(ir); + + /* Check for polarizable models and flexible constraints */ + shellfc = init_shell_flexcon(fplog, + top_global, n_flexible_constraints(constr), + ir->nstcalcenergy, DOMAINDECOMP(cr)); + + if (shellfc && ir->nstcalcenergy != 1) + { + gmx_fatal(FARGS, "You have nstcalcenergy set to a value (%d) that is different from 1.\nThis is not supported in combination with shell particles.\nPlease make a new tpr file.", ir->nstcalcenergy); + } + if (shellfc && DOMAINDECOMP(cr)) + { + gmx_fatal(FARGS, "Shell particles are not implemented with domain decomposition, use a single rank"); + } + + if (inputrecDeform(ir)) + { + tMPI_Thread_mutex_lock(&deform_init_box_mutex); + set_deform_reference_box(upd, + deform_init_init_step_tpx, + deform_init_box_tpx); +
tMPI_Thread_mutex_unlock(&deform_init_box_mutex); + } + + { + double io = compute_io(ir, top_global->natoms, groups, mdebin->ebin->nener, 1); + if ((io > 2000) && MASTER(cr)) + { + fprintf(stderr, + "\nWARNING: This run will generate roughly %.0f Mb of data\n\n", + io); + } + } + + if (DOMAINDECOMP(cr)) + { + top = dd_init_local_top(top_global); + + snew(state, 1); + dd_init_local_state(cr->dd, state_global, state); + } + else + { + top = gmx_mtop_generate_local_top(top_global, ir->efep != efepNO); + + forcerec_set_excl_load(fr, top); + + state = serial_init_local_state(state_global); + + atoms2md(top_global, ir, 0, NULL, top_global->natoms, mdatoms); + + if (vsite) + { + set_vsite_top(vsite, top, mdatoms, cr); + } + + if (ir->ePBC != epbcNONE && !fr->bMolPBC) + { + graph = mk_graph(fplog, &(top->idef), 0, top_global->natoms, FALSE, FALSE); + } + + if (shellfc) + { + make_local_shells(cr, mdatoms, shellfc); + } + + setup_bonded_threading(fr, &top->idef); + + update_realloc(upd, state->nalloc); + } + + /* Set up interactive MD (IMD) */ + init_IMD(ir, cr, top_global, fplog, ir->nstcalcenergy, state_global->x, + nfile, fnm, oenv, imdport, Flags); + + if (DOMAINDECOMP(cr)) + { + /* Distribute the charge groups over the nodes from the master node */ + dd_partition_system(fplog, ir->init_step, cr, TRUE, 1, + state_global, top_global, ir, + state, &f, mdatoms, top, fr, + vsite, constr, + nrnb, NULL, FALSE); + shouldCheckNumberOfBondedInteractions = true; + update_realloc(upd, state->nalloc); + } + + update_mdatoms(mdatoms, state->lambda[efptMASS]); + + startingFromCheckpoint = Flags & MD_STARTFROMCPT; + + if (ir->bExpanded) + { + init_expanded_ensemble(startingFromCheckpoint, ir, &state->dfhist); + } + + if (MASTER(cr)) + { + if (startingFromCheckpoint) + { + /* Update mdebin with energy history if appending to output files */ + if (Flags & MD_APPENDFILES) + { + restore_energyhistory_from_state(mdebin, state_global->enerhist); + } + else + { + /* We might have read an energy history from checkpoint, + * free the allocated memory and reset the counts. + */ + done_energyhistory(state_global->enerhist); + init_energyhistory(state_global->enerhist); + } + } + /* Set the initial energy history in state by updating once */ + update_energyhistory(state_global->enerhist, mdebin); + } + + /* Initialize constraints */ + if (constr && !DOMAINDECOMP(cr)) + { + set_constraints(constr, top, ir, mdatoms, cr); + } + + if (repl_ex_nst > 0 && MASTER(cr)) + { + repl_ex = init_replica_exchange(fplog, cr->ms, state_global, ir, + repl_ex_nst, repl_ex_nex, repl_ex_seed); + } + + /* PME tuning is only supported with PME for Coulomb. It is not supported + * with only LJ PME, or for reruns.
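A note on the deform_init_box_mutex locking just above: with thread-MPI, all ranks of a simulation are threads within one process and therefore share process-global state such as the deform reference box, so its one-time initialisation must be serialised. A minimal sketch of the idiom, assuming only thread_mpi/threads.h (the function and variable names below are hypothetical):

static tMPI_Thread_mutex_t example_mutex = TMPI_THREAD_MUTEX_INITIALIZER;

static void update_shared_reference(void)
{
    /* With thread-MPI several "ranks" can reach this point concurrently
     * inside the same address space, so take the mutex first. */
    tMPI_Thread_mutex_lock(&example_mutex);
    /* ... read or update process-global data exactly once ... */
    tMPI_Thread_mutex_unlock(&example_mutex);
}

With real MPI each rank owns its own process, so the lock is uncontended and costs essentially nothing.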
+ */ + bPMETune = ((Flags & MD_TUNEPME) && EEL_PME(fr->eeltype) && !bRerunMD && + !(Flags & MD_REPRODUCIBLE)); + if (bPMETune) + { + pme_loadbal_init(&pme_loadbal, cr, fplog, ir, state->box, + fr->ic, fr->pmedata, use_GPU(fr->nbv), + &bPMETunePrinting); + } + + if (!ir->bContinuation && !bRerunMD) + { + if (mdatoms->cFREEZE && (state->flags & (1<<estV))) + { + /* Set the velocities of frozen particles to zero */ + for (i = 0; i < mdatoms->homenr; i++) + { + for (m = 0; m < DIM; m++) + { + if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m]) + { + state->v[i][m] = 0; + } + } + } + } + + if (constr) + { + /* Constrain the initial coordinates and velocities */ + do_constrain_first(fplog, constr, ir, mdatoms, state, + cr, nrnb, fr, top); + } + if (vsite) + { + /* Construct the virtual sites for the initial configuration */ + construct_vsites(vsite, state->x, ir->delta_t, NULL, + top->idef.iparams, top->idef.il, + fr->ePBC, fr->bMolPBC, cr, state->box); + } + } + + if (ir->efep != efepNO) + { + /* Set free energy calculation frequency as the greatest common + * divisor of nstdhdl, nstexpanded and repl_ex_nst. */ + nstfep = ir->fepvals->nstdhdl; + if (ir->bExpanded) + { + nstfep = gmx_greatest_common_divisor(ir->expandedvals->nstexpanded, nstfep); + } + if (repl_ex_nst > 0) + { + nstfep = gmx_greatest_common_divisor(repl_ex_nst, nstfep); + } + } + + /* Be REALLY careful about what flags you set here. You CANNOT assume + * this is the first step, since we might be restarting from a checkpoint, + * and in that case we should not do any modifications to the state. + */ + bStopCM = (ir->comm_mode != ecmNO && !ir->bContinuation); + + if (Flags & MD_READ_EKIN) + { + restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate); + } + + cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT + | (bStopCM ? CGLO_STOPCM : 0) + | (EI_VV(ir->eI) ? CGLO_PRESSURE : 0) + | (EI_VV(ir->eI) ? CGLO_CONSTRAINT : 0) + | ((Flags & MD_READ_EKIN) ? CGLO_READEKIN : 0)); + + bSumEkinhOld = FALSE; + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm, + NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, + constr, NULL, FALSE, state->box, + &totalNumberOfBondedInteractions, &bSumEkinhOld, cglo_flags + | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0)); + checkNumberOfBondedInteractions(fplog, cr, totalNumberOfBondedInteractions, + top_global, top, state, + &shouldCheckNumberOfBondedInteractions); + if (ir->eI == eiVVAK) + { + /* a second call to get the half step temperature initialized as well */ + /* we do the same call as above, but turn the pressure off -- internally to + compute_globals, this is recognized as a velocity verlet half-step + kinetic energy calculation.
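The nstfep calculation above deserves a worked example: free-energy data must be available whenever dhdl output, expanded-ensemble moves or replica exchange fire, so the intervals are folded together with a greatest common divisor. A sketch with hypothetical settings nstdhdl=50, nstexpanded=20 and repl_ex_nst=100 (numbers chosen purely for illustration):

static int gcd(int a, int b)
{
    /* classic Euclid; gmx_greatest_common_divisor() plays this role above */
    while (b != 0)
    {
        int t = b;
        b     = a % b;
        a     = t;
    }
    return a;
}

/* gcd(gcd(50, 20), 100) == 10, so all three intervals are multiples of
 * nstfep = 10 and do_per_step(step, nstfep) triggers on each of them. */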
This minimizes excess variables, but + perhaps loses some logic?*/ + + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm, + NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, + constr, NULL, FALSE, state->box, + NULL, &bSumEkinhOld, + cglo_flags &~(CGLO_STOPCM | CGLO_PRESSURE)); + } + + /* Calculate the initial half step temperature, and save the ekinh_old */ + if (!(Flags & MD_STARTFROMCPT)) + { + for (i = 0; (i < ir->opts.ngtc); i++) + { + copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old); + } + } + if (ir->eI != eiVV) + { + enerd->term[F_TEMP] *= 2; /* result of averages being done over previous and current step, + and there is no previous step */ + } + + /* need to make an initialization call to get the Trotter variables set, as well as other constants for non-trotter + temperature control */ + trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter); + + if (MASTER(cr)) + { + if (constr && !ir->bContinuation && ir->eConstrAlg == econtLINCS) + { + fprintf(fplog, + "RMS relative constraint deviation after constraining: %.2e\n", + constr_rmsd(constr)); + } + if (EI_STATE_VELOCITY(ir->eI)) + { + fprintf(fplog, "Initial temperature: %g K\n", enerd->term[F_TEMP]); + } + if (bRerunMD) + { + fprintf(stderr, "starting md rerun '%s', reading coordinates from" + " input trajectory '%s'\n\n", + *(top_global->name), opt2fn("-rerun", nfile, fnm)); + if (bVerbose) + { + fprintf(stderr, "Calculated time to finish depends on nsteps from " + "run input file,\nwhich may not correspond to the time " + "needed to process input trajectory.\n\n"); + } + } + else + { + char tbuf[20]; + fprintf(stderr, "starting mdrun '%s'\n", + *(top_global->name)); + if (ir->nsteps >= 0) + { + sprintf(tbuf, "%8.1f", (ir->init_step+ir->nsteps)*ir->delta_t); + } + else + { + sprintf(tbuf, "%s", "infinite"); + } + if (ir->init_step > 0) + { + fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n", + gmx_step_str(ir->init_step+ir->nsteps, sbuf), tbuf, + gmx_step_str(ir->init_step, sbuf2), + ir->init_step*ir->delta_t); + } + else + { + fprintf(stderr, "%s steps, %s ps.\n", + gmx_step_str(ir->nsteps, sbuf), tbuf); + } + } + fprintf(fplog, "\n"); + } + + /* PLUMED */ + if(plumedswitch){ + /* detect plumed API version */ + int pversion=0; + plumed_cmd(plumedmain,"getApiVersion",&pversion); + /* setting kbT is only implemented with api>1 */ + real kbT=ir->opts.ref_t[0]*BOLTZ; + if(pversion>1) plumed_cmd(plumedmain,"setKbT",&kbT); + if(pversion>2){ + int res=1; + if( (Flags & MD_STARTFROMCPT) ) plumed_cmd(plumedmain,"setRestart",&res); + } + + if(cr->ms && cr->ms->nsim>1) { + if(MASTER(cr)) plumed_cmd(plumedmain,"GREX setMPIIntercomm",&cr->ms->mpi_comm_masters); + if(PAR(cr)){ + if(DOMAINDECOMP(cr)) { + plumed_cmd(plumedmain,"GREX setMPIIntracomm",&cr->dd->mpi_comm_all); + }else{ + plumed_cmd(plumedmain,"GREX setMPIIntracomm",&cr->mpi_comm_mysim); + } + } + plumed_cmd(plumedmain,"GREX init",NULL); + } + if(PAR(cr)){ + if(DOMAINDECOMP(cr)) { + plumed_cmd(plumedmain,"setMPIComm",&cr->dd->mpi_comm_all); + } + } + plumed_cmd(plumedmain,"setNatoms",&top_global->natoms); + plumed_cmd(plumedmain,"setMDEngine","gromacs"); + plumed_cmd(plumedmain,"setLog",fplog); + real real_delta_t; + real_delta_t=ir->delta_t; + plumed_cmd(plumedmain,"setTimestep",&real_delta_t); + plumed_cmd(plumedmain,"init",NULL); + + if(PAR(cr)){ + if(DOMAINDECOMP(cr)) { + plumed_cmd(plumedmain,"setAtomsNlocal",&cr->dd->nat_home); + plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->gatindex); + } + } + } + /* END
PLUMED */ + + walltime_accounting_start(walltime_accounting); + wallcycle_start(wcycle, ewcRUN); + print_start(fplog, cr, walltime_accounting, "mdrun"); + + /* safest point to do file checkpointing is here. More general point would be immediately before integrator call */ +#ifdef GMX_FAHCORE + chkpt_ret = fcCheckPointParallel( cr->nodeid, + NULL, 0); + if (chkpt_ret == 0) + { + gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", 0 ); + } +#endif + + /*********************************************************** + * + * Loop over MD steps + * + ************************************************************/ + + /* if rerunMD then read coordinates and velocities from input trajectory */ + if (bRerunMD) + { + if (getenv("GMX_FORCE_UPDATE")) + { + bForceUpdate = TRUE; + } + + rerun_fr.natoms = 0; + if (MASTER(cr)) + { + bLastStep = !read_first_frame(oenv, &status, + opt2fn("-rerun", nfile, fnm), + &rerun_fr, TRX_NEED_X | TRX_READ_V); + if (rerun_fr.natoms != top_global->natoms) + { + gmx_fatal(FARGS, + "Number of atoms in trajectory (%d) does not match the " + "run input file (%d)\n", + rerun_fr.natoms, top_global->natoms); + } + if (ir->ePBC != epbcNONE) + { + if (!rerun_fr.bBox) + { + gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f does not contain a box, while pbc is used", rerun_fr.step, rerun_fr.time); + } + if (max_cutoff2(ir->ePBC, rerun_fr.box) < gmx::square(fr->rlist)) + { + gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f has too small box dimensions", rerun_fr.step, rerun_fr.time); + } + } + } + + if (PAR(cr)) + { + rerun_parallel_comm(cr, &rerun_fr, &bLastStep); + } + + if (ir->ePBC != epbcNONE) + { + /* Set the shift vectors. + * Necessary here when we have a static box different from the tpr box. + */ + calc_shifts(rerun_fr.box, fr->shift_vec); + } + } + + /* loop over MD steps or, if rerunning, over the frames of the input trajectory */ + bFirstStep = TRUE; + /* Skip the first Nose-Hoover integration when we get the state from tpx */ + bInitStep = !startingFromCheckpoint || EI_VV(ir->eI); + bSumEkinhOld = FALSE; + bExchanged = FALSE; + bNeedRepartition = FALSE; + + init_global_signals(&gs, cr, ir, repl_ex_nst); + + step = ir->init_step; + step_rel = 0; + + if (MULTISIM(cr) && (repl_ex_nst <= 0 )) + { + /* check how many steps are left in other sims */ + multisim_nsteps = get_multisim_nsteps(cr, ir->nsteps); + } + + + /* and stop now if we should */ + bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps) || + ((multisim_nsteps >= 0) && (step_rel >= multisim_nsteps ))); + while (!bLastStep) + { + + /* Determine if this is a neighbor search step */ + bNStList = (ir->nstlist > 0 && step % ir->nstlist == 0); + + if (bPMETune && bNStList) + { + /* PME grid + cut-off optimization with GPUs or PME nodes */ + pme_loadbal_do(pme_loadbal, cr, + (bVerbose && MASTER(cr)) ? stderr : NULL, + fplog, + ir, fr, state, + wcycle, + step, step_rel, + &bPMETunePrinting); + } + + wallcycle_start(wcycle, ewcSTEP); + + if (bRerunMD) + { + if (rerun_fr.bStep) + { + step = rerun_fr.step; + step_rel = step - ir->init_step; + } + if (rerun_fr.bTime) + { + t = rerun_fr.time; + } + else + { + t = step; + } + } + else + { + bLastStep = (step_rel == ir->nsteps); + t = t0 + step*ir->delta_t; + } + + if (ir->efep != efepNO || ir->bSimTemp) + { + /* find and set the current lambdas. If rerunning, we either read in a state, or a lambda value, + requiring different logic.
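Everything PLUMED-related in this file goes through the generic plumed_cmd(instance, key, pointer) interface declared in Plumed.h, as seen in the initialisation block earlier in do_md. For orientation, a minimal standalone sketch of that lifecycle (illustration only: in the patch the global plumedmain is created when mdrun handles the -plumed flag, and the per-step calls appear in the force loop further down):

#include <stdio.h>
#include "Plumed.h"

static void plumed_lifecycle_sketch(int natoms, double dt, FILE *log)
{
    plumed p = plumed_create();          /* the patch reuses the global plumedmain instead */
    plumed_cmd(p, "setNatoms",   &natoms);
    plumed_cmd(p, "setMDEngine", "gromacs");
    plumed_cmd(p, "setTimestep", &dt);   /* double here assumes PLUMED's default real precision */
    plumed_cmd(p, "setLog",      log);
    plumed_cmd(p, "init",        NULL);
    /* per step: setStepLong, setPositions, setForces, prepareCalc,
     * performCalc, ... (see the force section below) */
    plumed_finalize(p);
}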
*/ + + set_current_lambdas(step, ir->fepvals, bRerunMD, &rerun_fr, state_global, state, lam0); + bDoDHDL = do_per_step(step, ir->fepvals->nstdhdl); + bDoFEP = ((ir->efep != efepNO) && do_per_step(step, nstfep)); + bDoExpanded = (do_per_step(step, ir->expandedvals->nstexpanded) + && (ir->bExpanded) && (step > 0) && (!startingFromCheckpoint)); + } + + bDoReplEx = ((repl_ex_nst > 0) && (step > 0) && !bLastStep && + do_per_step(step, repl_ex_nst)); + + if (bSimAnn) + { + update_annealing_target_temp(ir, t, upd); + } + + if (bRerunMD) + { + if (!DOMAINDECOMP(cr) || MASTER(cr)) + { + for (i = 0; i < state_global->natoms; i++) + { + copy_rvec(rerun_fr.x[i], state_global->x[i]); + } + if (rerun_fr.bV) + { + for (i = 0; i < state_global->natoms; i++) + { + copy_rvec(rerun_fr.v[i], state_global->v[i]); + } + } + else + { + for (i = 0; i < state_global->natoms; i++) + { + clear_rvec(state_global->v[i]); + } + if (bRerunWarnNoV) + { + fprintf(stderr, "\nWARNING: Some frames do not contain velocities.\n" + " Ekin, temperature and pressure are incorrect,\n" + " the virial will be incorrect when constraints are present.\n" + "\n"); + bRerunWarnNoV = FALSE; + } + } + } + copy_mat(rerun_fr.box, state_global->box); + copy_mat(state_global->box, state->box); + + if (vsite && (Flags & MD_RERUN_VSITE)) + { + if (DOMAINDECOMP(cr)) + { + gmx_fatal(FARGS, "Vsite recalculation with -rerun is not implemented with domain decomposition, use a single rank"); + } + if (graph) + { + /* Following is necessary because the graph may get out of sync + * with the coordinates if we only have every N'th coordinate set + */ + mk_mshift(fplog, graph, fr->ePBC, state->box, state->x); + shift_self(graph, state->box, state->x); + } + construct_vsites(vsite, state->x, ir->delta_t, state->v, + top->idef.iparams, top->idef.il, + fr->ePBC, fr->bMolPBC, cr, state->box); + if (graph) + { + unshift_self(graph, state->box, state->x); + } + } + } + + /* Stop Center of Mass motion */ + bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm)); + + if (bRerunMD) + { + /* for rerun MD always do Neighbour Searching */ + bNS = (bFirstStep || ir->nstlist != 0); + bNStList = bNS; + } + else + { + /* Determine whether or not to do Neighbour Searching */ + bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition); + } + + /* check whether we should stop because another simulation has + stopped. */ + if (MULTISIM(cr)) + { + if ( (multisim_nsteps >= 0) && (step_rel >= multisim_nsteps) && + (multisim_nsteps != ir->nsteps) ) + { + if (bNS) + { + if (MASTER(cr)) + { + fprintf(stderr, + "Stopping simulation %d because another one has finished\n", + cr->ms->sim); + } + bLastStep = TRUE; + gs.sig[eglsCHKPT] = 1; + } + } + } + + /* < 0 means stop after this step, > 0 means stop at next NS step */ + if ( (gs.set[eglsSTOPCOND] < 0) || + ( (gs.set[eglsSTOPCOND] > 0) && (bNStList || ir->nstlist == 0) ) ) + { + bLastStep = TRUE; + } + + /* Determine whether or not to update the Born radii if doing GB */ + bBornRadii = bFirstStep; + if (ir->implicit_solvent && (step % ir->nstgbradii == 0)) + { + bBornRadii = TRUE; + } + + /* do_log triggers energy and virial calculation. Because this leads + * to different code paths, forces can be different. Thus for exact + * continuation we should avoid extra log output. + * Note that the || bLastStep can result in non-exact continuation + * beyond the last step. But we don't consider that to be an issue. 
+ */ + do_log = do_per_step(step, ir->nstlog) || (bFirstStep && !startingFromCheckpoint) || bLastStep || bRerunMD; + do_verbose = bVerbose && + (step % stepout == 0 || bFirstStep || bLastStep || bRerunMD); + + if (bNS && !(bFirstStep && ir->bContinuation && !bRerunMD)) + { + if (bRerunMD) + { + bMasterState = TRUE; + } + else + { + bMasterState = FALSE; + /* Correct the new box if it is too skewed */ + if (inputrecDynamicBox(ir)) + { + if (correct_box(fplog, step, state->box, graph)) + { + bMasterState = TRUE; + } + } + if (DOMAINDECOMP(cr) && bMasterState) + { + dd_collect_state(cr->dd, state, state_global); + } + } + + if (DOMAINDECOMP(cr)) + { + /* Repartition the domain decomposition */ + dd_partition_system(fplog, step, cr, + bMasterState, nstglobalcomm, + state_global, top_global, ir, + state, &f, mdatoms, top, fr, + vsite, constr, + nrnb, wcycle, + do_verbose && !bPMETunePrinting); + shouldCheckNumberOfBondedInteractions = true; + update_realloc(upd, state->nalloc); + + /* PLUMED */ + if(plumedswitch){ + plumed_cmd(plumedmain,"setAtomsNlocal",&cr->dd->nat_home); + plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->gatindex); + } + /* END PLUMED */ + } + } + + if (MASTER(cr) && do_log) + { + print_ebin_header(fplog, step, t); /* can we improve the information printed here? */ + } + + if (ir->efep != efepNO) + { + update_mdatoms(mdatoms, state->lambda[efptMASS]); + } + + if ((bRerunMD && rerun_fr.bV) || bExchanged) + { + + /* We need the kinetic energy at minus the half step for determining + * the full step kinetic energy and possibly for T-coupling.*/ + /* This may not be quite working correctly yet . . . . */ + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm, + wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot, + constr, NULL, FALSE, state->box, + &totalNumberOfBondedInteractions, &bSumEkinhOld, + CGLO_GSTAT | CGLO_TEMPERATURE | CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS); + checkNumberOfBondedInteractions(fplog, cr, totalNumberOfBondedInteractions, + top_global, top, state, + &shouldCheckNumberOfBondedInteractions); + } + clear_mat(force_vir); + + /* We write a checkpoint at this MD step when: + * either at an NS step when we signalled through gs, + * or at the last step (but not when we do not want confout), + * but never at the first step or with rerun. + */ + bCPT = (((gs.set[eglsCHKPT] && (bNS || ir->nstlist == 0)) || + (bLastStep && (Flags & MD_CONFOUT))) && + step > ir->init_step && !bRerunMD); + if (bCPT) + { + gs.set[eglsCHKPT] = 0; + } + + /* Determine the energy and pressure: + * at nstcalcenergy steps and at energy output steps (set below). + */ + if (EI_VV(ir->eI) && (!bInitStep)) + { + /* for vv, the first half of the integration actually corresponds + to the previous step. bCalcEner is only required to be evaluated on the 'next' step, + but the virial needs to be calculated on both the current step and the 'next' step. Future + reorganization may be able to get rid of one of the bCalcVir=TRUE steps. */ + + /* TODO: This is probably not what we want, we will write to energy file one step after nstcalcenergy steps. 
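Nearly all of the scheduling decisions in this loop (do_log, do_verbose, the bCalcEner/bCalcVir flags below, checkpoint and output triggers) come down to the same modular-arithmetic test. A sketch of the helper, assuming the conventional definition (the real do_per_step() lives in GROMACS' md support code):

static gmx_bool do_per_step_sketch(gmx_int64_t step, gmx_int64_t nstep)
{
    /* an interval of zero means "never" */
    return (nstep != 0) ? ((step % nstep) == 0) : FALSE;
}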
*/ + bCalcEnerStep = do_per_step(step - 1, ir->nstcalcenergy); + bCalcVir = bCalcEnerStep || + (ir->epc != epcNO && (do_per_step(step, ir->nstpcouple) || do_per_step(step-1, ir->nstpcouple))); + } + else + { + bCalcEnerStep = do_per_step(step, ir->nstcalcenergy); + bCalcVir = bCalcEnerStep || + (ir->epc != epcNO && do_per_step(step, ir->nstpcouple)); + } + bCalcEner = bCalcEnerStep; + + do_ene = (do_per_step(step, ir->nstenergy) || bLastStep || bRerunMD); + + if (do_ene || do_log || bDoReplEx) + { + bCalcVir = TRUE; + bCalcEner = TRUE; + } + + /* Do we need global communication ? */ + bGStat = (bCalcVir || bCalcEner || bStopCM || + do_per_step(step, nstglobalcomm) || + (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step-1, nstglobalcomm))); + + force_flags = (GMX_FORCE_STATECHANGED | + ((inputrecDynamicBox(ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) | + GMX_FORCE_ALLFORCES | + (bCalcVir ? GMX_FORCE_VIRIAL : 0) | + (bCalcEner ? GMX_FORCE_ENERGY : 0) | + (bDoFEP ? GMX_FORCE_DHDL : 0) + ); + + if (shellfc) + { + /* Now is the time to relax the shells */ + relax_shell_flexcon(fplog, cr, bVerbose, step, + ir, bNS, force_flags, top, + constr, enerd, fcd, + state, f, force_vir, mdatoms, + nrnb, wcycle, graph, groups, + shellfc, fr, bBornRadii, t, mu_tot, + vsite, mdoutf_get_fp_field(outf)); + } + else + { + /* The coordinates (x) are shifted (to get whole molecules) + * in do_force. + * This is parallelized as well, and does communication too. + * Check comments in sim_util.c + */ + + /* PLUMED */ + plumedNeedsEnergy=0; + if(plumedswitch){ + long int lstep=step; plumed_cmd(plumedmain,"setStepLong",&lstep); + plumed_cmd(plumedmain,"setPositions",&state->x[0][0]); + plumed_cmd(plumedmain,"setMasses",&mdatoms->massT[0]); + plumed_cmd(plumedmain,"setCharges",&mdatoms->chargeA[0]); + plumed_cmd(plumedmain,"setBox",&state->box[0][0]); + plumed_cmd(plumedmain,"prepareCalc",NULL); + plumed_cmd(plumedmain,"setStopFlag",&plumedWantsToStop); + plumed_cmd(plumedmain,"setForces",&f[0][0]); + plumed_cmd(plumedmain,"isEnergyNeeded",&plumedNeedsEnergy); + clear_mat(plumed_vir); + plumed_cmd(plumedmain,"setVirial",&plumed_vir[0][0]); + } + /* END PLUMED */ + do_force(fplog, cr, ir, step, nrnb, wcycle, top, groups, + state->box, state->x, &state->hist, + f, force_vir, mdatoms, enerd, fcd, + state->lambda, graph, + fr, vsite, mu_tot, t, mdoutf_get_fp_field(outf), ed, bBornRadii, + (bNS ? GMX_FORCE_NS : 0) | force_flags); + /* PLUMED */ + if(plumedswitch){ + if(plumedNeedsEnergy){ + msmul(force_vir,2.0,plumed_vir); + plumed_cmd(plumedmain,"setEnergy",&enerd->term[F_EPOT]); + plumed_cmd(plumedmain,"performCalc",NULL); + msmul(plumed_vir,0.5,force_vir); + } else { + msmul(plumed_vir,0.5,plumed_vir); + m_add(force_vir,plumed_vir,force_vir); + } + if ((repl_ex_nst > 0) && (step > 0) && !bLastStep && + do_per_step(step,repl_ex_nst)) plumed_cmd(plumedmain,"GREX savePositions",NULL); + if(plumedWantsToStop) ir->nsteps=step_rel+1; + } + /* END PLUMED */ + } + + if (EI_VV(ir->eI) && !startingFromCheckpoint && !bRerunMD) + /* ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */ + { + rvec *vbuf = NULL; + + wallcycle_start(wcycle, ewcUPDATE); + if (ir->eI == eiVV && bInitStep) + { + /* if using velocity verlet with full time step Ekin, + * take the first half step only to compute the + * virial for the first step. From there, + * revert back to the initial coordinates + * so that the input is actually the initial step.
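One subtle point in the PLUMED force hook above is the virial bookkeeping. GROMACS accumulates the virial with a factor of one half (Xi = -1/2 sum_i r_i (x) f_i), while the buffer handed to PLUMED via "setVirial" is kept in an unscaled convention; that is what the msmul() calls with 2.0 and 0.5 around "performCalc" convert between. For the common case where PLUMED does not need the energy, the fold-in reduces to (helper name hypothetical):

static void fold_in_plumed_virial(matrix force_vir, matrix plumed_vir)
{
    /* plumed_vir holds only PLUMED's own contribution here; halve it
     * into GROMACS' convention and accumulate onto the MD virial. */
    msmul(plumed_vir, 0.5, plumed_vir);
    m_add(force_vir, plumed_vir, force_vir);
}

Note also how plumedWantsToStop ends the run cooperatively: it simply clamps ir->nsteps so the main loop terminates on the next iteration.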
+ */ + snew(vbuf, state->natoms); + copy_rvecn(state->v, vbuf, 0, state->natoms); /* should make this better for parallelizing? */ + } + else + { + /* this is for NHC in the Ekin(t+dt/2) version of vv */ + trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ1); + } + + update_coords(fplog, step, ir, mdatoms, state, f, fcd, + ekind, M, upd, etrtVELOCITY1, + cr, constr); + + if (!bRerunMD || rerun_fr.bV || bForceUpdate) /* Why is rerun_fr.bV here? Unclear. */ + { + wallcycle_stop(wcycle, ewcUPDATE); + update_constraints(fplog, step, NULL, ir, mdatoms, + state, fr->bMolPBC, graph, f, + &top->idef, shake_vir, + cr, nrnb, wcycle, upd, constr, + TRUE, bCalcVir); + wallcycle_start(wcycle, ewcUPDATE); + } + else if (graph) + { + /* Need to unshift here if a do_force has been + called in the previous step */ + unshift_self(graph, state->box, state->x); + } + /* if VV, compute the pressure and constraints */ + /* For VV2, we strictly only need this if using pressure + * control, but we really would like to have accurate pressures + * printed out. + * Think about ways around this in the future? + * For now, keep this choice in comments. + */ + /*bPres = (ir->eI==eiVV || inputrecNptTrotter(ir)); */ + /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && inputrecNptTrotter(ir)));*/ + bPres = TRUE; + bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK)); + if (bCalcEner && ir->eI == eiVVAK) + { + bSumEkinhOld = TRUE; + } + /* for vv, the first half of the integration actually corresponds to the previous step. + So we need information from the last step in the first half of the integration */ + if (bGStat || do_per_step(step-1, nstglobalcomm)) + { + wallcycle_stop(wcycle, ewcUPDATE); + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm, + wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, + constr, NULL, FALSE, state->box, + &totalNumberOfBondedInteractions, &bSumEkinhOld, + (bGStat ? CGLO_GSTAT : 0) + | CGLO_ENERGY + | (bTemp ? CGLO_TEMPERATURE : 0) + | (bPres ? CGLO_PRESSURE : 0) + | (bPres ? CGLO_CONSTRAINT : 0) + | (bStopCM ? CGLO_STOPCM : 0) + | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0) + | CGLO_SCALEEKIN + ); + /* explanation of above: + a) We compute Ekin at the full time step + if 1) we are using the AveVel Ekin, and it's not the + initial step, or 2) if we are using AveEkin, but need the full + time step kinetic energy for the pressure (always true now, since we want accurate statistics). 
+ b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in + EkinAveVel because it's needed for the pressure */ + checkNumberOfBondedInteractions(fplog, cr, totalNumberOfBondedInteractions, + top_global, top, state, + &shouldCheckNumberOfBondedInteractions); + wallcycle_start(wcycle, ewcUPDATE); + } + /* temperature scaling and pressure scaling to produce the extended variables at t+dt */ + if (!bInitStep) + { + if (bTrotter) + { + m_add(force_vir, shake_vir, total_vir); /* we need the un-dispersion corrected total vir here */ + trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ2); + + copy_mat(shake_vir, state->svir_prev); + copy_mat(force_vir, state->fvir_prev); + if (inputrecNvtTrotter(ir) && ir->eI == eiVV) + { + /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */ + enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, NULL, (ir->eI == eiVV), FALSE); + enerd->term[F_EKIN] = trace(ekind->ekin); + } + } + else if (bExchanged) + { + wallcycle_stop(wcycle, ewcUPDATE); + /* We need the kinetic energy at minus the half step for determining + * the full step kinetic energy and possibly for T-coupling.*/ + /* This may not be quite working correctly yet . . . . */ + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm, + wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot, + constr, NULL, FALSE, state->box, + NULL, &bSumEkinhOld, + CGLO_GSTAT | CGLO_TEMPERATURE); + wallcycle_start(wcycle, ewcUPDATE); + } + } + /* if it's the initial step, we performed this first step just to get the constraint virial */ + if (ir->eI == eiVV && bInitStep) + { + copy_rvecn(vbuf, state->v, 0, state->natoms); + sfree(vbuf); + } + wallcycle_stop(wcycle, ewcUPDATE); + } + + /* compute the conserved quantity */ + if (EI_VV(ir->eI)) + { + saved_conserved_quantity = compute_conserved_from_auxiliary(ir, state, &MassQ); + if (ir->eI == eiVV) + { + last_ekin = enerd->term[F_EKIN]; + } + if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres)) + { + saved_conserved_quantity -= enerd->term[F_DISPCORR]; + } + /* sum up the foreign energy and dhdl terms for vv. currently done every step so that dhdl is correct in the .edr */ + if (ir->efep != efepNO && !bRerunMD) + { + sum_dhdl(enerd, state->lambda, ir->fepvals); + } + } + + /* ######## END FIRST UPDATE STEP ############## */ + /* ######## If doing VV, we now have v(dt) ###### */ + if (bDoExpanded) + { + /* perform extended ensemble sampling in lambda - we don't + actually move to the new state before outputting + statistics, but if performing simulated tempering, we + do update the velocities and the tau_t. */ + + lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state, &state->dfhist, step, state->v, mdatoms); + /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */ + copy_df_history(&state_global->dfhist, &state->dfhist); + } + + /* Now we have the energies and forces corresponding to the + * coordinates at time t. We must output all of this before + * the update. + */ + do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t, + ir, state, state_global, top_global, fr, + outf, mdebin, ekind, f, + &nchkpt, + bCPT, bRerunMD, bLastStep, (Flags & MD_CONFOUT), + bSumEkinhOld); + /* Check if IMD step and do IMD communication, if bIMD is TRUE. 
*/ + bIMDstep = do_IMD(ir->bIMD, step, cr, bNS, state->box, state->x, ir, t, wcycle); + + /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */ + if (startingFromCheckpoint && bTrotter) + { + copy_mat(state->svir_prev, shake_vir); + copy_mat(state->fvir_prev, force_vir); + } + + elapsed_time = walltime_accounting_get_current_elapsed_time(walltime_accounting); + + /* Check whether everything is still all right */ + if (((int)gmx_get_stop_condition() > handled_stop_condition) +#if GMX_THREAD_MPI + && MASTER(cr) +#endif + ) + { + int nsteps_stop = -1; + + /* this is just to make gs.sig compatible with the hack + of sending signals around by MPI_Reduce together with + other floats */ + if (gmx_get_stop_condition() == gmx_stop_cond_next_ns) + { + gs.sig[eglsSTOPCOND] = 1; + nsteps_stop = std::max(ir->nstlist, 2*nstglobalcomm); + } + if (gmx_get_stop_condition() == gmx_stop_cond_next) + { + gs.sig[eglsSTOPCOND] = -1; + nsteps_stop = nstglobalcomm + 1; + } + if (fplog) + { + fprintf(fplog, + "\n\nReceived the %s signal, stopping within %d steps\n\n", + gmx_get_signal_name(), nsteps_stop); + fflush(fplog); + } + fprintf(stderr, + "\n\nReceived the %s signal, stopping within %d steps\n\n", + gmx_get_signal_name(), nsteps_stop); + fflush(stderr); + handled_stop_condition = (int)gmx_get_stop_condition(); + } + else if (MASTER(cr) && (bNS || ir->nstlist <= 0) && + (max_hours > 0 && elapsed_time > max_hours*60.0*60.0*0.99) && + gs.sig[eglsSTOPCOND] == 0 && gs.set[eglsSTOPCOND] == 0) + { + /* Signal to terminate the run */ + gs.sig[eglsSTOPCOND] = 1; + if (fplog) + { + fprintf(fplog, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99); + } + fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99); + } + + if (bResetCountersHalfMaxH && MASTER(cr) && + elapsed_time > max_hours*60.0*60.0*0.495) + { + /* Set flag that will communicate the signal to all ranks in the simulation */ + gs.sig[eglsRESETCOUNTERS] = 1; + } + + /* In parallel we only have to check for checkpointing in steps + * where we do global communication, + * otherwise the other nodes don't know. + */ + if (MASTER(cr) && ((bGStat || !PAR(cr)) && + cpt_period >= 0 && + (cpt_period == 0 || + elapsed_time >= nchkpt*cpt_period*60.0)) && + gs.set[eglsCHKPT] == 0) + { + gs.sig[eglsCHKPT] = 1; + } + + /* ######### START SECOND UPDATE STEP ################# */ + + /* at the start of the step, randomize or scale the velocities (if vv); the restriction to Andersen is controlled + in preprocessing */ + + if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */ + { + gmx_bool bIfRandomize; + bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state, upd, constr); + /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */ + if (constr && bIfRandomize) + { + update_constraints(fplog, step, NULL, ir, mdatoms, + state, fr->bMolPBC, graph, f, + &top->idef, tmp_vir, + cr, nrnb, wcycle, upd, constr, + TRUE, bCalcVir); + } + } + /* Box is changed in update() when we do pressure coupling, + * but we should still use the old box for energy corrections and when + * writing it to the energy file, so it matches the trajectory files for + * the same timestep above. Make a copy in a separate array.
+ */ + copy_mat(state->box, lastbox); + + dvdl_constr = 0; + + if (!bRerunMD || rerun_fr.bV || bForceUpdate) + { + wallcycle_start(wcycle, ewcUPDATE); + /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */ + if (bTrotter) + { + trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3); + /* We can only do Berendsen coupling after we have summed + * the kinetic energy or virial. Since that happens + * in global_state after update, we should only do it at + * step % nstlist = 1 with bGStatEveryStep=FALSE. + */ + } + else + { + update_tcouple(step, ir, state, ekind, &MassQ, mdatoms); + update_pcouple(fplog, step, ir, state, pcoupl_mu, M, bInitStep); + } + + if (EI_VV(ir->eI)) + { + /* velocity half-step update */ + update_coords(fplog, step, ir, mdatoms, state, f, fcd, + ekind, M, upd, etrtVELOCITY2, + cr, constr); + } + + /* Above, initialize just copies ekinh into ekin, + * it doesn't copy position (for VV), + * and entire integrator for MD. + */ + + if (ir->eI == eiVVAK) + { + /* We probably only need md->homenr, not state->natoms */ + if (state->natoms > cbuf_nalloc) + { + cbuf_nalloc = state->natoms; + srenew(cbuf, cbuf_nalloc); + } + copy_rvecn(state->x, cbuf, 0, state->natoms); + } + + update_coords(fplog, step, ir, mdatoms, state, f, fcd, + ekind, M, upd, etrtPOSITION, cr, constr); + wallcycle_stop(wcycle, ewcUPDATE); + + update_constraints(fplog, step, &dvdl_constr, ir, mdatoms, state, + fr->bMolPBC, graph, f, + &top->idef, shake_vir, + cr, nrnb, wcycle, upd, constr, + FALSE, bCalcVir); + + if (ir->eI == eiVVAK) + { + /* erase F_EKIN and F_TEMP here? */ + /* just compute the kinetic energy at the half step to perform a trotter step */ + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm, + wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, + constr, NULL, FALSE, lastbox, + NULL, &bSumEkinhOld, + (bGStat ? CGLO_GSTAT : 0) | CGLO_TEMPERATURE + ); + wallcycle_start(wcycle, ewcUPDATE); + trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4); + /* now we know the scaling, we can compute the positions again */ + copy_rvecn(cbuf, state->x, 0, state->natoms); + + update_coords(fplog, step, ir, mdatoms, state, f, fcd, + ekind, M, upd, etrtPOSITION, cr, constr); + wallcycle_stop(wcycle, ewcUPDATE); + + /* do we need an extra constraint here? just need to copy out of state->v to upd->xp? */ + /* are the small terms in the shake_vir here due + * to numerical errors, or are they important + * physically? I'm thinking they are just errors, but not completely sure. + * For now, will call without actually constraining, constr=NULL*/ + update_constraints(fplog, step, NULL, ir, mdatoms, + state, fr->bMolPBC, graph, f, + &top->idef, tmp_vir, + cr, nrnb, wcycle, upd, NULL, + FALSE, bCalcVir); + } + if (EI_VV(ir->eI)) + { + /* this factor of 2 correction is necessary + because half of the constraint force is removed + in the vv step, so we have to double it. See + the Redmine issue #1255. It is not yet clear + if the factor of 2 is exact, or just a very + good approximation, and this will be + investigated. The next step is to see if this + can be done adding a dhdl contribution from the + rattle step, but this is somewhat more + complicated with the current code. Will be + investigated, hopefully for 4.6.3. However, + this current solution is much better than + having it completely wrong.
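Taken together, the etrt* calls above and in the first update half-step implement the standard velocity-Verlet splitting (thermostats, barostats and constraints elided):

    v(t + dt/2) = v(t) + (dt/2) f(t)/m              [etrtVELOCITY2]
    x(t + dt)   = x(t) + dt v(t + dt/2)             [etrtPOSITION]
    v(t + dt)   = v(t + dt/2) + (dt/2) f(t + dt)/m  [etrtVELOCITY1, next loop pass]

The closing half-kick needs the new forces, which is why etrtVELOCITY1 runs at the top of the following iteration and why the comments above say the first half of the integration "actually corresponds to the previous step". The eiVVAK variant additionally re-runs etrtPOSITION after the Trotter scaling, which is why the coordinates are buffered in cbuf.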
+ */ + enerd->term[F_DVDL_CONSTR] += 2*dvdl_constr; + } + else + { + enerd->term[F_DVDL_CONSTR] += dvdl_constr; + } + } + else if (graph) + { + /* Need to unshift here */ + unshift_self(graph, state->box, state->x); + } + + if (vsite != NULL) + { + wallcycle_start(wcycle, ewcVSITECONSTR); + if (graph != NULL) + { + shift_self(graph, state->box, state->x); + } + construct_vsites(vsite, state->x, ir->delta_t, state->v, + top->idef.iparams, top->idef.il, + fr->ePBC, fr->bMolPBC, cr, state->box); + + if (graph != NULL) + { + unshift_self(graph, state->box, state->x); + } + wallcycle_stop(wcycle, ewcVSITECONSTR); + } + + /* ############## IF NOT VV, Calculate globals HERE ############ */ + /* With Leap-Frog we can skip compute_globals at + * non-communication steps, but we need to calculate + * the kinetic energy one step before communication. + */ + if (bGStat || (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm))) + { + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm, + wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, + constr, &gs, + (step_rel % gs.nstms == 0) && + (multisim_nsteps < 0 || (step_rel < multisim_nsteps)), + lastbox, + &totalNumberOfBondedInteractions, &bSumEkinhOld, + (bGStat ? CGLO_GSTAT : 0) + | (!EI_VV(ir->eI) || bRerunMD ? CGLO_ENERGY : 0) + | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0) + | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0) + | (!EI_VV(ir->eI) || bRerunMD ? CGLO_PRESSURE : 0) + | CGLO_CONSTRAINT + | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0) + ); + checkNumberOfBondedInteractions(fplog, cr, totalNumberOfBondedInteractions, + top_global, top, state, + &shouldCheckNumberOfBondedInteractions); + } + + /* ############# END CALC EKIN AND PRESSURE ################# */ + + /* Note: this is OK, but there are some numerical precision issues with using the convergence of + the virial that should probably be addressed eventually. state->veta has better properties, + but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could + generate the new shake_vir, but test the veta value for convergence. This will take some thought. */ + + if (ir->efep != efepNO && (!EI_VV(ir->eI) || bRerunMD)) + { + /* Sum up the foreign energy and dhdl terms for md and sd. + Currently done every step so that dhdl is correct in the .edr */ + sum_dhdl(enerd, state->lambda, ir->fepvals); + } + update_box(fplog, step, ir, mdatoms, state, f, + pcoupl_mu, nrnb, upd); + + /* ################# END UPDATE STEP 2 ################# */ + /* #### We now have r(t+dt) and v(t+dt/2) ############# */ + + /* The coordinates (x) were unshifted in update */ + if (!bGStat) + { + /* We will not sum ekinh_old, + * so signal that we still have to do it.
+ */ + bSumEkinhOld = TRUE; + } + + /* ######### BEGIN PREPARING EDR OUTPUT ########### */ + + /* use the directly determined last velocity, not actually the averaged half steps */ + if (bTrotter && ir->eI == eiVV) + { + enerd->term[F_EKIN] = last_ekin; + } + enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN]; + + if (EI_VV(ir->eI)) + { + enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity; + } + else + { + enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + compute_conserved_from_auxiliary(ir, state, &MassQ); + } + /* ######### END PREPARING EDR OUTPUT ########### */ + + /* Output stuff */ + if (MASTER(cr)) + { + if (fplog && do_log && bDoExpanded) + { + /* only needed if doing expanded ensemble */ + PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals, ir->bSimTemp ? ir->simtempvals : NULL, + &state_global->dfhist, state->fep_state, ir->nstlog, step); + } + if (bCalcEner) + { + upd_mdebin(mdebin, bDoDHDL, bCalcEnerStep, + t, mdatoms->tmass, enerd, state, + ir->fepvals, ir->expandedvals, lastbox, + shake_vir, force_vir, total_vir, pres, + ekind, mu_tot, constr); + } + else + { + upd_mdebin_step(mdebin); + } + + gmx_bool do_dr = do_per_step(step, ir->nstdisreout); + gmx_bool do_or = do_per_step(step, ir->nstorireout); + + print_ebin(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or, do_log ? fplog : NULL, + step, t, + eprNORMAL, mdebin, fcd, groups, &(ir->opts)); + + if (ir->bPull) + { + pull_print_output(ir->pull_work, step, t); + } + + if (do_per_step(step, ir->nstlog)) + { + if (fflush(fplog) != 0) + { + gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?"); + } + } + } + if (bDoExpanded) + { + /* Have to do this part _after_ outputting the logfile and the edr file */ + /* Gets written into the state at the beginning of next loop*/ + state->fep_state = lamnew; + } + /* Print the remaining wall clock time for the run */ + if (MULTIMASTER(cr) && + (do_verbose || gmx_got_usr_signal()) && + !bPMETunePrinting) + { + if (shellfc) + { + fprintf(stderr, "\n"); + } + print_time(stderr, walltime_accounting, step, ir, cr); + } + + /* Ion/water position swapping. + * Not done in last step since trajectory writing happens before this call + * in the MD loop and exchanges would be lost anyway. */ + bNeedRepartition = FALSE; + if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep && + do_per_step(step, ir->swap->nstswap)) + { + bNeedRepartition = do_swapcoords(cr, step, t, ir, wcycle, + bRerunMD ? rerun_fr.x : state->x, + bRerunMD ? rerun_fr.box : state->box, + top_global, MASTER(cr) && bVerbose, bRerunMD); + + if (bNeedRepartition && DOMAINDECOMP(cr)) + { + dd_collect_state(cr->dd, state, state_global); + } + } + + /* Replica exchange */ + bExchanged = FALSE; + if (bDoReplEx) + { + bExchanged = replica_exchange(fplog, cr, repl_ex, + state_global, enerd, + state, step, t); + } + + if ( (bExchanged || bNeedRepartition) && DOMAINDECOMP(cr) ) + { + dd_partition_system(fplog, step, cr, TRUE, 1, + state_global, top_global, ir, + state, &f, mdatoms, top, fr, + vsite, constr, + nrnb, wcycle, FALSE); + shouldCheckNumberOfBondedInteractions = true; + update_realloc(upd, state->nalloc); + } + + bFirstStep = FALSE; + bInitStep = FALSE; + startingFromCheckpoint = FALSE; + + /* ####### SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */ + /* With all integrators, except VV, we need to retain the pressure + * at the current step for coupling at the next step. 
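The F_ECONSERVED term assembled just above is the usual integration-quality diagnostic in the energy file: the total energy plus the thermostat/barostat work terms, taken from saved_conserved_quantity for VV and recomputed via compute_conserved_from_auxiliary() otherwise. In a healthy run this quantity drifts only slowly; a sudden jump typically points at too large a time step or constraint problems.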
+ */ + if ((state->flags & (1<<estPRES_PREV)) && + (bGStatEveryStep || + (ir->nstpcouple > 0 && step % ir->nstpcouple == 0))) + { + /* Store the pressure in t_state for pressure coupling + * at the next MD step. + */ + copy_mat(pres, state->pres_prev); + } + + /* ####### END SET VARIABLES FOR NEXT ITERATION ###### */ + + if ( (membed != NULL) && (!bLastStep) ) + { + rescale_membed(step_rel, membed, state_global->x); + } + + if (bRerunMD) + { + if (MASTER(cr)) + { + /* read next frame from input trajectory */ + bLastStep = !read_next_frame(oenv, status, &rerun_fr); + } + + if (PAR(cr)) + { + rerun_parallel_comm(cr, &rerun_fr, &bLastStep); + } + } + + cycles = wallcycle_stop(wcycle, ewcSTEP); + if (DOMAINDECOMP(cr) && wcycle) + { + dd_cycles_add(cr->dd, cycles, ddCyclStep); + } + + if (!bRerunMD || !rerun_fr.bStep) + { + /* increase the MD step number */ + step++; + step_rel++; + } + + /* TODO make a counter-reset module */ + /* If it is time to reset counters, set a flag that remains + true until counters actually get reset */ + if (step_rel == wcycle_get_reset_counters(wcycle) || + gs.set[eglsRESETCOUNTERS] != 0) + { + if (pme_loadbal_is_active(pme_loadbal)) + { + /* Do not permit counter reset while PME load + * balancing is active. The only purpose for resetting + * counters is to measure reliable performance data, + * and that can't be done before balancing + * completes. + * + * TODO consider fixing this by delaying the reset + * until after load balancing completes, + * e.g. https://gerrit.gromacs.org/#/c/4964/2 */ + gmx_fatal(FARGS, "PME tuning was still active when attempting to " + "reset mdrun counters at step %" GMX_PRId64 ". Try " + "resetting counters later in the run, e.g. with gmx " + "mdrun -resetstep.", step); + } + reset_all_counters(fplog, cr, step, &step_rel, ir, wcycle, nrnb, walltime_accounting, + use_GPU(fr->nbv) ? fr->nbv : NULL); + wcycle_set_reset_counters(wcycle, -1); + if (!(cr->duty & DUTY_PME)) + { + /* Tell our PME node to reset its counters */ + gmx_pme_send_resetcounters(cr, step); + } + /* Correct max_hours for the elapsed time */ + max_hours -= elapsed_time/(60.0*60.0); + /* If mdrun -maxh -resethway was active, it can only trigger once */ + bResetCountersHalfMaxH = FALSE; /* TODO move this to where gs.sig[eglsRESETCOUNTERS] is set */ + /* Reset can only happen once, so clear the triggering flag. */ + gs.set[eglsRESETCOUNTERS] = 0; + } + + /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */ + IMD_prep_energies_send_positions(ir->bIMD && MASTER(cr), bIMDstep, ir->imd, enerd, step, bCalcEner, wcycle); + + } + /* End of main MD loop */ + + /* Closing TNG files can include compressing data. Therefore it is good to do that + * before stopping the time measurements. 
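The counter-reset machinery above exists so that benchmark timings exclude start-up and PME load-balancing cost. In practice it is driven from the mdrun command line, for example (step count and hours are placeholders):

gmx mdrun -maxh 24 -resethway
gmx mdrun -resetstep 5000

Both paths end up raising gs.sig[eglsRESETCOUNTERS] or arming wcycle_set_reset_counters(), after which reset_all_counters() rewinds the wallcycle and nrnb accounting exactly once.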
*/ + mdoutf_tng_close(outf); + + /* Stop measuring walltime */ + walltime_accounting_end(walltime_accounting); + + if (bRerunMD && MASTER(cr)) + { + close_trj(status); + } + + if (!(cr->duty & DUTY_PME)) + { + /* Tell the PME only node to finish */ + gmx_pme_send_finish(cr); + } + + if (MASTER(cr)) + { + if (ir->nstcalcenergy > 0 && !bRerunMD) + { + print_ebin(mdoutf_get_fp_ene(outf), FALSE, FALSE, FALSE, fplog, step, t, + eprAVER, mdebin, fcd, groups, &(ir->opts)); + } + } + + done_mdoutf(outf); + + if (bPMETune) + { + pme_loadbal_done(pme_loadbal, cr, fplog, use_GPU(fr->nbv)); + } + + done_shellfc(fplog, shellfc, step_rel); + + if (repl_ex_nst > 0 && MASTER(cr)) + { + print_replica_exchange_statistics(fplog, repl_ex); + } + + // Clean up swapcoords + if (ir->eSwapCoords != eswapNO) + { + finish_swapcoords(ir->swap); + } + + if (membed != nullptr) + { + free_membed(membed); + } + + /* IMD cleanup, if bIMD is TRUE. */ + IMD_finalize(ir->bIMD, ir->imd); + + walltime_accounting_set_nsteps_done(walltime_accounting, step_rel); + + return 0; +} diff --git a/patches/gromacs-2016-beta1.diff/src/programs/mdrun/md.cpp.preplumed b/patches/gromacs-2016-beta1.diff/src/programs/mdrun/md.cpp.preplumed new file mode 100644 index 0000000000000000000000000000000000000000..92fa911eb42e08119a3cb688cadea70fab76148f --- /dev/null +++ b/patches/gromacs-2016-beta1.diff/src/programs/mdrun/md.cpp.preplumed @@ -0,0 +1,1841 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2011,2012,2013,2014,2015,2016, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. 
+ */ +#include "gmxpre.h" + +#include "md.h" + +#include "config.h" + +#include <math.h> +#include <stdio.h> +#include <stdlib.h> + +#include <algorithm> + +#include "thread_mpi/threads.h" + +#include "gromacs/commandline/filenm.h" +#include "gromacs/domdec/domdec.h" +#include "gromacs/domdec/domdec_network.h" +#include "gromacs/domdec/domdec_struct.h" +#include "gromacs/ewald/pme.h" +#include "gromacs/ewald/pme-load-balancing.h" +#include "gromacs/fileio/trxio.h" +#include "gromacs/gmxlib/md_logging.h" +#include "gromacs/gmxlib/network.h" +#include "gromacs/gmxlib/nrnb.h" +#include "gromacs/gpu_utils/gpu_utils.h" +#include "gromacs/imd/imd.h" +#include "gromacs/listed-forces/manage-threading.h" +#include "gromacs/math/functions.h" +#include "gromacs/math/utilities.h" +#include "gromacs/math/vec.h" +#include "gromacs/math/vectypes.h" +#include "gromacs/mdlib/compute_io.h" +#include "gromacs/mdlib/constr.h" +#include "gromacs/mdlib/ebin.h" +#include "gromacs/mdlib/force.h" +#include "gromacs/mdlib/force_flags.h" +#include "gromacs/mdlib/forcerec.h" +#include "gromacs/mdlib/md_support.h" +#include "gromacs/mdlib/mdatoms.h" +#include "gromacs/mdlib/mdebin.h" +#include "gromacs/mdlib/mdoutf.h" +#include "gromacs/mdlib/mdrun.h" +#include "gromacs/mdlib/mdrun_signalling.h" +#include "gromacs/mdlib/nb_verlet.h" +#include "gromacs/mdlib/nbnxn_gpu_data_mgmt.h" +#include "gromacs/mdlib/ns.h" +#include "gromacs/mdlib/shellfc.h" +#include "gromacs/mdlib/sighandler.h" +#include "gromacs/mdlib/sim_util.h" +#include "gromacs/mdlib/tgroup.h" +#include "gromacs/mdlib/trajectory_writing.h" +#include "gromacs/mdlib/update.h" +#include "gromacs/mdlib/vcm.h" +#include "gromacs/mdlib/vsite.h" +#include "gromacs/mdtypes/commrec.h" +#include "gromacs/mdtypes/df_history.h" +#include "gromacs/mdtypes/energyhistory.h" +#include "gromacs/mdtypes/fcdata.h" +#include "gromacs/mdtypes/forcerec.h" +#include "gromacs/mdtypes/group.h" +#include "gromacs/mdtypes/inputrec.h" +#include "gromacs/mdtypes/interaction_const.h" +#include "gromacs/mdtypes/md_enums.h" +#include "gromacs/mdtypes/mdatom.h" +#include "gromacs/mdtypes/state.h" +#include "gromacs/pbcutil/mshift.h" +#include "gromacs/pbcutil/pbc.h" +#include "gromacs/pulling/pull.h" +#include "gromacs/swap/swapcoords.h" +#include "gromacs/timing/wallcycle.h" +#include "gromacs/timing/walltime_accounting.h" +#include "gromacs/topology/atoms.h" +#include "gromacs/topology/idef.h" +#include "gromacs/topology/mtop_util.h" +#include "gromacs/topology/topology.h" +#include "gromacs/trajectory/trajectoryframe.h" +#include "gromacs/utility/basedefinitions.h" +#include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/fatalerror.h" +#include "gromacs/utility/real.h" +#include "gromacs/utility/smalloc.h" + +#include "deform.h" +#include "membed.h" +#include "repl_ex.h" + +#ifdef GMX_FAHCORE +#include "corewrap.h" +#endif + +/*! \brief Check whether bonded interactions are missing, if appropriate + * + * \param[in] fplog Log file pointer + * \param[in] cr Communication object + * \param[in] totalNumberOfBondedInteractions Result of the global reduction over the number of bonds treated in each domain + * \param[in] top_global Global topology for the error message + * \param[in] top_local Local topology for the error message + * \param[in] state Global state for the error message + * \param[inout] shouldCheckNumberOfBondedInteractions Whether we should do the check. 
+ * + * \return Nothing, except that shouldCheckNumberOfBondedInteractions + * is always set to false after exit. + */ +static void checkNumberOfBondedInteractions(FILE *fplog, t_commrec *cr, int totalNumberOfBondedInteractions, + gmx_mtop_t *top_global, gmx_localtop_t *top_local, t_state *state, + bool *shouldCheckNumberOfBondedInteractions) +{ + if (*shouldCheckNumberOfBondedInteractions) + { + if (totalNumberOfBondedInteractions != cr->dd->nbonded_global) + { + dd_print_missing_interactions(fplog, cr, totalNumberOfBondedInteractions, top_global, top_local, state); // Does not return + } + *shouldCheckNumberOfBondedInteractions = false; + } +} + +static void reset_all_counters(FILE *fplog, t_commrec *cr, + gmx_int64_t step, + gmx_int64_t *step_rel, t_inputrec *ir, + gmx_wallcycle_t wcycle, t_nrnb *nrnb, + gmx_walltime_accounting_t walltime_accounting, + struct nonbonded_verlet_t *nbv) +{ + char sbuf[STEPSTRSIZE]; + + /* Reset all the counters related to performance over the run */ + md_print_warn(cr, fplog, "step %s: resetting all time and cycle counters\n", + gmx_step_str(step, sbuf)); + + if (use_GPU(nbv)) + { + nbnxn_gpu_reset_timings(nbv); + resetGpuProfiler(); + } + + wallcycle_stop(wcycle, ewcRUN); + wallcycle_reset_all(wcycle); + if (DOMAINDECOMP(cr)) + { + reset_dd_statistics_counters(cr->dd); + } + init_nrnb(nrnb); + ir->init_step += *step_rel; + ir->nsteps -= *step_rel; + *step_rel = 0; + wallcycle_start(wcycle, ewcRUN); + walltime_accounting_start(walltime_accounting); + print_date_and_time(fplog, cr->nodeid, "Restarted time", gmx_gettime()); +} + +/*! \libinternal + \copydoc integrator_t (FILE *fplog, t_commrec *cr, + int nfile, const t_filenm fnm[], + const gmx_output_env_t *oenv, gmx_bool bVerbose, + int nstglobalcomm, + gmx_vsite_t *vsite, gmx_constr_t constr, + int stepout, + t_inputrec *inputrec, + gmx_mtop_t *top_global, t_fcdata *fcd, + t_state *state_global, + t_mdatoms *mdatoms, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_edsam_t ed, + t_forcerec *fr, + int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, + real cpt_period, real max_hours, + int imdport, + unsigned long Flags, + gmx_walltime_accounting_t walltime_accounting) + */ +double gmx::do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], + const gmx_output_env_t *oenv, gmx_bool bVerbose, + int nstglobalcomm, + gmx_vsite_t *vsite, gmx_constr_t constr, + int stepout, t_inputrec *ir, + gmx_mtop_t *top_global, + t_fcdata *fcd, + t_state *state_global, + t_mdatoms *mdatoms, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_edsam_t ed, t_forcerec *fr, + int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, + real cpt_period, real max_hours, + int imdport, + unsigned long Flags, + gmx_walltime_accounting_t walltime_accounting) +{ + gmx_mdoutf_t outf = NULL; + gmx_int64_t step, step_rel; + double elapsed_time; + double t, t0, lam0[efptNR]; + gmx_bool bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner; + gmx_bool bNS, bNStList, bSimAnn, bStopCM, bRerunMD, + bFirstStep, startingFromCheckpoint, bInitStep, bLastStep = FALSE, + bBornRadii; + gmx_bool bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE; + gmx_bool do_ene, do_log, do_verbose, bRerunWarnNoV = TRUE, + bForceUpdate = FALSE, bCPT; + gmx_bool bMasterState; + int force_flags, cglo_flags; + tensor force_vir, shake_vir, total_vir, tmp_vir, pres; + int i, m; + t_trxstatus *status; + rvec mu_tot; + t_vcm *vcm; + matrix pcoupl_mu, M; + t_trxframe rerun_fr; + gmx_repl_ex_t repl_ex = NULL; + int nchkpt = 1; + gmx_localtop_t *top; + t_mdebin *mdebin = 
NULL; + t_state *state = NULL; + gmx_enerdata_t *enerd; + rvec *f = NULL; + gmx_global_stat_t gstat; + gmx_update_t *upd = NULL; + t_graph *graph = NULL; + gmx_signalling_t gs; + gmx_groups_t *groups; + gmx_ekindata_t *ekind; + gmx_shellfc_t *shellfc; + gmx_bool bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition; + gmx_bool bResetCountersHalfMaxH = FALSE; + gmx_bool bTemp, bPres, bTrotter; + real dvdl_constr; + rvec *cbuf = NULL; + int cbuf_nalloc = 0; + matrix lastbox; + int lamnew = 0; + /* for FEP */ + int nstfep = 0; + double cycles; + real saved_conserved_quantity = 0; + real last_ekin = 0; + t_extmass MassQ; + int **trotter_seq; + char sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE]; + int handled_stop_condition = gmx_stop_cond_none; /* compare to get_stop_condition*/ + gmx_int64_t multisim_nsteps = -1; /* number of steps to do before first multisim + simulation stops. If equal to zero, don't + communicate any more between multisims.*/ + /* PME load balancing data for GPU kernels */ + pme_load_balancing_t *pme_loadbal = NULL; + gmx_bool bPMETune = FALSE; + gmx_bool bPMETunePrinting = FALSE; + + /* Interactive MD */ + gmx_bool bIMDstep = FALSE; + gmx_membed_t *membed = NULL; + +#ifdef GMX_FAHCORE + /* Temporary addition for FAHCORE checkpointing */ + int chkpt_ret; +#endif + /* Domain decomposition could incorrectly miss a bonded + interaction, but checking for that requires a global + communication stage, which does not otherwise happen in DD + code. So we do that alongside the first global energy reduction + after a new DD is made. These variables handle whether the + check happens, and the result it returns. */ + bool shouldCheckNumberOfBondedInteractions = false; + int totalNumberOfBondedInteractions = -1; + + /* Check for special mdrun options */ + bRerunMD = (Flags & MD_RERUN); + if (Flags & MD_RESETCOUNTERSHALFWAY) + { + if (ir->nsteps > 0) + { + /* Signal to reset the counters half the simulation steps. */ + wcycle_set_reset_counters(wcycle, ir->nsteps/2); + } + /* Signal to reset the counters halfway the simulation time. */ + bResetCountersHalfMaxH = (max_hours > 0); + } + + /* md-vv uses averaged full step velocities for T-control + md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control) + md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */ + bTrotter = (EI_VV(ir->eI) && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir))); + + if (bRerunMD) + { + /* Since we don't know if the frames read are related in any way, + * rebuild the neighborlist at every step. + */ + ir->nstlist = 1; + ir->nstcalcenergy = 1; + nstglobalcomm = 1; + } + + nstglobalcomm = check_nstglobalcomm(fplog, cr, nstglobalcomm, ir); + bGStatEveryStep = (nstglobalcomm == 1); + + if (bRerunMD) + { + ir->nstxout_compressed = 0; + } + groups = &top_global->groups; + + if (opt2bSet("-membed", nfile, fnm)) + { + if (MASTER(cr)) + { + fprintf(stderr, "Initializing membed"); + } + /* Note that membed cannot work in parallel because mtop is + * changed here. Fix this if we ever want to make it run with + * multiple ranks. 
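+         * (init_membed() below edits the global topology in place; with
+         * more than one rank, every copy of mtop would have to receive the
+         * same modification consistently.)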
*/ + membed = init_membed(fplog, nfile, fnm, top_global, ir, state_global, cr, &cpt_period); + } + + if (ir->eSwapCoords != eswapNO) + { + /* Initialize ion swapping code */ + init_swapcoords(fplog, bVerbose, ir, opt2fn_master("-swap", nfile, fnm, cr), + top_global, state_global->x, state_global->box, &state_global->swapstate, cr, oenv, Flags); + } + + /* Initial values */ + init_md(fplog, cr, ir, oenv, &t, &t0, state_global->lambda, + &(state_global->fep_state), lam0, + nrnb, top_global, &upd, + nfile, fnm, &outf, &mdebin, + force_vir, shake_vir, mu_tot, &bSimAnn, &vcm, Flags, wcycle); + + clear_mat(total_vir); + clear_mat(pres); + /* Energy terms and groups */ + snew(enerd, 1); + init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda, + enerd); + if (DOMAINDECOMP(cr)) + { + f = NULL; + } + else + { + snew(f, top_global->natoms); + } + + /* Kinetic energy data */ + snew(ekind, 1); + init_ekindata(fplog, top_global, &(ir->opts), ekind); + /* Copy the cos acceleration to the groups struct */ + ekind->cosacc.cos_accel = ir->cos_accel; + + gstat = global_stat_init(ir); + + /* Check for polarizable models and flexible constraints */ + shellfc = init_shell_flexcon(fplog, + top_global, n_flexible_constraints(constr), + ir->nstcalcenergy, DOMAINDECOMP(cr)); + + if (shellfc && ir->nstcalcenergy != 1) + { + gmx_fatal(FARGS, "You have nstcalcenergy set to a value (%d) that is different from 1.\nThis is not supported in combinations with shell particles.\nPlease make a new tpr file.", ir->nstcalcenergy); + } + if (shellfc && DOMAINDECOMP(cr)) + { + gmx_fatal(FARGS, "Shell particles are not implemented with domain decomposition, use a single rank"); + } + + if (inputrecDeform(ir)) + { + tMPI_Thread_mutex_lock(&deform_init_box_mutex); + set_deform_reference_box(upd, + deform_init_init_step_tpx, + deform_init_box_tpx); + tMPI_Thread_mutex_unlock(&deform_init_box_mutex); + } + + { + double io = compute_io(ir, top_global->natoms, groups, mdebin->ebin->nener, 1); + if ((io > 2000) && MASTER(cr)) + { + fprintf(stderr, + "\nWARNING: This run will generate roughly %.0f Mb of data\n\n", + io); + } + } + + if (DOMAINDECOMP(cr)) + { + top = dd_init_local_top(top_global); + + snew(state, 1); + dd_init_local_state(cr->dd, state_global, state); + } + else + { + top = gmx_mtop_generate_local_top(top_global, ir->efep != efepNO); + + forcerec_set_excl_load(fr, top); + + state = serial_init_local_state(state_global); + + atoms2md(top_global, ir, 0, NULL, top_global->natoms, mdatoms); + + if (vsite) + { + set_vsite_top(vsite, top, mdatoms, cr); + } + + if (ir->ePBC != epbcNONE && !fr->bMolPBC) + { + graph = mk_graph(fplog, &(top->idef), 0, top_global->natoms, FALSE, FALSE); + } + + if (shellfc) + { + make_local_shells(cr, mdatoms, shellfc); + } + + setup_bonded_threading(fr, &top->idef); + + update_realloc(upd, state->nalloc); + } + + /* Set up interactive MD (IMD) */ + init_IMD(ir, cr, top_global, fplog, ir->nstcalcenergy, state_global->x, + nfile, fnm, oenv, imdport, Flags); + + if (DOMAINDECOMP(cr)) + { + /* Distribute the charge groups over the nodes from the master node */ + dd_partition_system(fplog, ir->init_step, cr, TRUE, 1, + state_global, top_global, ir, + state, &f, mdatoms, top, fr, + vsite, constr, + nrnb, NULL, FALSE); + shouldCheckNumberOfBondedInteractions = true; + update_realloc(upd, state->nalloc); + } + + update_mdatoms(mdatoms, state->lambda[efptMASS]); + + startingFromCheckpoint = Flags & MD_STARTFROMCPT; + + if (ir->bExpanded) + { + init_expanded_ensemble(startingFromCheckpoint, 
                                ir, &state->dfhist);
+    }
+
+    if (MASTER(cr))
+    {
+        if (startingFromCheckpoint)
+        {
+            /* Update mdebin with energy history if appending to output files */
+            if (Flags & MD_APPENDFILES)
+            {
+                restore_energyhistory_from_state(mdebin, state_global->enerhist);
+            }
+            else
+            {
+                /* We might have read an energy history from checkpoint,
+                 * free the allocated memory and reset the counts.
+                 */
+                done_energyhistory(state_global->enerhist);
+                init_energyhistory(state_global->enerhist);
+            }
+        }
+        /* Set the initial energy history in state by updating once */
+        update_energyhistory(state_global->enerhist, mdebin);
+    }
+
+    /* Initialize constraints */
+    if (constr && !DOMAINDECOMP(cr))
+    {
+        set_constraints(constr, top, ir, mdatoms, cr);
+    }
+
+    if (repl_ex_nst > 0 && MASTER(cr))
+    {
+        repl_ex = init_replica_exchange(fplog, cr->ms, state_global, ir,
+                                        repl_ex_nst, repl_ex_nex, repl_ex_seed);
+    }
+
+    /* PME tuning is only supported with PME for Coulomb. It is not supported
+     * with only LJ PME, or for reruns.
+     */
+    bPMETune = ((Flags & MD_TUNEPME) && EEL_PME(fr->eeltype) && !bRerunMD &&
+                !(Flags & MD_REPRODUCIBLE));
+    if (bPMETune)
+    {
+        pme_loadbal_init(&pme_loadbal, cr, fplog, ir, state->box,
+                         fr->ic, fr->pmedata, use_GPU(fr->nbv),
+                         &bPMETunePrinting);
+    }
+
+    if (!ir->bContinuation && !bRerunMD)
+    {
+        if (mdatoms->cFREEZE && (state->flags & (1<<estV)))
+        {
+            /* Set the velocities of frozen particles to zero */
+            for (i = 0; i < mdatoms->homenr; i++)
+            {
+                for (m = 0; m < DIM; m++)
+                {
+                    if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
+                    {
+                        state->v[i][m] = 0;
+                    }
+                }
+            }
+        }
+
+        if (constr)
+        {
+            /* Constrain the initial coordinates and velocities */
+            do_constrain_first(fplog, constr, ir, mdatoms, state,
+                               cr, nrnb, fr, top);
+        }
+        if (vsite)
+        {
+            /* Construct the virtual sites for the initial configuration */
+            construct_vsites(vsite, state->x, ir->delta_t, NULL,
+                             top->idef.iparams, top->idef.il,
+                             fr->ePBC, fr->bMolPBC, cr, state->box);
+        }
+    }
+
+    if (ir->efep != efepNO)
+    {
+        /* Set free energy calculation frequency as the greatest common
+         * divisor of nstdhdl and repl_ex_nst. */
+        nstfep = ir->fepvals->nstdhdl;
+        if (ir->bExpanded)
+        {
+            nstfep = gmx_greatest_common_divisor(ir->expandedvals->nstexpanded, nstfep);
+        }
+        if (repl_ex_nst > 0)
+        {
+            nstfep = gmx_greatest_common_divisor(repl_ex_nst, nstfep);
+        }
+    }
+
+    /* Be REALLY careful about what flags you set here. You CANNOT assume
+     * this is the first step, since we might be restarting from a checkpoint,
+     * and in that case we should not do any modifications to the state.
+     */
+    bStopCM = (ir->comm_mode != ecmNO && !ir->bContinuation);
+
+    if (Flags & MD_READ_EKIN)
+    {
+        restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate);
+    }
+
+    cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT
+                  | (bStopCM ? CGLO_STOPCM : 0)
+                  | (EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
+                  | (EI_VV(ir->eI) ? CGLO_CONSTRAINT : 0)
+                  | ((Flags & MD_READ_EKIN) ? CGLO_READEKIN : 0));
+
+    bSumEkinhOld = FALSE;
+    compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm,
+                    NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
+                    constr, NULL, FALSE, state->box,
+                    &totalNumberOfBondedInteractions, &bSumEkinhOld, cglo_flags
+                    | (shouldCheckNumberOfBondedInteractions ?
CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0));
+    checkNumberOfBondedInteractions(fplog, cr, totalNumberOfBondedInteractions,
+                                    top_global, top, state,
+                                    &shouldCheckNumberOfBondedInteractions);
+    if (ir->eI == eiVVAK)
+    {
+        /* a second call to get the half step temperature initialized as well */
+        /* we do the same call as above, but turn the pressure off -- internally to
+           compute_globals, this is recognized as a velocity verlet half-step
+           kinetic energy calculation. This minimizes excess variables, but
+           perhaps loses some logic? */
+
+        compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm,
+                        NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
+                        constr, NULL, FALSE, state->box,
+                        NULL, &bSumEkinhOld,
+                        cglo_flags &~(CGLO_STOPCM | CGLO_PRESSURE));
+    }
+
+    /* Calculate the initial half step temperature, and save the ekinh_old */
+    if (!(Flags & MD_STARTFROMCPT))
+    {
+        for (i = 0; (i < ir->opts.ngtc); i++)
+        {
+            copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old);
+        }
+    }
+    if (ir->eI != eiVV)
+    {
+        enerd->term[F_TEMP] *= 2; /* result of averages being done over previous and current step,
+                                     and there is no previous step */
+    }
+
+    /* need to make an initialization call to get the Trotter variables set, as well as other
+       constants for non-Trotter temperature control */
+    trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter);
+
+    if (MASTER(cr))
+    {
+        if (constr && !ir->bContinuation && ir->eConstrAlg == econtLINCS)
+        {
+            fprintf(fplog,
+                    "RMS relative constraint deviation after constraining: %.2e\n",
+                    constr_rmsd(constr));
+        }
+        if (EI_STATE_VELOCITY(ir->eI))
+        {
+            fprintf(fplog, "Initial temperature: %g K\n", enerd->term[F_TEMP]);
+        }
+        if (bRerunMD)
+        {
+            fprintf(stderr, "starting md rerun '%s', reading coordinates from"
+                    " input trajectory '%s'\n\n",
+                    *(top_global->name), opt2fn("-rerun", nfile, fnm));
+            if (bVerbose)
+            {
+                fprintf(stderr, "Calculated time to finish depends on nsteps from "
+                        "run input file,\nwhich may not correspond to the time "
+                        "needed to process input trajectory.\n\n");
+            }
+        }
+        else
+        {
+            char tbuf[20];
+            fprintf(stderr, "starting mdrun '%s'\n",
+                    *(top_global->name));
+            if (ir->nsteps >= 0)
+            {
+                sprintf(tbuf, "%8.1f", (ir->init_step+ir->nsteps)*ir->delta_t);
+            }
+            else
+            {
+                sprintf(tbuf, "%s", "infinite");
+            }
+            if (ir->init_step > 0)
+            {
+                fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
+                        gmx_step_str(ir->init_step+ir->nsteps, sbuf), tbuf,
+                        gmx_step_str(ir->init_step, sbuf2),
+                        ir->init_step*ir->delta_t);
+            }
+            else
+            {
+                fprintf(stderr, "%s steps, %s ps.\n",
+                        gmx_step_str(ir->nsteps, sbuf), tbuf);
+            }
+        }
+        fprintf(fplog, "\n");
+    }
+
+    walltime_accounting_start(walltime_accounting);
+    wallcycle_start(wcycle, ewcRUN);
+    print_start(fplog, cr, walltime_accounting, "mdrun");
+
+    /* The safest point to do file checkpointing is here;
       a more general point would be immediately before the integrator call. */
+#ifdef GMX_FAHCORE
+    chkpt_ret = fcCheckPointParallel( cr->nodeid,
+                                      NULL, 0);
+    if (chkpt_ret == 0)
+    {
+        gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", 0 );
+    }
+#endif
+
+    /***********************************************************
+     *
+     *             Loop over MD steps
+     *
+     ************************************************************/
+
+    /* if rerunMD then read coordinates and velocities from input trajectory */
+    if (bRerunMD)
+    {
+        if (getenv("GMX_FORCE_UPDATE"))
+        {
+            bForceUpdate = TRUE;
+        }
+
+        rerun_fr.natoms = 0;
+        if (MASTER(cr))
+        {
+            bLastStep = !read_first_frame(oenv, &status,
+                                          opt2fn("-rerun", nfile, fnm),
+                                          &rerun_fr, TRX_NEED_X | TRX_READ_V);
+            if (rerun_fr.natoms != top_global->natoms)
+            {
+                gmx_fatal(FARGS,
+                          "Number of atoms in trajectory (%d) does not match the "
+                          "run input file (%d)\n",
+                          rerun_fr.natoms, top_global->natoms);
+            }
+            if (ir->ePBC != epbcNONE)
+            {
+                if (!rerun_fr.bBox)
+                {
+                    gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f does not contain a box, while pbc is used", rerun_fr.step, rerun_fr.time);
+                }
+                if (max_cutoff2(ir->ePBC, rerun_fr.box) < gmx::square(fr->rlist))
+                {
+                    gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f has too small box dimensions", rerun_fr.step, rerun_fr.time);
+                }
+            }
+        }
+
+        if (PAR(cr))
+        {
+            rerun_parallel_comm(cr, &rerun_fr, &bLastStep);
+        }
+
+        if (ir->ePBC != epbcNONE)
+        {
+            /* Set the shift vectors.
+             * Necessary here when we have a static box different from the tpr box.
+             */
+            calc_shifts(rerun_fr.box, fr->shift_vec);
+        }
+    }
+
+    /* loop over MD steps or if rerunMD to end of input trajectory */
+    bFirstStep = TRUE;
+    /* Skip the first Nose-Hoover integration when we get the state from tpx */
+    bInitStep = !startingFromCheckpoint || EI_VV(ir->eI);
+    bSumEkinhOld = FALSE;
+    bExchanged = FALSE;
+    bNeedRepartition = FALSE;
+
+    init_global_signals(&gs, cr, ir, repl_ex_nst);
+
+    step     = ir->init_step;
+    step_rel = 0;
+
+    if (MULTISIM(cr) && (repl_ex_nst <= 0 ))
+    {
+        /* check how many steps are left in other sims */
+        multisim_nsteps = get_multisim_nsteps(cr, ir->nsteps);
+    }
+
+
+    /* and stop now if we should */
+    bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps) ||
+                 ((multisim_nsteps >= 0) && (step_rel >= multisim_nsteps )));
+    while (!bLastStep)
+    {
+
+        /* Determine if this is a neighbor search step */
+        bNStList = (ir->nstlist > 0 && step % ir->nstlist == 0);
+
+        if (bPMETune && bNStList)
+        {
+            /* PME grid + cut-off optimization with GPUs or PME nodes */
+            pme_loadbal_do(pme_loadbal, cr,
+                           (bVerbose && MASTER(cr)) ? stderr : NULL,
+                           fplog,
+                           ir, fr, state,
+                           wcycle,
+                           step, step_rel,
+                           &bPMETunePrinting);
+        }
+
+        wallcycle_start(wcycle, ewcSTEP);
+
+        if (bRerunMD)
+        {
+            if (rerun_fr.bStep)
+            {
+                step     = rerun_fr.step;
+                step_rel = step - ir->init_step;
+            }
+            if (rerun_fr.bTime)
+            {
+                t = rerun_fr.time;
+            }
+            else
+            {
+                t = step;
+            }
+        }
+        else
+        {
+            bLastStep = (step_rel == ir->nsteps);
+            t         = t0 + step*ir->delta_t;
+        }
+
+        if (ir->efep != efepNO || ir->bSimTemp)
+        {
+            /* find and set the current lambdas. If rerunning, we either read in a state, or a lambda value,
+               requiring different logic.
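+               (Both cases are handled inside set_current_lambdas() below,
+               which receives bRerunMD and the rerun frame for this purpose.)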
*/ + + set_current_lambdas(step, ir->fepvals, bRerunMD, &rerun_fr, state_global, state, lam0); + bDoDHDL = do_per_step(step, ir->fepvals->nstdhdl); + bDoFEP = ((ir->efep != efepNO) && do_per_step(step, nstfep)); + bDoExpanded = (do_per_step(step, ir->expandedvals->nstexpanded) + && (ir->bExpanded) && (step > 0) && (!startingFromCheckpoint)); + } + + bDoReplEx = ((repl_ex_nst > 0) && (step > 0) && !bLastStep && + do_per_step(step, repl_ex_nst)); + + if (bSimAnn) + { + update_annealing_target_temp(ir, t, upd); + } + + if (bRerunMD) + { + if (!DOMAINDECOMP(cr) || MASTER(cr)) + { + for (i = 0; i < state_global->natoms; i++) + { + copy_rvec(rerun_fr.x[i], state_global->x[i]); + } + if (rerun_fr.bV) + { + for (i = 0; i < state_global->natoms; i++) + { + copy_rvec(rerun_fr.v[i], state_global->v[i]); + } + } + else + { + for (i = 0; i < state_global->natoms; i++) + { + clear_rvec(state_global->v[i]); + } + if (bRerunWarnNoV) + { + fprintf(stderr, "\nWARNING: Some frames do not contain velocities.\n" + " Ekin, temperature and pressure are incorrect,\n" + " the virial will be incorrect when constraints are present.\n" + "\n"); + bRerunWarnNoV = FALSE; + } + } + } + copy_mat(rerun_fr.box, state_global->box); + copy_mat(state_global->box, state->box); + + if (vsite && (Flags & MD_RERUN_VSITE)) + { + if (DOMAINDECOMP(cr)) + { + gmx_fatal(FARGS, "Vsite recalculation with -rerun is not implemented with domain decomposition, use a single rank"); + } + if (graph) + { + /* Following is necessary because the graph may get out of sync + * with the coordinates if we only have every N'th coordinate set + */ + mk_mshift(fplog, graph, fr->ePBC, state->box, state->x); + shift_self(graph, state->box, state->x); + } + construct_vsites(vsite, state->x, ir->delta_t, state->v, + top->idef.iparams, top->idef.il, + fr->ePBC, fr->bMolPBC, cr, state->box); + if (graph) + { + unshift_self(graph, state->box, state->x); + } + } + } + + /* Stop Center of Mass motion */ + bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm)); + + if (bRerunMD) + { + /* for rerun MD always do Neighbour Searching */ + bNS = (bFirstStep || ir->nstlist != 0); + bNStList = bNS; + } + else + { + /* Determine whether or not to do Neighbour Searching */ + bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition); + } + + /* check whether we should stop because another simulation has + stopped. */ + if (MULTISIM(cr)) + { + if ( (multisim_nsteps >= 0) && (step_rel >= multisim_nsteps) && + (multisim_nsteps != ir->nsteps) ) + { + if (bNS) + { + if (MASTER(cr)) + { + fprintf(stderr, + "Stopping simulation %d because another one has finished\n", + cr->ms->sim); + } + bLastStep = TRUE; + gs.sig[eglsCHKPT] = 1; + } + } + } + + /* < 0 means stop after this step, > 0 means stop at next NS step */ + if ( (gs.set[eglsSTOPCOND] < 0) || + ( (gs.set[eglsSTOPCOND] > 0) && (bNStList || ir->nstlist == 0) ) ) + { + bLastStep = TRUE; + } + + /* Determine whether or not to update the Born radii if doing GB */ + bBornRadii = bFirstStep; + if (ir->implicit_solvent && (step % ir->nstgbradii == 0)) + { + bBornRadii = TRUE; + } + + /* do_log triggers energy and virial calculation. Because this leads + * to different code paths, forces can be different. Thus for exact + * continuation we should avoid extra log output. + * Note that the || bLastStep can result in non-exact continuation + * beyond the last step. But we don't consider that to be an issue. 
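+         * (On the last step no further integration follows, so the extra
+         * energy evaluation cannot feed back into the trajectory.)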
+ */ + do_log = do_per_step(step, ir->nstlog) || (bFirstStep && !startingFromCheckpoint) || bLastStep || bRerunMD; + do_verbose = bVerbose && + (step % stepout == 0 || bFirstStep || bLastStep || bRerunMD); + + if (bNS && !(bFirstStep && ir->bContinuation && !bRerunMD)) + { + if (bRerunMD) + { + bMasterState = TRUE; + } + else + { + bMasterState = FALSE; + /* Correct the new box if it is too skewed */ + if (inputrecDynamicBox(ir)) + { + if (correct_box(fplog, step, state->box, graph)) + { + bMasterState = TRUE; + } + } + if (DOMAINDECOMP(cr) && bMasterState) + { + dd_collect_state(cr->dd, state, state_global); + } + } + + if (DOMAINDECOMP(cr)) + { + /* Repartition the domain decomposition */ + dd_partition_system(fplog, step, cr, + bMasterState, nstglobalcomm, + state_global, top_global, ir, + state, &f, mdatoms, top, fr, + vsite, constr, + nrnb, wcycle, + do_verbose && !bPMETunePrinting); + shouldCheckNumberOfBondedInteractions = true; + update_realloc(upd, state->nalloc); + } + } + + if (MASTER(cr) && do_log) + { + print_ebin_header(fplog, step, t); /* can we improve the information printed here? */ + } + + if (ir->efep != efepNO) + { + update_mdatoms(mdatoms, state->lambda[efptMASS]); + } + + if ((bRerunMD && rerun_fr.bV) || bExchanged) + { + + /* We need the kinetic energy at minus the half step for determining + * the full step kinetic energy and possibly for T-coupling.*/ + /* This may not be quite working correctly yet . . . . */ + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm, + wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot, + constr, NULL, FALSE, state->box, + &totalNumberOfBondedInteractions, &bSumEkinhOld, + CGLO_GSTAT | CGLO_TEMPERATURE | CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS); + checkNumberOfBondedInteractions(fplog, cr, totalNumberOfBondedInteractions, + top_global, top, state, + &shouldCheckNumberOfBondedInteractions); + } + clear_mat(force_vir); + + /* We write a checkpoint at this MD step when: + * either at an NS step when we signalled through gs, + * or at the last step (but not when we do not want confout), + * but never at the first step or with rerun. + */ + bCPT = (((gs.set[eglsCHKPT] && (bNS || ir->nstlist == 0)) || + (bLastStep && (Flags & MD_CONFOUT))) && + step > ir->init_step && !bRerunMD); + if (bCPT) + { + gs.set[eglsCHKPT] = 0; + } + + /* Determine the energy and pressure: + * at nstcalcenergy steps and at energy output steps (set below). + */ + if (EI_VV(ir->eI) && (!bInitStep)) + { + /* for vv, the first half of the integration actually corresponds + to the previous step. bCalcEner is only required to be evaluated on the 'next' step, + but the virial needs to be calculated on both the current step and the 'next' step. Future + reorganization may be able to get rid of one of the bCalcVir=TRUE steps. */ + + /* TODO: This is probably not what we want, we will write to energy file one step after nstcalcenergy steps. */ + bCalcEnerStep = do_per_step(step - 1, ir->nstcalcenergy); + bCalcVir = bCalcEnerStep || + (ir->epc != epcNO && (do_per_step(step, ir->nstpcouple) || do_per_step(step-1, ir->nstpcouple))); + } + else + { + bCalcEnerStep = do_per_step(step, ir->nstcalcenergy); + bCalcVir = bCalcEnerStep || + (ir->epc != epcNO && do_per_step(step, ir->nstpcouple)); + } + bCalcEner = bCalcEnerStep; + + do_ene = (do_per_step(step, ir->nstenergy) || bLastStep || bRerunMD); + + if (do_ene || do_log || bDoReplEx) + { + bCalcVir = TRUE; + bCalcEner = TRUE; + } + + /* Do we need global communication ? 
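+         * Global communication is needed whenever the energies or the virial
+         * are calculated, when COM motion is removed, and at nstglobalcomm
+         * intervals; see the bGStat expression below.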
         */
+        bGStat = (bCalcVir || bCalcEner || bStopCM ||
+                  do_per_step(step, nstglobalcomm) ||
+                  (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step-1, nstglobalcomm)));
+
+        force_flags = (GMX_FORCE_STATECHANGED |
+                       ((inputrecDynamicBox(ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) |
+                       GMX_FORCE_ALLFORCES |
+                       (bCalcVir ? GMX_FORCE_VIRIAL : 0) |
+                       (bCalcEner ? GMX_FORCE_ENERGY : 0) |
+                       (bDoFEP ? GMX_FORCE_DHDL : 0)
+                       );
+
+        if (shellfc)
+        {
+            /* Now is the time to relax the shells */
+            relax_shell_flexcon(fplog, cr, bVerbose, step,
+                                ir, bNS, force_flags, top,
+                                constr, enerd, fcd,
+                                state, f, force_vir, mdatoms,
+                                nrnb, wcycle, graph, groups,
+                                shellfc, fr, bBornRadii, t, mu_tot,
+                                vsite, mdoutf_get_fp_field(outf));
+        }
+        else
+        {
+            /* The coordinates (x) are shifted (to get whole molecules)
+             * in do_force.
+             * This is parallelized as well, and does communication too.
+             * Check comments in sim_util.c
+             */
+            do_force(fplog, cr, ir, step, nrnb, wcycle, top, groups,
+                     state->box, state->x, &state->hist,
+                     f, force_vir, mdatoms, enerd, fcd,
+                     state->lambda, graph,
+                     fr, vsite, mu_tot, t, mdoutf_get_fp_field(outf), ed, bBornRadii,
+                     (bNS ? GMX_FORCE_NS : 0) | force_flags);
+        }
+
+        if (EI_VV(ir->eI) && !startingFromCheckpoint && !bRerunMD)
+        /* ############### START FIRST UPDATE HALF-STEP FOR VV METHODS ############### */
+        {
+            rvec *vbuf = NULL;
+
+            wallcycle_start(wcycle, ewcUPDATE);
+            if (ir->eI == eiVV && bInitStep)
+            {
+                /* if using velocity verlet with full time step Ekin,
+                 * take the first half step only to compute the
+                 * virial for the first step. From there,
+                 * revert back to the initial coordinates
+                 * so that the input is actually the initial step.
+                 */
+                snew(vbuf, state->natoms);
+                copy_rvecn(state->v, vbuf, 0, state->natoms); /* should make this better for parallelizing? */
+            }
+            else
+            {
+                /* this is for NHC in the Ekin(t+dt/2) version of vv */
+                trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ1);
+            }
+
+            update_coords(fplog, step, ir, mdatoms, state, f, fcd,
+                          ekind, M, upd, etrtVELOCITY1,
+                          cr, constr);
+
+            if (!bRerunMD || rerun_fr.bV || bForceUpdate) /* Why is rerun_fr.bV here? Unclear. */
+            {
+                wallcycle_stop(wcycle, ewcUPDATE);
+                update_constraints(fplog, step, NULL, ir, mdatoms,
+                                   state, fr->bMolPBC, graph, f,
+                                   &top->idef, shake_vir,
+                                   cr, nrnb, wcycle, upd, constr,
+                                   TRUE, bCalcVir);
+                wallcycle_start(wcycle, ewcUPDATE);
+            }
+            else if (graph)
+            {
+                /* Need to unshift here if a do_force has been
+                   called in the previous step */
+                unshift_self(graph, state->box, state->x);
+            }
+            /* if VV, compute the pressure and constraints */
+            /* For VV2, we strictly only need this if using pressure
+             * control, but we really would like to have accurate pressures
+             * printed out.
+             * Think about ways around this in the future?
+             * For now, keep this choice in comments.
+             */
+            /*bPres = (ir->eI==eiVV || inputrecNptTrotter(ir)); */
+            /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && inputrecNptTrotter(ir)));*/
+            bPres = TRUE;
+            bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK));
+            if (bCalcEner && ir->eI == eiVVAK)
+            {
+                bSumEkinhOld = TRUE;
+            }
+            /* for vv, the first half of the integration actually corresponds to the previous step.
+ So we need information from the last step in the first half of the integration */ + if (bGStat || do_per_step(step-1, nstglobalcomm)) + { + wallcycle_stop(wcycle, ewcUPDATE); + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm, + wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, + constr, NULL, FALSE, state->box, + &totalNumberOfBondedInteractions, &bSumEkinhOld, + (bGStat ? CGLO_GSTAT : 0) + | CGLO_ENERGY + | (bTemp ? CGLO_TEMPERATURE : 0) + | (bPres ? CGLO_PRESSURE : 0) + | (bPres ? CGLO_CONSTRAINT : 0) + | (bStopCM ? CGLO_STOPCM : 0) + | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0) + | CGLO_SCALEEKIN + ); + /* explanation of above: + a) We compute Ekin at the full time step + if 1) we are using the AveVel Ekin, and it's not the + initial step, or 2) if we are using AveEkin, but need the full + time step kinetic energy for the pressure (always true now, since we want accurate statistics). + b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in + EkinAveVel because it's needed for the pressure */ + checkNumberOfBondedInteractions(fplog, cr, totalNumberOfBondedInteractions, + top_global, top, state, + &shouldCheckNumberOfBondedInteractions); + wallcycle_start(wcycle, ewcUPDATE); + } + /* temperature scaling and pressure scaling to produce the extended variables at t+dt */ + if (!bInitStep) + { + if (bTrotter) + { + m_add(force_vir, shake_vir, total_vir); /* we need the un-dispersion corrected total vir here */ + trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ2); + + copy_mat(shake_vir, state->svir_prev); + copy_mat(force_vir, state->fvir_prev); + if (inputrecNvtTrotter(ir) && ir->eI == eiVV) + { + /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */ + enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, NULL, (ir->eI == eiVV), FALSE); + enerd->term[F_EKIN] = trace(ekind->ekin); + } + } + else if (bExchanged) + { + wallcycle_stop(wcycle, ewcUPDATE); + /* We need the kinetic energy at minus the half step for determining + * the full step kinetic energy and possibly for T-coupling.*/ + /* This may not be quite working correctly yet . . . . */ + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm, + wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot, + constr, NULL, FALSE, state->box, + NULL, &bSumEkinhOld, + CGLO_GSTAT | CGLO_TEMPERATURE); + wallcycle_start(wcycle, ewcUPDATE); + } + } + /* if it's the initial step, we performed this first step just to get the constraint virial */ + if (ir->eI == eiVV && bInitStep) + { + copy_rvecn(vbuf, state->v, 0, state->natoms); + sfree(vbuf); + } + wallcycle_stop(wcycle, ewcUPDATE); + } + + /* compute the conserved quantity */ + if (EI_VV(ir->eI)) + { + saved_conserved_quantity = compute_conserved_from_auxiliary(ir, state, &MassQ); + if (ir->eI == eiVV) + { + last_ekin = enerd->term[F_EKIN]; + } + if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres)) + { + saved_conserved_quantity -= enerd->term[F_DISPCORR]; + } + /* sum up the foreign energy and dhdl terms for vv. 
currently done every step so that dhdl is correct in the .edr */
+            if (ir->efep != efepNO && !bRerunMD)
+            {
+                sum_dhdl(enerd, state->lambda, ir->fepvals);
+            }
+        }
+
+        /* ######## END FIRST UPDATE STEP ############## */
+        /* ######## If doing VV, we now have v(dt) ###### */
+        if (bDoExpanded)
+        {
+            /* perform extended ensemble sampling in lambda - we don't
+               actually move to the new state before outputting
+               statistics, but if performing simulated tempering, we
+               do update the velocities and the tau_t. */
+
+            lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state, &state->dfhist, step, state->v, mdatoms);
+            /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */
+            copy_df_history(&state_global->dfhist, &state->dfhist);
+        }
+
+        /* Now we have the energies and forces corresponding to the
+         * coordinates at time t. We must output all of this before
+         * the update.
+         */
+        do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t,
+                                 ir, state, state_global, top_global, fr,
+                                 outf, mdebin, ekind, f,
+                                 &nchkpt,
+                                 bCPT, bRerunMD, bLastStep, (Flags & MD_CONFOUT),
+                                 bSumEkinhOld);
+        /* Check if IMD step and do IMD communication, if bIMD is TRUE. */
+        bIMDstep = do_IMD(ir->bIMD, step, cr, bNS, state->box, state->x, ir, t, wcycle);
+
+        /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */
+        if (startingFromCheckpoint && bTrotter)
+        {
+            copy_mat(state->svir_prev, shake_vir);
+            copy_mat(state->fvir_prev, force_vir);
+        }
+
+        elapsed_time = walltime_accounting_get_current_elapsed_time(walltime_accounting);
+
+        /* Check whether everything is still allright */
+        if (((int)gmx_get_stop_condition() > handled_stop_condition)
+#if GMX_THREAD_MPI
+            && MASTER(cr)
+#endif
+            )
+        {
+            int nsteps_stop = -1;
+
+            /* this is just to make gs.sig compatible with the hack
+               of sending signals around by MPI_Reduce together with
+               other floats */
+            if (gmx_get_stop_condition() == gmx_stop_cond_next_ns)
+            {
+                gs.sig[eglsSTOPCOND] = 1;
+                nsteps_stop          = std::max(ir->nstlist, 2*nstglobalcomm);
+            }
+            if (gmx_get_stop_condition() == gmx_stop_cond_next)
+            {
+                gs.sig[eglsSTOPCOND] = -1;
+                nsteps_stop          = nstglobalcomm + 1;
+            }
+            if (fplog)
+            {
+                fprintf(fplog,
+                        "\n\nReceived the %s signal, stopping within %d steps\n\n",
+                        gmx_get_signal_name(), nsteps_stop);
+                fflush(fplog);
+            }
+            fprintf(stderr,
+                    "\n\nReceived the %s signal, stopping within %d steps\n\n",
+                    gmx_get_signal_name(), nsteps_stop);
+            fflush(stderr);
+            handled_stop_condition = (int)gmx_get_stop_condition();
+        }
+        else if (MASTER(cr) && (bNS || ir->nstlist <= 0) &&
+                 (max_hours > 0 && elapsed_time > max_hours*60.0*60.0*0.99) &&
+                 gs.sig[eglsSTOPCOND] == 0 && gs.set[eglsSTOPCOND] == 0)
+        {
+            /* Signal to terminate the run */
+            gs.sig[eglsSTOPCOND] = 1;
+            if (fplog)
+            {
+                fprintf(fplog, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99);
+            }
+            fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99);
+        }
+
+        if (bResetCountersHalfMaxH && MASTER(cr) &&
+            elapsed_time > max_hours*60.0*60.0*0.495)
+        {
+            /* Set flag that will communicate the signal to all ranks in the simulation */
+            gs.sig[eglsRESETCOUNTERS] = 1;
+        }
+
+        /* In parallel we only have to check for checkpointing in steps
+         * where we do global communication,
+         * otherwise the other nodes don't know.
+         */
+        if (MASTER(cr) && ((bGStat || !PAR(cr)) &&
+                           cpt_period >= 0 &&
+                           (cpt_period == 0 ||
+                            elapsed_time >= nchkpt*cpt_period*60.0)) &&
+            gs.set[eglsCHKPT] == 0)
+        {
+            gs.sig[eglsCHKPT] = 1;
+        }
+
+        /* ######### START SECOND UPDATE STEP ################# */
+
+        /* at the start of the step, randomize or scale the velocities (if vv);
+           the restriction to Andersen thermostats is controlled in preprocessing */
+
+        if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
+        {
+            gmx_bool bIfRandomize;
+            bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state, upd, constr);
+            /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
+            if (constr && bIfRandomize)
+            {
+                update_constraints(fplog, step, NULL, ir, mdatoms,
+                                   state, fr->bMolPBC, graph, f,
+                                   &top->idef, tmp_vir,
+                                   cr, nrnb, wcycle, upd, constr,
+                                   TRUE, bCalcVir);
+            }
+        }
+        /* Box is changed in update() when we do pressure coupling,
+         * but we should still use the old box for energy corrections and when
+         * writing it to the energy file, so it matches the trajectory files for
+         * the same timestep above. Make a copy in a separate array.
+         */
+        copy_mat(state->box, lastbox);
+
+        dvdl_constr = 0;
+
+        if (!bRerunMD || rerun_fr.bV || bForceUpdate)
+        {
+            wallcycle_start(wcycle, ewcUPDATE);
+            /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
+            if (bTrotter)
+            {
+                trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3);
+                /* We can only do Berendsen coupling after we have summed
+                 * the kinetic energy or virial. Since that happens in
+                 * global_stat after update, we should only do it at
+                 * step % nstlist = 1 with bGStatEveryStep=FALSE.
+                 */
+            }
+            else
+            {
+                update_tcouple(step, ir, state, ekind, &MassQ, mdatoms);
+                update_pcouple(fplog, step, ir, state, pcoupl_mu, M, bInitStep);
+            }
+
+            if (EI_VV(ir->eI))
+            {
+                /* velocity half-step update */
+                update_coords(fplog, step, ir, mdatoms, state, f, fcd,
+                              ekind, M, upd, etrtVELOCITY2,
+                              cr, constr);
+            }
+
+            /* Above, initialize just copies ekinh into ekin,
+             * it doesn't copy position (for VV),
+             * and entire integrator for MD.
+             */
+
+            if (ir->eI == eiVVAK)
+            {
+                /* We probably only need md->homenr, not state->natoms */
+                if (state->natoms > cbuf_nalloc)
+                {
+                    cbuf_nalloc = state->natoms;
+                    srenew(cbuf, cbuf_nalloc);
+                }
+                copy_rvecn(state->x, cbuf, 0, state->natoms);
+            }
+
+            update_coords(fplog, step, ir, mdatoms, state, f, fcd,
+                          ekind, M, upd, etrtPOSITION, cr, constr);
+            wallcycle_stop(wcycle, ewcUPDATE);
+
+            update_constraints(fplog, step, &dvdl_constr, ir, mdatoms, state,
+                               fr->bMolPBC, graph, f,
+                               &top->idef, shake_vir,
+                               cr, nrnb, wcycle, upd, constr,
+                               FALSE, bCalcVir);
+
+            if (ir->eI == eiVVAK)
+            {
+                /* erase F_EKIN and F_TEMP here? */
+                /* just compute the kinetic energy at the half step to perform a trotter step */
+                compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm,
+                                wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
+                                constr, NULL, FALSE, lastbox,
+                                NULL, &bSumEkinhOld,
+                                (bGStat ?
CGLO_GSTAT : 0) | CGLO_TEMPERATURE
+                                );
+                wallcycle_start(wcycle, ewcUPDATE);
+                trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4);
+                /* now we know the scaling, we can compute the positions again */
+                copy_rvecn(cbuf, state->x, 0, state->natoms);
+
+                update_coords(fplog, step, ir, mdatoms, state, f, fcd,
+                              ekind, M, upd, etrtPOSITION, cr, constr);
+                wallcycle_stop(wcycle, ewcUPDATE);
+
+                /* do we need an extra constraint here? just need to copy out of state->v to upd->xp? */
+                /* are the small terms in the shake_vir here due
+                 * to numerical errors, or are they important
+                 * physically? I'm thinking they are just errors, but not completely sure.
+                 * For now, will call without actually constraining, constr=NULL */
+                update_constraints(fplog, step, NULL, ir, mdatoms,
+                                   state, fr->bMolPBC, graph, f,
+                                   &top->idef, tmp_vir,
+                                   cr, nrnb, wcycle, upd, NULL,
+                                   FALSE, bCalcVir);
+            }
+            if (EI_VV(ir->eI))
+            {
+                /* this factor of 2 correction is necessary
+                   because half of the constraint force is removed
+                   in the vv step, so we have to double it. See
+                   the Redmine issue #1255. It is not yet clear
+                   if the factor of 2 is exact, or just a very
+                   good approximation, and this will be
+                   investigated. The next step is to see if this
+                   can be done adding a dhdl contribution from the
+                   rattle step, but this is somewhat more
+                   complicated with the current code. Will be
+                   investigated, hopefully for 4.6.3. However,
+                   this current solution is much better than
+                   having it completely wrong.
+                 */
+                enerd->term[F_DVDL_CONSTR] += 2*dvdl_constr;
+            }
+            else
+            {
+                enerd->term[F_DVDL_CONSTR] += dvdl_constr;
+            }
+        }
+        else if (graph)
+        {
+            /* Need to unshift here */
+            unshift_self(graph, state->box, state->x);
+        }
+
+        if (vsite != NULL)
+        {
+            wallcycle_start(wcycle, ewcVSITECONSTR);
+            if (graph != NULL)
+            {
+                shift_self(graph, state->box, state->x);
+            }
+            construct_vsites(vsite, state->x, ir->delta_t, state->v,
+                             top->idef.iparams, top->idef.il,
+                             fr->ePBC, fr->bMolPBC, cr, state->box);
+
+            if (graph != NULL)
+            {
+                unshift_self(graph, state->box, state->x);
+            }
+            wallcycle_stop(wcycle, ewcVSITECONSTR);
+        }
+
+        /* ############## IF NOT VV, Calculate globals HERE ############ */
+        /* With Leap-Frog we can skip compute_globals at
+         * non-communication steps, but we need to calculate
+         * the kinetic energy one step before communication.
+         */
+        if (bGStat || (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm)))
+        {
+            compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm,
+                            wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
+                            constr, &gs,
+                            (step_rel % gs.nstms == 0) &&
+                            (multisim_nsteps < 0 || (step_rel < multisim_nsteps)),
+                            lastbox,
+                            &totalNumberOfBondedInteractions, &bSumEkinhOld,
+                            (bGStat ? CGLO_GSTAT : 0)
+                            | (!EI_VV(ir->eI) || bRerunMD ? CGLO_ENERGY : 0)
+                            | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
+                            | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
+                            | (!EI_VV(ir->eI) || bRerunMD ? CGLO_PRESSURE : 0)
+                            | CGLO_CONSTRAINT
+                            | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0)
+                            );
+            checkNumberOfBondedInteractions(fplog, cr, totalNumberOfBondedInteractions,
+                                            top_global, top, state,
+                                            &shouldCheckNumberOfBondedInteractions);
+        }
+
+        /* ############# END CALC EKIN AND PRESSURE ################# */
+
+        /* Note: this is OK, but there are some numerical precision issues with using the convergence of
+           the virial that should probably be addressed eventually.
state->veta has better properties,
+           but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
+           generate the new shake_vir, but test the veta value for convergence. This will take some thought. */
+
+        if (ir->efep != efepNO && (!EI_VV(ir->eI) || bRerunMD))
+        {
+            /* Sum up the foreign energy and dhdl terms for md and sd.
+               Currently done every step so that dhdl is correct in the .edr */
+            sum_dhdl(enerd, state->lambda, ir->fepvals);
+        }
+        update_box(fplog, step, ir, mdatoms, state, f,
+                   pcoupl_mu, nrnb, upd);
+
+        /* ################# END UPDATE STEP 2 ################# */
+        /* #### We now have r(t+dt) and v(t+dt/2) ############# */
+
+        /* The coordinates (x) were unshifted in update */
+        if (!bGStat)
+        {
+            /* We will not sum ekinh_old,
+             * so signal that we still have to do it.
+             */
+            bSumEkinhOld = TRUE;
+        }
+
+        /* ######### BEGIN PREPARING EDR OUTPUT ########### */
+
+        /* use the directly determined last velocity, not actually the averaged half steps */
+        if (bTrotter && ir->eI == eiVV)
+        {
+            enerd->term[F_EKIN] = last_ekin;
+        }
+        enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
+
+        if (EI_VV(ir->eI))
+        {
+            enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
+        }
+        else
+        {
+            enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + compute_conserved_from_auxiliary(ir, state, &MassQ);
+        }
+        /* ######### END PREPARING EDR OUTPUT ########### */
+
+        /* Output stuff */
+        if (MASTER(cr))
+        {
+            if (fplog && do_log && bDoExpanded)
+            {
+                /* only needed if doing expanded ensemble */
+                PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals, ir->bSimTemp ? ir->simtempvals : NULL,
+                                          &state_global->dfhist, state->fep_state, ir->nstlog, step);
+            }
+            if (bCalcEner)
+            {
+                upd_mdebin(mdebin, bDoDHDL, bCalcEnerStep,
+                           t, mdatoms->tmass, enerd, state,
+                           ir->fepvals, ir->expandedvals, lastbox,
+                           shake_vir, force_vir, total_vir, pres,
+                           ekind, mu_tot, constr);
+            }
+            else
+            {
+                upd_mdebin_step(mdebin);
+            }
+
+            gmx_bool do_dr = do_per_step(step, ir->nstdisreout);
+            gmx_bool do_or = do_per_step(step, ir->nstorireout);
+
+            print_ebin(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or, do_log ? fplog : NULL,
+                       step, t,
+                       eprNORMAL, mdebin, fcd, groups, &(ir->opts));
+
+            if (ir->bPull)
+            {
+                pull_print_output(ir->pull_work, step, t);
+            }
+
+            if (do_per_step(step, ir->nstlog))
+            {
+                if (fflush(fplog) != 0)
+                {
+                    gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?");
+                }
+            }
+        }
+        if (bDoExpanded)
+        {
+            /* Have to do this part _after_ outputting the logfile and the edr file */
+            /* Gets written into the state at the beginning of next loop */
+            state->fep_state = lamnew;
+        }
+        /* Print the remaining wall clock time for the run */
+        if (MULTIMASTER(cr) &&
+            (do_verbose || gmx_got_usr_signal()) &&
+            !bPMETunePrinting)
+        {
+            if (shellfc)
+            {
+                fprintf(stderr, "\n");
+            }
+            print_time(stderr, walltime_accounting, step, ir, cr);
+        }
+
+        /* Ion/water position swapping.
+         * Not done in last step since trajectory writing happens before this call
+         * in the MD loop and exchanges would be lost anyway. */
+        bNeedRepartition = FALSE;
+        if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep &&
+            do_per_step(step, ir->swap->nstswap))
+        {
+            bNeedRepartition = do_swapcoords(cr, step, t, ir, wcycle,
+                                             bRerunMD ? rerun_fr.x : state->x,
+                                             bRerunMD ?
rerun_fr.box : state->box, + top_global, MASTER(cr) && bVerbose, bRerunMD); + + if (bNeedRepartition && DOMAINDECOMP(cr)) + { + dd_collect_state(cr->dd, state, state_global); + } + } + + /* Replica exchange */ + bExchanged = FALSE; + if (bDoReplEx) + { + bExchanged = replica_exchange(fplog, cr, repl_ex, + state_global, enerd, + state, step, t); + } + + if ( (bExchanged || bNeedRepartition) && DOMAINDECOMP(cr) ) + { + dd_partition_system(fplog, step, cr, TRUE, 1, + state_global, top_global, ir, + state, &f, mdatoms, top, fr, + vsite, constr, + nrnb, wcycle, FALSE); + shouldCheckNumberOfBondedInteractions = true; + update_realloc(upd, state->nalloc); + } + + bFirstStep = FALSE; + bInitStep = FALSE; + startingFromCheckpoint = FALSE; + + /* ####### SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */ + /* With all integrators, except VV, we need to retain the pressure + * at the current step for coupling at the next step. + */ + if ((state->flags & (1<<estPRES_PREV)) && + (bGStatEveryStep || + (ir->nstpcouple > 0 && step % ir->nstpcouple == 0))) + { + /* Store the pressure in t_state for pressure coupling + * at the next MD step. + */ + copy_mat(pres, state->pres_prev); + } + + /* ####### END SET VARIABLES FOR NEXT ITERATION ###### */ + + if ( (membed != NULL) && (!bLastStep) ) + { + rescale_membed(step_rel, membed, state_global->x); + } + + if (bRerunMD) + { + if (MASTER(cr)) + { + /* read next frame from input trajectory */ + bLastStep = !read_next_frame(oenv, status, &rerun_fr); + } + + if (PAR(cr)) + { + rerun_parallel_comm(cr, &rerun_fr, &bLastStep); + } + } + + cycles = wallcycle_stop(wcycle, ewcSTEP); + if (DOMAINDECOMP(cr) && wcycle) + { + dd_cycles_add(cr->dd, cycles, ddCyclStep); + } + + if (!bRerunMD || !rerun_fr.bStep) + { + /* increase the MD step number */ + step++; + step_rel++; + } + + /* TODO make a counter-reset module */ + /* If it is time to reset counters, set a flag that remains + true until counters actually get reset */ + if (step_rel == wcycle_get_reset_counters(wcycle) || + gs.set[eglsRESETCOUNTERS] != 0) + { + if (pme_loadbal_is_active(pme_loadbal)) + { + /* Do not permit counter reset while PME load + * balancing is active. The only purpose for resetting + * counters is to measure reliable performance data, + * and that can't be done before balancing + * completes. + * + * TODO consider fixing this by delaying the reset + * until after load balancing completes, + * e.g. https://gerrit.gromacs.org/#/c/4964/2 */ + gmx_fatal(FARGS, "PME tuning was still active when attempting to " + "reset mdrun counters at step %" GMX_PRId64 ". Try " + "resetting counters later in the run, e.g. with gmx " + "mdrun -resetstep.", step); + } + reset_all_counters(fplog, cr, step, &step_rel, ir, wcycle, nrnb, walltime_accounting, + use_GPU(fr->nbv) ? fr->nbv : NULL); + wcycle_set_reset_counters(wcycle, -1); + if (!(cr->duty & DUTY_PME)) + { + /* Tell our PME node to reset its counters */ + gmx_pme_send_resetcounters(cr, step); + } + /* Correct max_hours for the elapsed time */ + max_hours -= elapsed_time/(60.0*60.0); + /* If mdrun -maxh -resethway was active, it can only trigger once */ + bResetCountersHalfMaxH = FALSE; /* TODO move this to where gs.sig[eglsRESETCOUNTERS] is set */ + /* Reset can only happen once, so clear the triggering flag. 
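+             * (Both trigger paths, the step count set via -resetstep and the
+             * signal set for -maxh -resethway, funnel through this block.)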
*/ + gs.set[eglsRESETCOUNTERS] = 0; + } + + /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */ + IMD_prep_energies_send_positions(ir->bIMD && MASTER(cr), bIMDstep, ir->imd, enerd, step, bCalcEner, wcycle); + + } + /* End of main MD loop */ + + /* Closing TNG files can include compressing data. Therefore it is good to do that + * before stopping the time measurements. */ + mdoutf_tng_close(outf); + + /* Stop measuring walltime */ + walltime_accounting_end(walltime_accounting); + + if (bRerunMD && MASTER(cr)) + { + close_trj(status); + } + + if (!(cr->duty & DUTY_PME)) + { + /* Tell the PME only node to finish */ + gmx_pme_send_finish(cr); + } + + if (MASTER(cr)) + { + if (ir->nstcalcenergy > 0 && !bRerunMD) + { + print_ebin(mdoutf_get_fp_ene(outf), FALSE, FALSE, FALSE, fplog, step, t, + eprAVER, mdebin, fcd, groups, &(ir->opts)); + } + } + + done_mdoutf(outf); + + if (bPMETune) + { + pme_loadbal_done(pme_loadbal, cr, fplog, use_GPU(fr->nbv)); + } + + done_shellfc(fplog, shellfc, step_rel); + + if (repl_ex_nst > 0 && MASTER(cr)) + { + print_replica_exchange_statistics(fplog, repl_ex); + } + + // Clean up swapcoords + if (ir->eSwapCoords != eswapNO) + { + finish_swapcoords(ir->swap); + } + + if (membed != nullptr) + { + free_membed(membed); + } + + /* IMD cleanup, if bIMD is TRUE. */ + IMD_finalize(ir->bIMD, ir->imd); + + walltime_accounting_set_nsteps_done(walltime_accounting, step_rel); + + return 0; +} diff --git a/patches/gromacs-2016-beta1.diff/src/programs/mdrun/mdrun.cpp b/patches/gromacs-2016-beta1.diff/src/programs/mdrun/mdrun.cpp new file mode 100644 index 0000000000000000000000000000000000000000..55f3529faaba83f8ee5e66744004b578c9bce6d5 --- /dev/null +++ b/patches/gromacs-2016-beta1.diff/src/programs/mdrun/mdrun.cpp @@ -0,0 +1,577 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2011,2012,2013,2014,2015,2016, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. 
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \defgroup module_mdrun Implementation of mdrun
+ * \ingroup group_mdrun
+ *
+ * \brief This module contains code that implements mdrun.
+ */
+/*! \internal \file
+ *
+ * \brief This file implements mdrun
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \author David van der Spoel <david.vanderspoel@icm.uu.se>
+ * \author Erik Lindahl <erik@kth.se>
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ *
+ * \ingroup module_mdrun
+ */
+#include "gmxpre.h"
+
+#include "config.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include "gromacs/commandline/filenm.h"
+#include "gromacs/commandline/pargs.h"
+#include "gromacs/fileio/readinp.h"
+#include "gromacs/gmxlib/network.h"
+#include "gromacs/mdlib/main.h"
+#include "gromacs/mdlib/mdrun.h"
+#include "gromacs/mdrunutility/handlerestart.h"
+#include "gromacs/mdtypes/commrec.h"
+#include "gromacs/utility/arraysize.h"
+#include "gromacs/utility/fatalerror.h"
+
+#include "mdrun_main.h"
+#include "runner.h"
+
+/* PLUMED */
+#include "../../../Plumed.h"
+extern int plumedswitch;
+extern plumed plumedmain;
+extern void(*plumedcmd)(plumed,const char*,const void*);
+/* END PLUMED */
+
+/*! \brief Return whether either of the command-line parameters that
+ * will trigger a multi-simulation is set */
+static bool is_multisim_option_set(int argc, const char *const argv[])
+{
+    for (int i = 0; i < argc; ++i)
+    {
+        if (strcmp(argv[i], "-multi") == 0 || strcmp(argv[i], "-multidir") == 0)
+        {
+            return true;
+        }
+    }
+    return false;
+}
+
+//! Implements C-style main function for mdrun
+int gmx_mdrun(int argc, char *argv[])
+{
+    const char *desc[] = {
+        "[THISMODULE] is the main computational chemistry engine",
+        "within GROMACS. Obviously, it performs Molecular Dynamics simulations,",
+        "but it can also perform Stochastic Dynamics, Energy Minimization,",
+        "test particle insertion or (re)calculation of energies.",
+        "Normal mode analysis is another option. In this case [TT]mdrun[tt]",
+        "builds a Hessian matrix from a single conformation.",
+        "For usual Normal Modes-like calculations, make sure that",
+        "the structure provided is properly energy-minimized.",
+        "The generated matrix can be diagonalized by [gmx-nmeig].[PAR]",
+        "The [TT]mdrun[tt] program reads the run input file ([TT]-s[tt])",
+        "and distributes the topology over ranks if needed.",
+        "[TT]mdrun[tt] produces at least four output files.",
+        "A single log file ([TT]-g[tt]) is written.",
+        "The trajectory file ([TT]-o[tt]) contains coordinates, velocities and",
+        "optionally forces.",
+        "The structure file ([TT]-c[tt]) contains the coordinates and",
+        "velocities of the last step.",
+        "The energy file ([TT]-e[tt]) contains energies, the temperature,",
+        "pressure, etc.; a lot of these things are also printed in the log file.",
+        "Optionally coordinates can be written to a compressed trajectory file",
+        "([TT]-x[tt]).[PAR]",
+        "The option [TT]-dhdl[tt] is only used when free energy calculation is",
+        "turned on.[PAR]",
+        "Running mdrun efficiently in parallel is a complex topic,",
+        "many aspects of which are covered in the online User Guide. You",
+        "should look there for practical advice on using many of the options",
+        "available in mdrun.[PAR]",
+        "ED (essential dynamics) sampling and/or additional flooding potentials",
+        "are switched on by using the [TT]-ei[tt] flag followed by an [REF].edi[ref]",
+        "file. The [REF].edi[ref] file can be produced with the [TT]make_edi[tt] tool",
+        "or by using options in the essdyn menu of the WHAT IF program.",
+        "[TT]mdrun[tt] produces a [REF].xvg[ref] output file that",
+        "contains projections of positions, velocities and forces onto selected",
+        "eigenvectors.[PAR]",
+        "When user-defined potential functions have been selected in the",
+        "[REF].mdp[ref] file the [TT]-table[tt] option is used to pass [TT]mdrun[tt]",
+        "a formatted table with potential functions. The file is read from",
+        "either the current directory or from the [TT]GMXLIB[tt] directory.",
+        "A number of pre-formatted tables are presented in the [TT]GMXLIB[tt] dir,",
+        "for 6-8, 6-9, 6-10, 6-11, 6-12 Lennard-Jones potentials with",
+        "normal Coulomb.",
+        "When pair interactions are present, a separate table for pair interaction",
+        "functions is read using the [TT]-tablep[tt] option.[PAR]",
+        "When tabulated bonded functions are present in the topology,",
+        "interaction functions are read using the [TT]-tableb[tt] option.",
+        "For each different tabulated interaction type the table file name is",
+        "modified in a different way: before the file extension an underscore is",
+        "appended, then a 'b' for bonds, an 'a' for angles or a 'd' for dihedrals",
+        "and finally the table number of the interaction type.[PAR]",
+        "The options [TT]-px[tt] and [TT]-pf[tt] are used for writing pull COM",
+        "coordinates and forces when pulling is selected",
+        "in the [REF].mdp[ref] file.[PAR]",
+        "Finally some experimental algorithms can be tested when the",
+        "appropriate options have been given. Currently under",
+        "investigation is: polarizability.",
+        "[PAR]",
+        "The option [TT]-membed[tt] does what used to be g_membed, i.e. embed",
+        "a protein into a membrane. This module requires a number of settings",
+        "that are provided in a data file that is the argument of this option.",
+        "For more details on membrane embedding, see the documentation in the",
+        "user guide. The options [TT]-mn[tt] and [TT]-mp[tt] are used to provide",
+        "the index and topology files used for the embedding.",
+        "[PAR]",
+        "The option [TT]-pforce[tt] is useful when you suspect a simulation",
+        "crashes due to too large forces. With this option coordinates and",
+        "forces of atoms with a force larger than a certain value will",
+        "be printed to stderr.",
+        "[PAR]",
+        "Checkpoints containing the complete state of the system are written",
+        "at regular intervals (option [TT]-cpt[tt]) to the file [TT]-cpo[tt],",
+        "unless option [TT]-cpt[tt] is set to -1.",
+        "The previous checkpoint is backed up to [TT]state_prev.cpt[tt] to",
+        "make sure that a recent state of the system is always available,",
+        "even when the simulation is terminated while writing a checkpoint.",
+        "With [TT]-cpnum[tt] all checkpoint files are kept and appended",
+        "with the step number.",
+        "A simulation can be continued by reading the full state from file",
+        "with option [TT]-cpi[tt]. This option is intelligent in the way that",
+        "if no checkpoint file is found, GROMACS just assumes a normal run and",
+        "starts from the first step of the [REF].tpr[ref] file. By default the output",
+        "will be appended to the existing output files. The checkpoint file",
+        "contains checksums of all output files, such that you will never",
+        "lose data when some output files are modified, corrupt or removed.",
+        "There are three scenarios with [TT]-cpi[tt]:[PAR]",
+        "[TT]*[tt] no files with matching names are present: new output files are written[PAR]",
+        "[TT]*[tt] all files are present with names and checksums matching those stored",
+        "in the checkpoint file: files are appended[PAR]",
+        "[TT]*[tt] otherwise no files are modified and a fatal error is generated[PAR]",
+        "With [TT]-noappend[tt] new output files are opened and the simulation",
+        "part number is added to all output file names.",
+        "Note that in all cases the checkpoint file itself is not renamed",
+        "and will be overwritten, unless its name does not match",
+        "the [TT]-cpo[tt] option.",
+        "[PAR]",
+        "With checkpointing the output is appended to previously written",
+        "output files, unless [TT]-noappend[tt] is used or none of the previous",
+        "output files are present (except for the checkpoint file).",
+        "The integrity of the files to be appended is verified using checksums",
+        "which are stored in the checkpoint file. This ensures that output can",
+        "not be mixed up or corrupted due to file appending. When only some",
+        "of the previous output files are present, a fatal error is generated",
+        "and no old output files are modified and no new output files are opened.",
+        "The result with appending will be the same as from a single run.",
+        "The contents will be binary identical, unless you use a different number",
+        "of ranks or dynamic load balancing or the FFT library uses optimizations",
+        "through timing.",
+        "[PAR]",
+        "With option [TT]-maxh[tt] a simulation is terminated and a checkpoint",
+        "file is written at the first neighbor search step where the run time",
+        "exceeds [TT]-maxh[tt]\\*0.99 hours. This option is particularly useful in",
+        "combination with setting [TT]nsteps[tt] to -1 either in the mdp or using the",
+        "similarly named command line option. This results in an infinite run,",
+        "terminated only when the time limit set by [TT]-maxh[tt] is reached (if any) "
+        "or upon receiving a signal."
+        "[PAR]",
+        "When [TT]mdrun[tt] receives a TERM signal, it will stop as soon as a",
+        "checkpoint file can be written, i.e. after the next global communication step.",
+        "When [TT]mdrun[tt] receives an INT signal (e.g. when ctrl+C is",
+        "pressed), it will stop at the next neighbor search step or at the",
+        "second global communication step, whichever happens later.",
+        "In both cases all the usual output will be written to file.",
+        "When running with MPI, a signal to one of the [TT]mdrun[tt] ranks",
+        "is sufficient; this signal should not be sent to mpirun or",
+        "the [TT]mdrun[tt] process that is the parent of the others.",
+        "[PAR]",
+        "Interactive molecular dynamics (IMD) can be activated by using at least one",
+        "of the three IMD switches: The [TT]-imdterm[tt] switch allows one to terminate",
+        "the simulation from the molecular viewer (e.g. VMD). With [TT]-imdwait[tt],",
+        "[TT]mdrun[tt] pauses whenever no IMD client is connected. Pulling from the",
+        "IMD remote can be turned on by [TT]-imdpull[tt].",
+        "The port [TT]mdrun[tt] listens to can be altered by [TT]-imdport[tt]. The",
+        "file pointed to by [TT]-if[tt] contains atom indices and forces if IMD",
+        "pulling is used."
+        "[PAR]",
+        "When [TT]mdrun[tt] is started with MPI, it does not run niced by default."
+ }; + t_commrec *cr; + t_filenm fnm[] = { + { efTPR, NULL, NULL, ffREAD }, + { efTRN, "-o", NULL, ffWRITE }, + { efCOMPRESSED, "-x", NULL, ffOPTWR }, + { efCPT, "-cpi", NULL, ffOPTRD | ffALLOW_MISSING }, + { efCPT, "-cpo", NULL, ffOPTWR }, + { efSTO, "-c", "confout", ffWRITE }, + { efEDR, "-e", "ener", ffWRITE }, + { efLOG, "-g", "md", ffWRITE }, + { efXVG, "-dhdl", "dhdl", ffOPTWR }, + { efXVG, "-field", "field", ffOPTWR }, + { efXVG, "-table", "table", ffOPTRD }, + { efXVG, "-tablep", "tablep", ffOPTRD }, + { efXVG, "-tableb", "table", ffOPTRD }, + { efTRX, "-rerun", "rerun", ffOPTRD }, + { efXVG, "-tpi", "tpi", ffOPTWR }, + { efXVG, "-tpid", "tpidist", ffOPTWR }, + { efEDI, "-ei", "sam", ffOPTRD }, + { efXVG, "-eo", "edsam", ffOPTWR }, + { efXVG, "-devout", "deviatie", ffOPTWR }, + { efXVG, "-runav", "runaver", ffOPTWR }, + { efXVG, "-px", "pullx", ffOPTWR }, + { efXVG, "-pf", "pullf", ffOPTWR }, + { efXVG, "-ro", "rotation", ffOPTWR }, + { efLOG, "-ra", "rotangles", ffOPTWR }, + { efLOG, "-rs", "rotslabs", ffOPTWR }, + { efLOG, "-rt", "rottorque", ffOPTWR }, + { efMTX, "-mtx", "nm", ffOPTWR }, + { efRND, "-multidir", NULL, ffOPTRDMULT}, + { efDAT, "-plumed", "plumed", ffOPTRD }, /* PLUMED */ + { efDAT, "-membed", "membed", ffOPTRD }, + { efTOP, "-mp", "membed", ffOPTRD }, + { efNDX, "-mn", "membed", ffOPTRD }, + { efXVG, "-if", "imdforces", ffOPTWR }, + { efXVG, "-swap", "swapions", ffOPTWR } + }; + const int NFILE = asize(fnm); + + /* Command line options ! */ + gmx_bool bDDBondCheck = TRUE; + gmx_bool bDDBondComm = TRUE; + gmx_bool bTunePME = TRUE; + gmx_bool bVerbose = FALSE; + gmx_bool bRerunVSite = FALSE; + gmx_bool bConfout = TRUE; + gmx_bool bReproducible = FALSE; + gmx_bool bIMDwait = FALSE; + gmx_bool bIMDterm = FALSE; + gmx_bool bIMDpull = FALSE; + + int npme = -1; + int nstlist = 0; + int nmultisim = 0; + int nstglobalcomm = -1; + int repl_ex_nst = 0; + int repl_ex_seed = -1; + int repl_ex_nex = 0; + int nstepout = 100; + int resetstep = -1; + gmx_int64_t nsteps = -2; /* the value -2 means that the mdp option will be used */ + int imdport = 8888; /* can be almost anything, 8888 is easy to remember */ + + rvec realddxyz = {0, 0, 0}; + const char *ddrank_opt[ddrankorderNR+1] = + { NULL, "interleave", "pp_pme", "cartesian", NULL }; + const char *dddlb_opt[] = + { NULL, "auto", "no", "yes", NULL }; + const char *thread_aff_opt[threadaffNR+1] = + { NULL, "auto", "on", "off", NULL }; + const char *nbpu_opt[] = + { NULL, "auto", "cpu", "gpu", "gpu_cpu", NULL }; + real rdd = 0.0, rconstr = 0.0, dlb_scale = 0.8, pforce = -1; + char *ddcsx = NULL, *ddcsy = NULL, *ddcsz = NULL; + real cpt_period = 15.0, max_hours = -1; + gmx_bool bTryToAppendFiles = TRUE; + gmx_bool bKeepAndNumCPT = FALSE; + gmx_bool bResetCountersHalfWay = FALSE; + gmx_output_env_t *oenv = NULL; + + /* Non transparent initialization of a complex gmx_hw_opt_t struct. + * But unfortunately we are not allowed to call a function here, + * since declarations follow below. 
+ */ + gmx_hw_opt_t hw_opt = { + 0, 0, 0, 0, threadaffSEL, 0, 0, + { NULL, FALSE, 0, NULL } + }; + + t_pargs pa[] = { + + { "-dd", FALSE, etRVEC, {&realddxyz}, + "Domain decomposition grid, 0 is optimize" }, + { "-ddorder", FALSE, etENUM, {ddrank_opt}, + "DD rank order" }, + { "-npme", FALSE, etINT, {&npme}, + "Number of separate ranks to be used for PME, -1 is guess" }, + { "-nt", FALSE, etINT, {&hw_opt.nthreads_tot}, + "Total number of threads to start (0 is guess)" }, + { "-ntmpi", FALSE, etINT, {&hw_opt.nthreads_tmpi}, + "Number of thread-MPI threads to start (0 is guess)" }, + { "-ntomp", FALSE, etINT, {&hw_opt.nthreads_omp}, + "Number of OpenMP threads per MPI rank to start (0 is guess)" }, + { "-ntomp_pme", FALSE, etINT, {&hw_opt.nthreads_omp_pme}, + "Number of OpenMP threads per MPI rank to start (0 is -ntomp)" }, + { "-pin", FALSE, etENUM, {thread_aff_opt}, + "Whether mdrun should try to set thread affinities" }, + { "-pinoffset", FALSE, etINT, {&hw_opt.core_pinning_offset}, + "The lowest logical core number to which mdrun should pin the first thread" }, + { "-pinstride", FALSE, etINT, {&hw_opt.core_pinning_stride}, + "Pinning distance in logical cores for threads, use 0 to minimize the number of threads per physical core" }, + { "-gpu_id", FALSE, etSTR, {&hw_opt.gpu_opt.gpu_id}, + "List of GPU device id-s to use, specifies the per-node PP rank to GPU mapping" }, + { "-ddcheck", FALSE, etBOOL, {&bDDBondCheck}, + "Check for all bonded interactions with DD" }, + { "-ddbondcomm", FALSE, etBOOL, {&bDDBondComm}, + "HIDDENUse special bonded atom communication when [TT]-rdd[tt] > cut-off" }, + { "-rdd", FALSE, etREAL, {&rdd}, + "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial coordinates" }, + { "-rcon", FALSE, etREAL, {&rconstr}, + "Maximum distance for P-LINCS (nm), 0 is estimate" }, + { "-dlb", FALSE, etENUM, {dddlb_opt}, + "Dynamic load balancing (with DD)" }, + { "-dds", FALSE, etREAL, {&dlb_scale}, + "Fraction in (0,1) by whose reciprocal the initial DD cell size will be increased in order to " + "provide a margin in which dynamic load balancing can act while preserving the minimum cell size." }, + { "-ddcsx", FALSE, etSTR, {&ddcsx}, + "HIDDENA string containing a vector of the relative sizes in the x " + "direction of the corresponding DD cells. Only effective with static " + "load balancing." }, + { "-ddcsy", FALSE, etSTR, {&ddcsy}, + "HIDDENA string containing a vector of the relative sizes in the y " + "direction of the corresponding DD cells. Only effective with static " + "load balancing." }, + { "-ddcsz", FALSE, etSTR, {&ddcsz}, + "HIDDENA string containing a vector of the relative sizes in the z " + "direction of the corresponding DD cells. Only effective with static " + "load balancing." 
}, + { "-gcom", FALSE, etINT, {&nstglobalcomm}, + "Global communication frequency" }, + { "-nb", FALSE, etENUM, {&nbpu_opt}, + "Calculate non-bonded interactions on" }, + { "-nstlist", FALSE, etINT, {&nstlist}, + "Set nstlist when using a Verlet buffer tolerance (0 is guess)" }, + { "-tunepme", FALSE, etBOOL, {&bTunePME}, + "Optimize PME load between PP/PME ranks or GPU/CPU" }, + { "-v", FALSE, etBOOL, {&bVerbose}, + "Be loud and noisy" }, + { "-pforce", FALSE, etREAL, {&pforce}, + "Print all forces larger than this (kJ/mol nm)" }, + { "-reprod", FALSE, etBOOL, {&bReproducible}, + "Try to avoid optimizations that affect binary reproducibility" }, + { "-cpt", FALSE, etREAL, {&cpt_period}, + "Checkpoint interval (minutes)" }, + { "-cpnum", FALSE, etBOOL, {&bKeepAndNumCPT}, + "Keep and number checkpoint files" }, + { "-append", FALSE, etBOOL, {&bTryToAppendFiles}, + "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names" }, + { "-nsteps", FALSE, etINT64, {&nsteps}, + "Run this number of steps, overrides .mdp file option (-1 means infinite, -2 means use mdp option, smaller is invalid)" }, + { "-maxh", FALSE, etREAL, {&max_hours}, + "Terminate after 0.99 times this time (hours)" }, + { "-multi", FALSE, etINT, {&nmultisim}, + "Do multiple simulations in parallel" }, + { "-replex", FALSE, etINT, {&repl_ex_nst}, + "Attempt replica exchange periodically with this period (steps)" }, + { "-nex", FALSE, etINT, {&repl_ex_nex}, + "Number of random exchanges to carry out each exchange interval (N^3 is one suggestion). -nex zero or not specified gives neighbor replica exchange." }, + { "-reseed", FALSE, etINT, {&repl_ex_seed}, + "Seed for replica exchange, -1 is generate a seed" }, + { "-imdport", FALSE, etINT, {&imdport}, + "HIDDENIMD listening port" }, + { "-imdwait", FALSE, etBOOL, {&bIMDwait}, + "HIDDENPause the simulation while no IMD client is connected" }, + { "-imdterm", FALSE, etBOOL, {&bIMDterm}, + "HIDDENAllow termination of the simulation from IMD client" }, + { "-imdpull", FALSE, etBOOL, {&bIMDpull}, + "HIDDENAllow pulling in the simulation from IMD client" }, + { "-rerunvsite", FALSE, etBOOL, {&bRerunVSite}, + "HIDDENRecalculate virtual site coordinates with [TT]-rerun[tt]" }, + { "-confout", FALSE, etBOOL, {&bConfout}, + "HIDDENWrite the last configuration with [TT]-c[tt] and force checkpointing at the last step" }, + { "-stepout", FALSE, etINT, {&nstepout}, + "HIDDENFrequency of writing the remaining wall clock time for the run" }, + { "-resetstep", FALSE, etINT, {&resetstep}, + "HIDDENReset cycle counters after these many time steps" }, + { "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay}, + "HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt]" } + }; + unsigned long Flags; + ivec ddxyz; + int dd_rank_order; + gmx_bool bDoAppendFiles, bStartFromCpt; + FILE *fplog; + int rc; + char **multidir = NULL; + + cr = init_commrec(); + + unsigned long PCA_Flags = PCA_CAN_SET_DEFFNM; + // With -multi or -multidir, the file names are going to get processed + // further (or the working directory changed), so we can't check for their + // existence during parsing. It isn't useful to do any completion based on + // file system contents, either. 
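+    // For example (hypothetical directory layout), a four-replica run could be
+    // started as
+    //   mpirun -np 4 gmx_mpi mdrun -multidir rep0 rep1 rep2 rep3 -plumed plumed.dat
+    // where each repN directory holds its own topol.tpr and plumed.dat; relative
+    // file names are resolved per replica only after the working directory has
+    // changed, so they cannot be checked while parsing.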
+    if (is_multisim_option_set(argc, argv))
+    {
+        PCA_Flags |= PCA_DISABLE_INPUT_FILE_CHECKING;
+    }
+
+    /* Comment this in to do fexist calls only on master
+     * does not work with rerun or tables at the moment
+     * also comment out the version of init_forcerec in md.c
+     * with NULL instead of opt2fn
+     */
+    /*
+       if (!MASTER(cr))
+       {
+           PCA_Flags |= PCA_NOT_READ_NODE;
+       }
+     */
+
+    if (!parse_common_args(&argc, argv, PCA_Flags, NFILE, fnm, asize(pa), pa,
+                           asize(desc), desc, 0, NULL, &oenv))
+    {
+        return 0;
+    }
+
+
+    dd_rank_order = nenum(ddrank_opt);
+
+    hw_opt.thread_affinity = nenum(thread_aff_opt);
+
+    /* now check the -multi and -multidir option */
+    if (opt2bSet("-multidir", NFILE, fnm))
+    {
+        if (nmultisim > 0)
+        {
+            gmx_fatal(FARGS, "mdrun -multi and -multidir options are mutually exclusive.");
+        }
+        nmultisim = opt2fns(&multidir, "-multidir", NFILE, fnm);
+    }
+
+
+    if (repl_ex_nst != 0 && nmultisim < 2)
+    {
+        gmx_fatal(FARGS, "Need at least two replicas for replica exchange (option -multi)");
+    }
+
+    if (repl_ex_nex < 0)
+    {
+        gmx_fatal(FARGS, "Replica exchange number of exchanges needs to be positive");
+    }
+
+    if (nmultisim >= 1)
+    {
+#if !GMX_THREAD_MPI
+        gmx_bool bParFn = (multidir == NULL);
+        init_multisystem(cr, nmultisim, multidir, NFILE, fnm, bParFn);
+#else
+        gmx_fatal(FARGS, "mdrun -multi or -multidir are not supported with the thread-MPI library. "
+                  "Please compile GROMACS with a proper external MPI library.");
+#endif
+    }
+
+    handleRestart(cr, bTryToAppendFiles, NFILE, fnm,
+                  &bDoAppendFiles, &bStartFromCpt);
+
+    Flags = opt2bSet("-rerun", NFILE, fnm) ? MD_RERUN : 0;
+    Flags = Flags | (bDDBondCheck ? MD_DDBONDCHECK : 0);
+    Flags = Flags | (bDDBondComm ? MD_DDBONDCOMM : 0);
+    Flags = Flags | (bTunePME ? MD_TUNEPME : 0);
+    Flags = Flags | (bConfout ? MD_CONFOUT : 0);
+    Flags = Flags | (bRerunVSite ? MD_RERUN_VSITE : 0);
+    Flags = Flags | (bReproducible ? MD_REPRODUCIBLE : 0);
+    Flags = Flags | (bDoAppendFiles ? MD_APPENDFILES : 0);
+    Flags = Flags | (opt2parg_bSet("-append", asize(pa), pa) ? MD_APPENDFILESSET : 0);
+    Flags = Flags | (bKeepAndNumCPT ? MD_KEEPANDNUMCPT : 0);
+    Flags = Flags | (bStartFromCpt ? MD_STARTFROMCPT : 0);
+    Flags = Flags | (bResetCountersHalfWay ? MD_RESETCOUNTERSHALFWAY : 0);
+    Flags = Flags | (opt2parg_bSet("-ntomp", asize(pa), pa) ? MD_NTOMPSET : 0);
+    Flags = Flags | (bIMDwait ? MD_IMDWAIT : 0);
+    Flags = Flags | (bIMDterm ? MD_IMDTERM : 0);
+    Flags = Flags | (bIMDpull ? MD_IMDPULL : 0);
+
+    /* We postpone opening the log file if we are appending, so we can
+       first truncate the old log file and append to the correct position
+       there instead. */
+    if (MASTER(cr) && !bDoAppendFiles)
+    {
+        gmx_log_open(ftp2fn(efLOG, NFILE, fnm), cr,
+                     Flags & MD_APPENDFILES, &fplog);
+    }
+    else
+    {
+        fplog = NULL;
+    }
+
+    ddxyz[XX] = (int)(realddxyz[XX] + 0.5);
+    ddxyz[YY] = (int)(realddxyz[YY] + 0.5);
+    ddxyz[ZZ] = (int)(realddxyz[ZZ] + 0.5);
+
+    /* PLUMED */
+    plumedswitch=0;
+    if (opt2bSet("-plumed",NFILE,fnm)) plumedswitch=1;
+    if(plumedswitch){
+      /* function pointer through which the patched MD loop calls back into PLUMED */
+      plumedcmd=plumed_cmd;
+      /* passed to the kernel so it knows whether real is float or double */
+      int real_precision=sizeof(real);
+      real energyUnits=1.0;
+      real lengthUnits=1.0;
+      real timeUnits=1.0;
+
+      if(!plumed_installed()){
+        gmx_fatal(FARGS,"Plumed is not available.
Check your PLUMED_KERNEL variable."); + } + plumedmain=plumed_create(); + plumed_cmd(plumedmain,"setRealPrecision",&real_precision); + // this is not necessary for gromacs units: + plumed_cmd(plumedmain,"setMDEnergyUnits",&energyUnits); + plumed_cmd(plumedmain,"setMDLengthUnits",&lengthUnits); + plumed_cmd(plumedmain,"setMDTimeUnits",&timeUnits); + // + plumed_cmd(plumedmain,"setPlumedDat",ftp2fn(efDAT,NFILE,fnm)); + plumedswitch=1; + } + /* END PLUMED */ + + rc = gmx::mdrunner(&hw_opt, fplog, cr, NFILE, fnm, oenv, bVerbose, + nstglobalcomm, ddxyz, dd_rank_order, npme, rdd, rconstr, + dddlb_opt[0], dlb_scale, ddcsx, ddcsy, ddcsz, + nbpu_opt[0], nstlist, + nsteps, nstepout, resetstep, + nmultisim, repl_ex_nst, repl_ex_nex, repl_ex_seed, + pforce, cpt_period, max_hours, imdport, Flags); + + /* Log file has to be closed in mdrunner if we are appending to it + (fplog not set here) */ + if (MASTER(cr) && !bDoAppendFiles) + { + gmx_log_close(fplog); + } + + return rc; +} diff --git a/patches/gromacs-2016-beta1.diff/src/programs/mdrun/mdrun.cpp.preplumed b/patches/gromacs-2016-beta1.diff/src/programs/mdrun/mdrun.cpp.preplumed new file mode 100644 index 0000000000000000000000000000000000000000..9458dfbf91803983e33b4ae66f8661057fb2d4db --- /dev/null +++ b/patches/gromacs-2016-beta1.diff/src/programs/mdrun/mdrun.cpp.preplumed @@ -0,0 +1,544 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2011,2012,2013,2014,2015,2016, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \defgroup module_mdrun Implementation of mdrun + * \ingroup group_mdrun + * + * \brief This module contains code that implements mdrun. + */ +/*! 
\internal \file
+ *
+ * \brief This file implements mdrun
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \author David van der Spoel <david.vanderspoel@icm.uu.se>
+ * \author Erik Lindahl <erik@kth.se>
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ *
+ * \ingroup module_mdrun
+ */
+#include "gmxpre.h"
+
+#include "config.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include "gromacs/commandline/filenm.h"
+#include "gromacs/commandline/pargs.h"
+#include "gromacs/fileio/readinp.h"
+#include "gromacs/gmxlib/network.h"
+#include "gromacs/mdlib/main.h"
+#include "gromacs/mdlib/mdrun.h"
+#include "gromacs/mdrunutility/handlerestart.h"
+#include "gromacs/mdtypes/commrec.h"
+#include "gromacs/utility/arraysize.h"
+#include "gromacs/utility/fatalerror.h"
+
+#include "mdrun_main.h"
+#include "runner.h"
+
+/*! \brief Return whether either of the command-line parameters that
+ * will trigger a multi-simulation is set */
+static bool is_multisim_option_set(int argc, const char *const argv[])
+{
+    for (int i = 0; i < argc; ++i)
+    {
+        if (strcmp(argv[i], "-multi") == 0 || strcmp(argv[i], "-multidir") == 0)
+        {
+            return true;
+        }
+    }
+    return false;
+}
+
+//! Implements C-style main function for mdrun
+int gmx_mdrun(int argc, char *argv[])
+{
+    const char *desc[] = {
+        "[THISMODULE] is the main computational chemistry engine",
+        "within GROMACS. Obviously, it performs Molecular Dynamics simulations,",
+        "but it can also perform Stochastic Dynamics, Energy Minimization,",
+        "test particle insertion or (re)calculation of energies.",
+        "Normal mode analysis is another option. In this case [TT]mdrun[tt]",
+        "builds a Hessian matrix from a single conformation.",
+        "For usual Normal Modes-like calculations, make sure that",
+        "the structure provided is properly energy-minimized.",
+        "The generated matrix can be diagonalized by [gmx-nmeig].[PAR]",
+        "The [TT]mdrun[tt] program reads the run input file ([TT]-s[tt])",
+        "and distributes the topology over ranks if needed.",
+        "[TT]mdrun[tt] produces at least four output files.",
+        "A single log file ([TT]-g[tt]) is written.",
+        "The trajectory file ([TT]-o[tt]) contains coordinates, velocities and",
+        "optionally forces.",
+        "The structure file ([TT]-c[tt]) contains the coordinates and",
+        "velocities of the last step.",
+        "The energy file ([TT]-e[tt]) contains energies, the temperature,",
+        "pressure, etc.; a lot of these things are also printed in the log file.",
+        "Optionally coordinates can be written to a compressed trajectory file",
+        "([TT]-x[tt]).[PAR]",
+        "The option [TT]-dhdl[tt] is only used when free energy calculation is",
+        "turned on.[PAR]",
+        "Running mdrun efficiently in parallel is a complex topic,",
+        "many aspects of which are covered in the online User Guide. You",
+        "should look there for practical advice on using many of the options",
+        "available in mdrun.[PAR]",
+        "ED (essential dynamics) sampling and/or additional flooding potentials",
+        "are switched on by using the [TT]-ei[tt] flag followed by an [REF].edi[ref]",
+        "file. The [REF].edi[ref] file can be produced with the [TT]make_edi[tt] tool",
+        "or by using options in the essdyn menu of the WHAT IF program.",
+        "[TT]mdrun[tt] produces a [REF].xvg[ref] output file that",
+        "contains projections of positions, velocities and forces onto selected",
+        "eigenvectors.[PAR]",
+        "When user-defined potential functions have been selected in the",
+        "[REF].mdp[ref] file the [TT]-table[tt] option is used to pass [TT]mdrun[tt]",
+        "a formatted table with potential functions. The file is read from",
+        "either the current directory or from the [TT]GMXLIB[tt] directory.",
+        "A number of pre-formatted tables are presented in the [TT]GMXLIB[tt] dir,",
+        "for 6-8, 6-9, 6-10, 6-11, 6-12 Lennard-Jones potentials with",
+        "normal Coulomb.",
+        "When pair interactions are present, a separate table for pair interaction",
+        "functions is read using the [TT]-tablep[tt] option.[PAR]",
+        "When tabulated bonded functions are present in the topology,",
+        "interaction functions are read using the [TT]-tableb[tt] option.",
+        "For each different tabulated interaction type the table file name is",
+        "modified in a different way: before the file extension an underscore is",
+        "appended, then a 'b' for bonds, an 'a' for angles or a 'd' for dihedrals",
+        "and finally the table number of the interaction type.[PAR]",
+        "The options [TT]-px[tt] and [TT]-pf[tt] are used for writing pull COM",
+        "coordinates and forces when pulling is selected",
+        "in the [REF].mdp[ref] file.[PAR]",
+        "Finally some experimental algorithms can be tested when the",
+        "appropriate options have been given. Currently under",
+        "investigation is: polarizability.",
+        "[PAR]",
+        "The option [TT]-membed[tt] does what used to be g_membed, i.e. embed",
+        "a protein into a membrane. This module requires a number of settings",
+        "that are provided in a data file that is the argument of this option.",
+        "For more details on membrane embedding, see the documentation in the",
+        "user guide. The options [TT]-mn[tt] and [TT]-mp[tt] are used to provide",
+        "the index and topology files used for the embedding.",
+        "[PAR]",
+        "The option [TT]-pforce[tt] is useful when you suspect a simulation",
+        "crashes due to too large forces. With this option coordinates and",
+        "forces of atoms with a force larger than a certain value will",
+        "be printed to stderr.",
+        "[PAR]",
+        "Checkpoints containing the complete state of the system are written",
+        "at regular intervals (option [TT]-cpt[tt]) to the file [TT]-cpo[tt],",
+        "unless option [TT]-cpt[tt] is set to -1.",
+        "The previous checkpoint is backed up to [TT]state_prev.cpt[tt] to",
+        "make sure that a recent state of the system is always available,",
+        "even when the simulation is terminated while writing a checkpoint.",
+        "With [TT]-cpnum[tt] all checkpoint files are kept and appended",
+        "with the step number.",
+        "A simulation can be continued by reading the full state from file",
+        "with option [TT]-cpi[tt]. This option is intelligent in the way that",
+        "if no checkpoint file is found, GROMACS just assumes a normal run and",
+        "starts from the first step of the [REF].tpr[ref] file. By default the output",
+        "will be appended to the existing output files. The checkpoint file",
+        "contains checksums of all output files, such that you will never",
+        "lose data when some output files are modified, corrupt or removed.",
+        "There are three scenarios with [TT]-cpi[tt]:[PAR]",
+        "[TT]*[tt] no files with matching names are present: new output files are written[PAR]",
+        "[TT]*[tt] all files are present with names and checksums matching those stored",
+        "in the checkpoint file: files are appended[PAR]",
+        "[TT]*[tt] otherwise no files are modified and a fatal error is generated[PAR]",
+        "With [TT]-noappend[tt] new output files are opened and the simulation",
+        "part number is added to all output file names.",
+        "Note that in all cases the checkpoint file itself is not renamed",
+        "and will be overwritten, unless its name does not match",
+        "the [TT]-cpo[tt] option.",
+        "[PAR]",
+        "With checkpointing the output is appended to previously written",
+        "output files, unless [TT]-noappend[tt] is used or none of the previous",
+        "output files are present (except for the checkpoint file).",
+        "The integrity of the files to be appended is verified using checksums",
+        "which are stored in the checkpoint file. This ensures that output can",
+        "not be mixed up or corrupted due to file appending. When only some",
+        "of the previous output files are present, a fatal error is generated",
+        "and no old output files are modified and no new output files are opened.",
+        "The result with appending will be the same as from a single run.",
+        "The contents will be binary identical, unless you use a different number",
+        "of ranks or dynamic load balancing or the FFT library uses optimizations",
+        "through timing.",
+        "[PAR]",
+        "With option [TT]-maxh[tt] a simulation is terminated and a checkpoint",
+        "file is written at the first neighbor search step where the run time",
+        "exceeds [TT]-maxh[tt]\\*0.99 hours. This option is particularly useful in",
+        "combination with setting [TT]nsteps[tt] to -1 either in the mdp or using the",
+        "similarly named command line option. This results in an infinite run,",
+        "terminated only when the time limit set by [TT]-maxh[tt] is reached (if any) "
+        "or upon receiving a signal."
+        "[PAR]",
+        "When [TT]mdrun[tt] receives a TERM signal, it will stop as soon as a",
+        "checkpoint file can be written, i.e. after the next global communication step.",
+        "When [TT]mdrun[tt] receives an INT signal (e.g. when ctrl+C is",
+        "pressed), it will stop at the next neighbor search step or at the",
+        "second global communication step, whichever happens later.",
+        "In both cases all the usual output will be written to file.",
+        "When running with MPI, a signal to one of the [TT]mdrun[tt] ranks",
+        "is sufficient; this signal should not be sent to mpirun or",
+        "the [TT]mdrun[tt] process that is the parent of the others.",
+        "[PAR]",
+        "Interactive molecular dynamics (IMD) can be activated by using at least one",
+        "of the three IMD switches: The [TT]-imdterm[tt] switch allows one to terminate",
+        "the simulation from the molecular viewer (e.g. VMD). With [TT]-imdwait[tt],",
+        "[TT]mdrun[tt] pauses whenever no IMD client is connected. Pulling from the",
+        "IMD remote can be turned on by [TT]-imdpull[tt].",
+        "The port [TT]mdrun[tt] listens to can be altered by [TT]-imdport[tt]. The",
+        "file pointed to by [TT]-if[tt] contains atom indices and forces if IMD",
+        "pulling is used."
+        "[PAR]",
+        "When [TT]mdrun[tt] is started with MPI, it does not run niced by default."
+ }; + t_commrec *cr; + t_filenm fnm[] = { + { efTPR, NULL, NULL, ffREAD }, + { efTRN, "-o", NULL, ffWRITE }, + { efCOMPRESSED, "-x", NULL, ffOPTWR }, + { efCPT, "-cpi", NULL, ffOPTRD | ffALLOW_MISSING }, + { efCPT, "-cpo", NULL, ffOPTWR }, + { efSTO, "-c", "confout", ffWRITE }, + { efEDR, "-e", "ener", ffWRITE }, + { efLOG, "-g", "md", ffWRITE }, + { efXVG, "-dhdl", "dhdl", ffOPTWR }, + { efXVG, "-field", "field", ffOPTWR }, + { efXVG, "-table", "table", ffOPTRD }, + { efXVG, "-tablep", "tablep", ffOPTRD }, + { efXVG, "-tableb", "table", ffOPTRD }, + { efTRX, "-rerun", "rerun", ffOPTRD }, + { efXVG, "-tpi", "tpi", ffOPTWR }, + { efXVG, "-tpid", "tpidist", ffOPTWR }, + { efEDI, "-ei", "sam", ffOPTRD }, + { efXVG, "-eo", "edsam", ffOPTWR }, + { efXVG, "-devout", "deviatie", ffOPTWR }, + { efXVG, "-runav", "runaver", ffOPTWR }, + { efXVG, "-px", "pullx", ffOPTWR }, + { efXVG, "-pf", "pullf", ffOPTWR }, + { efXVG, "-ro", "rotation", ffOPTWR }, + { efLOG, "-ra", "rotangles", ffOPTWR }, + { efLOG, "-rs", "rotslabs", ffOPTWR }, + { efLOG, "-rt", "rottorque", ffOPTWR }, + { efMTX, "-mtx", "nm", ffOPTWR }, + { efRND, "-multidir", NULL, ffOPTRDMULT}, + { efDAT, "-membed", "membed", ffOPTRD }, + { efTOP, "-mp", "membed", ffOPTRD }, + { efNDX, "-mn", "membed", ffOPTRD }, + { efXVG, "-if", "imdforces", ffOPTWR }, + { efXVG, "-swap", "swapions", ffOPTWR } + }; + const int NFILE = asize(fnm); + + /* Command line options ! */ + gmx_bool bDDBondCheck = TRUE; + gmx_bool bDDBondComm = TRUE; + gmx_bool bTunePME = TRUE; + gmx_bool bVerbose = FALSE; + gmx_bool bRerunVSite = FALSE; + gmx_bool bConfout = TRUE; + gmx_bool bReproducible = FALSE; + gmx_bool bIMDwait = FALSE; + gmx_bool bIMDterm = FALSE; + gmx_bool bIMDpull = FALSE; + + int npme = -1; + int nstlist = 0; + int nmultisim = 0; + int nstglobalcomm = -1; + int repl_ex_nst = 0; + int repl_ex_seed = -1; + int repl_ex_nex = 0; + int nstepout = 100; + int resetstep = -1; + gmx_int64_t nsteps = -2; /* the value -2 means that the mdp option will be used */ + int imdport = 8888; /* can be almost anything, 8888 is easy to remember */ + + rvec realddxyz = {0, 0, 0}; + const char *ddrank_opt[ddrankorderNR+1] = + { NULL, "interleave", "pp_pme", "cartesian", NULL }; + const char *dddlb_opt[] = + { NULL, "auto", "no", "yes", NULL }; + const char *thread_aff_opt[threadaffNR+1] = + { NULL, "auto", "on", "off", NULL }; + const char *nbpu_opt[] = + { NULL, "auto", "cpu", "gpu", "gpu_cpu", NULL }; + real rdd = 0.0, rconstr = 0.0, dlb_scale = 0.8, pforce = -1; + char *ddcsx = NULL, *ddcsy = NULL, *ddcsz = NULL; + real cpt_period = 15.0, max_hours = -1; + gmx_bool bTryToAppendFiles = TRUE; + gmx_bool bKeepAndNumCPT = FALSE; + gmx_bool bResetCountersHalfWay = FALSE; + gmx_output_env_t *oenv = NULL; + + /* Non transparent initialization of a complex gmx_hw_opt_t struct. + * But unfortunately we are not allowed to call a function here, + * since declarations follow below. 
+ */ + gmx_hw_opt_t hw_opt = { + 0, 0, 0, 0, threadaffSEL, 0, 0, + { NULL, FALSE, 0, NULL } + }; + + t_pargs pa[] = { + + { "-dd", FALSE, etRVEC, {&realddxyz}, + "Domain decomposition grid, 0 is optimize" }, + { "-ddorder", FALSE, etENUM, {ddrank_opt}, + "DD rank order" }, + { "-npme", FALSE, etINT, {&npme}, + "Number of separate ranks to be used for PME, -1 is guess" }, + { "-nt", FALSE, etINT, {&hw_opt.nthreads_tot}, + "Total number of threads to start (0 is guess)" }, + { "-ntmpi", FALSE, etINT, {&hw_opt.nthreads_tmpi}, + "Number of thread-MPI threads to start (0 is guess)" }, + { "-ntomp", FALSE, etINT, {&hw_opt.nthreads_omp}, + "Number of OpenMP threads per MPI rank to start (0 is guess)" }, + { "-ntomp_pme", FALSE, etINT, {&hw_opt.nthreads_omp_pme}, + "Number of OpenMP threads per MPI rank to start (0 is -ntomp)" }, + { "-pin", FALSE, etENUM, {thread_aff_opt}, + "Whether mdrun should try to set thread affinities" }, + { "-pinoffset", FALSE, etINT, {&hw_opt.core_pinning_offset}, + "The lowest logical core number to which mdrun should pin the first thread" }, + { "-pinstride", FALSE, etINT, {&hw_opt.core_pinning_stride}, + "Pinning distance in logical cores for threads, use 0 to minimize the number of threads per physical core" }, + { "-gpu_id", FALSE, etSTR, {&hw_opt.gpu_opt.gpu_id}, + "List of GPU device id-s to use, specifies the per-node PP rank to GPU mapping" }, + { "-ddcheck", FALSE, etBOOL, {&bDDBondCheck}, + "Check for all bonded interactions with DD" }, + { "-ddbondcomm", FALSE, etBOOL, {&bDDBondComm}, + "HIDDENUse special bonded atom communication when [TT]-rdd[tt] > cut-off" }, + { "-rdd", FALSE, etREAL, {&rdd}, + "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial coordinates" }, + { "-rcon", FALSE, etREAL, {&rconstr}, + "Maximum distance for P-LINCS (nm), 0 is estimate" }, + { "-dlb", FALSE, etENUM, {dddlb_opt}, + "Dynamic load balancing (with DD)" }, + { "-dds", FALSE, etREAL, {&dlb_scale}, + "Fraction in (0,1) by whose reciprocal the initial DD cell size will be increased in order to " + "provide a margin in which dynamic load balancing can act while preserving the minimum cell size." }, + { "-ddcsx", FALSE, etSTR, {&ddcsx}, + "HIDDENA string containing a vector of the relative sizes in the x " + "direction of the corresponding DD cells. Only effective with static " + "load balancing." }, + { "-ddcsy", FALSE, etSTR, {&ddcsy}, + "HIDDENA string containing a vector of the relative sizes in the y " + "direction of the corresponding DD cells. Only effective with static " + "load balancing." }, + { "-ddcsz", FALSE, etSTR, {&ddcsz}, + "HIDDENA string containing a vector of the relative sizes in the z " + "direction of the corresponding DD cells. Only effective with static " + "load balancing." 
}, + { "-gcom", FALSE, etINT, {&nstglobalcomm}, + "Global communication frequency" }, + { "-nb", FALSE, etENUM, {&nbpu_opt}, + "Calculate non-bonded interactions on" }, + { "-nstlist", FALSE, etINT, {&nstlist}, + "Set nstlist when using a Verlet buffer tolerance (0 is guess)" }, + { "-tunepme", FALSE, etBOOL, {&bTunePME}, + "Optimize PME load between PP/PME ranks or GPU/CPU" }, + { "-v", FALSE, etBOOL, {&bVerbose}, + "Be loud and noisy" }, + { "-pforce", FALSE, etREAL, {&pforce}, + "Print all forces larger than this (kJ/mol nm)" }, + { "-reprod", FALSE, etBOOL, {&bReproducible}, + "Try to avoid optimizations that affect binary reproducibility" }, + { "-cpt", FALSE, etREAL, {&cpt_period}, + "Checkpoint interval (minutes)" }, + { "-cpnum", FALSE, etBOOL, {&bKeepAndNumCPT}, + "Keep and number checkpoint files" }, + { "-append", FALSE, etBOOL, {&bTryToAppendFiles}, + "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names" }, + { "-nsteps", FALSE, etINT64, {&nsteps}, + "Run this number of steps, overrides .mdp file option (-1 means infinite, -2 means use mdp option, smaller is invalid)" }, + { "-maxh", FALSE, etREAL, {&max_hours}, + "Terminate after 0.99 times this time (hours)" }, + { "-multi", FALSE, etINT, {&nmultisim}, + "Do multiple simulations in parallel" }, + { "-replex", FALSE, etINT, {&repl_ex_nst}, + "Attempt replica exchange periodically with this period (steps)" }, + { "-nex", FALSE, etINT, {&repl_ex_nex}, + "Number of random exchanges to carry out each exchange interval (N^3 is one suggestion). -nex zero or not specified gives neighbor replica exchange." }, + { "-reseed", FALSE, etINT, {&repl_ex_seed}, + "Seed for replica exchange, -1 is generate a seed" }, + { "-imdport", FALSE, etINT, {&imdport}, + "HIDDENIMD listening port" }, + { "-imdwait", FALSE, etBOOL, {&bIMDwait}, + "HIDDENPause the simulation while no IMD client is connected" }, + { "-imdterm", FALSE, etBOOL, {&bIMDterm}, + "HIDDENAllow termination of the simulation from IMD client" }, + { "-imdpull", FALSE, etBOOL, {&bIMDpull}, + "HIDDENAllow pulling in the simulation from IMD client" }, + { "-rerunvsite", FALSE, etBOOL, {&bRerunVSite}, + "HIDDENRecalculate virtual site coordinates with [TT]-rerun[tt]" }, + { "-confout", FALSE, etBOOL, {&bConfout}, + "HIDDENWrite the last configuration with [TT]-c[tt] and force checkpointing at the last step" }, + { "-stepout", FALSE, etINT, {&nstepout}, + "HIDDENFrequency of writing the remaining wall clock time for the run" }, + { "-resetstep", FALSE, etINT, {&resetstep}, + "HIDDENReset cycle counters after these many time steps" }, + { "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay}, + "HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt]" } + }; + unsigned long Flags; + ivec ddxyz; + int dd_rank_order; + gmx_bool bDoAppendFiles, bStartFromCpt; + FILE *fplog; + int rc; + char **multidir = NULL; + + cr = init_commrec(); + + unsigned long PCA_Flags = PCA_CAN_SET_DEFFNM; + // With -multi or -multidir, the file names are going to get processed + // further (or the working directory changed), so we can't check for their + // existence during parsing. It isn't useful to do any completion based on + // file system contents, either. 
+    if (is_multisim_option_set(argc, argv))
+    {
+        PCA_Flags |= PCA_DISABLE_INPUT_FILE_CHECKING;
+    }
+
+    /* Comment this in to do fexist calls only on master
+     * does not work with rerun or tables at the moment
+     * also comment out the version of init_forcerec in md.c
+     * with NULL instead of opt2fn
+     */
+    /*
+       if (!MASTER(cr))
+       {
+           PCA_Flags |= PCA_NOT_READ_NODE;
+       }
+     */
+
+    if (!parse_common_args(&argc, argv, PCA_Flags, NFILE, fnm, asize(pa), pa,
+                           asize(desc), desc, 0, NULL, &oenv))
+    {
+        return 0;
+    }
+
+
+    dd_rank_order = nenum(ddrank_opt);
+
+    hw_opt.thread_affinity = nenum(thread_aff_opt);
+
+    /* now check the -multi and -multidir option */
+    if (opt2bSet("-multidir", NFILE, fnm))
+    {
+        if (nmultisim > 0)
+        {
+            gmx_fatal(FARGS, "mdrun -multi and -multidir options are mutually exclusive.");
+        }
+        nmultisim = opt2fns(&multidir, "-multidir", NFILE, fnm);
+    }
+
+
+    if (repl_ex_nst != 0 && nmultisim < 2)
+    {
+        gmx_fatal(FARGS, "Need at least two replicas for replica exchange (option -multi)");
+    }
+
+    if (repl_ex_nex < 0)
+    {
+        gmx_fatal(FARGS, "Replica exchange number of exchanges needs to be positive");
+    }
+
+    if (nmultisim >= 1)
+    {
+#if !GMX_THREAD_MPI
+        gmx_bool bParFn = (multidir == NULL);
+        init_multisystem(cr, nmultisim, multidir, NFILE, fnm, bParFn);
+#else
+        gmx_fatal(FARGS, "mdrun -multi or -multidir are not supported with the thread-MPI library. "
+                  "Please compile GROMACS with a proper external MPI library.");
+#endif
+    }
+
+    handleRestart(cr, bTryToAppendFiles, NFILE, fnm,
+                  &bDoAppendFiles, &bStartFromCpt);
+
+    Flags = opt2bSet("-rerun", NFILE, fnm) ? MD_RERUN : 0;
+    Flags = Flags | (bDDBondCheck ? MD_DDBONDCHECK : 0);
+    Flags = Flags | (bDDBondComm ? MD_DDBONDCOMM : 0);
+    Flags = Flags | (bTunePME ? MD_TUNEPME : 0);
+    Flags = Flags | (bConfout ? MD_CONFOUT : 0);
+    Flags = Flags | (bRerunVSite ? MD_RERUN_VSITE : 0);
+    Flags = Flags | (bReproducible ? MD_REPRODUCIBLE : 0);
+    Flags = Flags | (bDoAppendFiles ? MD_APPENDFILES : 0);
+    Flags = Flags | (opt2parg_bSet("-append", asize(pa), pa) ? MD_APPENDFILESSET : 0);
+    Flags = Flags | (bKeepAndNumCPT ? MD_KEEPANDNUMCPT : 0);
+    Flags = Flags | (bStartFromCpt ? MD_STARTFROMCPT : 0);
+    Flags = Flags | (bResetCountersHalfWay ? MD_RESETCOUNTERSHALFWAY : 0);
+    Flags = Flags | (opt2parg_bSet("-ntomp", asize(pa), pa) ? MD_NTOMPSET : 0);
+    Flags = Flags | (bIMDwait ? MD_IMDWAIT : 0);
+    Flags = Flags | (bIMDterm ? MD_IMDTERM : 0);
+    Flags = Flags | (bIMDpull ? MD_IMDPULL : 0);
+
+    /* We postpone opening the log file if we are appending, so we can
+       first truncate the old log file and append to the correct position
+       there instead.
*/ + if (MASTER(cr) && !bDoAppendFiles) + { + gmx_log_open(ftp2fn(efLOG, NFILE, fnm), cr, + Flags & MD_APPENDFILES, &fplog); + } + else + { + fplog = NULL; + } + + ddxyz[XX] = (int)(realddxyz[XX] + 0.5); + ddxyz[YY] = (int)(realddxyz[YY] + 0.5); + ddxyz[ZZ] = (int)(realddxyz[ZZ] + 0.5); + + rc = gmx::mdrunner(&hw_opt, fplog, cr, NFILE, fnm, oenv, bVerbose, + nstglobalcomm, ddxyz, dd_rank_order, npme, rdd, rconstr, + dddlb_opt[0], dlb_scale, ddcsx, ddcsy, ddcsz, + nbpu_opt[0], nstlist, + nsteps, nstepout, resetstep, + nmultisim, repl_ex_nst, repl_ex_nex, repl_ex_seed, + pforce, cpt_period, max_hours, imdport, Flags); + + /* Log file has to be closed in mdrunner if we are appending to it + (fplog not set here) */ + if (MASTER(cr) && !bDoAppendFiles) + { + gmx_log_close(fplog); + } + + return rc; +} diff --git a/patches/gromacs-2016-beta1.diff/src/programs/mdrun/repl_ex.cpp b/patches/gromacs-2016-beta1.diff/src/programs/mdrun/repl_ex.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2b870b96d1ec09aa25e998809568da441d706738 --- /dev/null +++ b/patches/gromacs-2016-beta1.diff/src/programs/mdrun/repl_ex.cpp @@ -0,0 +1,1479 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2011,2012,2013,2014,2015,2016, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. 
+ */
+
+#include "gmxpre.h"
+
+#include "repl_ex.h"
+
+#include "config.h"
+
+#include <math.h>
+
+#include <random>
+
+#include "gromacs/domdec/domdec.h"
+#include "gromacs/gmxlib/network.h"
+#include "gromacs/math/units.h"
+#include "gromacs/math/vec.h"
+#include "gromacs/mdlib/main.h"
+#include "gromacs/mdtypes/commrec.h"
+#include "gromacs/mdtypes/inputrec.h"
+#include "gromacs/mdtypes/md_enums.h"
+#include "gromacs/random/threefry.h"
+#include "gromacs/random/uniformintdistribution.h"
+#include "gromacs/random/uniformrealdistribution.h"
+#include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/pleasecite.h"
+#include "gromacs/utility/smalloc.h"
+
+
+/* PLUMED */
+#include "../../../Plumed.h"
+extern int plumedswitch;
+extern plumed plumedmain;
+/* END PLUMED */
+
+#define PROBABILITYCUTOFF 100
+/* we don't bother evaluating if events are more rare than exp(-100) = 3.7x10^-44 */
+
+//! Rank in the multisimulation
+#define MSRANK(ms, nodeid) (nodeid)
+
+enum {
+    ereTEMP, ereLAMBDA, ereENDSINGLE, ereTL, ereNR
+};
+const char *erename[ereNR] = { "temperature", "lambda", "end_single_marker", "temperature and lambda"};
+/* end_single_marker merely notes the end of single variable replica exchange. All types higher than
+   it are multiple replica exchange methods */
+/* Eventually, should add 'pressure', 'temperature and pressure', 'lambda_and_pressure', 'temperature_lambda_pressure'?;
+   Let's wait until we feel better about the pressure control methods giving exact ensembles. Right now, we assume constant pressure */
+
+typedef struct gmx_repl_ex
+{
+    int       repl;        /* replica ID */
+    int       nrepl;       /* total number of replicas */
+    real      temp;        /* temperature */
+    int       type;        /* replica exchange type from ere enum */
+    real    **q;           /* quantity, e.g. temperature or lambda; first index is ere, second index is replica ID */
+    gmx_bool  bNPT;        /* use constant pressure and temperature */
+    real     *pres;        /* replica pressures */
+    int      *ind;         /* replica indices */
+    int      *allswaps;    /* used for keeping track of all the replica swaps */
+    int       nst;         /* replica exchange interval (number of steps) */
+    int       nex;         /* number of exchanges per interval */
+    int       seed;        /* random seed */
+    int       nattempt[2]; /* number of even and odd replica change attempts */
+    real     *prob_sum;    /* sum of probabilities */
+    int     **nmoves;      /* number of moves between replicas i and j */
+    int      *nexchange;   /* i-th element of the array is the number of exchanges between replica i-1 and i */
+
+    /* these are helper arrays for replica exchange; allocated here so they
+       don't have to be allocated each time */
+    int      *destinations;
+    int     **cyclic;
+    int     **order;
+    int      *tmpswap;
+    gmx_bool *incycle;
+    gmx_bool *bEx;
+
+    /* helper arrays to hold the quantities that are exchanged */
+    real  *prob;
+    real  *Epot;
+    real  *beta;
+    real  *Vol;
+    real **de;
+
+} t_gmx_repl_ex;
+
+static gmx_bool repl_quantity(const gmx_multisim_t *ms,
+                              struct gmx_repl_ex *re, int ere, real q)
+{
+    real    *qall;
+    gmx_bool bDiff;
+    int      s;
+
+    snew(qall, ms->nsim);
+    qall[re->repl] = q;
+    gmx_sum_sim(ms->nsim, qall, ms);
+
+    /* PLUMED */
+    //bDiff = FALSE;
+    //for (s = 1; s < ms->nsim; s++)
+    //{
+    //    if (qall[s] != qall[0])
+    //    {
+            bDiff = TRUE;
+    //    }
+    //}
+    /* END PLUMED */
+
+    if (bDiff)
+    {
+        /* Set the replica exchange type and quantities */
+        re->type = ere;
+
+        snew(re->q[ere], re->nrepl);
+        for (s = 0; s < ms->nsim; s++)
+        {
+            re->q[ere][s] = qall[s];
+        }
+    }
+    sfree(qall);
+    return bDiff;
+}
+
+gmx_repl_ex_t init_replica_exchange(FILE *fplog,
+                                    const gmx_multisim_t *ms,
+
const t_state *state, + const t_inputrec *ir, + int nst, int nex, int init_seed) +{ + real pres; + int i, j, k; + struct gmx_repl_ex *re; + gmx_bool bTemp; + gmx_bool bLambda = FALSE; + + fprintf(fplog, "\nInitializing Replica Exchange\n"); + + if (ms == NULL || ms->nsim == 1) + { + gmx_fatal(FARGS, "Nothing to exchange with only one replica, maybe you forgot to set the -multi option of mdrun?"); + } + if (!EI_DYNAMICS(ir->eI)) + { + gmx_fatal(FARGS, "Replica exchange is only supported by dynamical simulations"); + /* Note that PAR(cr) is defined by cr->nnodes > 1, which is + * distinct from MULTISIM(cr). A multi-simulation only runs + * with real MPI parallelism, but this does not imply PAR(cr) + * is true! + * + * Since we are using a dynamical integrator, the only + * decomposition is DD, so PAR(cr) and DOMAINDECOMP(cr) are + * synonymous. The only way for cr->nnodes > 1 to be true is + * if we are using DD. */ + } + + snew(re, 1); + + re->repl = ms->sim; + re->nrepl = ms->nsim; + snew(re->q, ereENDSINGLE); + + fprintf(fplog, "Repl There are %d replicas:\n", re->nrepl); + + check_multi_int(fplog, ms, state->natoms, "the number of atoms", FALSE); + check_multi_int(fplog, ms, ir->eI, "the integrator", FALSE); + check_multi_int64(fplog, ms, ir->init_step+ir->nsteps, "init_step+nsteps", FALSE); + check_multi_int64(fplog, ms, (ir->init_step+nst-1)/nst, + "first exchange step: init_step/-replex", FALSE); + check_multi_int(fplog, ms, ir->etc, "the temperature coupling", FALSE); + check_multi_int(fplog, ms, ir->opts.ngtc, + "the number of temperature coupling groups", FALSE); + check_multi_int(fplog, ms, ir->epc, "the pressure coupling", FALSE); + check_multi_int(fplog, ms, ir->efep, "free energy", FALSE); + check_multi_int(fplog, ms, ir->fepvals->n_lambda, "number of lambda states", FALSE); + + re->temp = ir->opts.ref_t[0]; + for (i = 1; (i < ir->opts.ngtc); i++) + { + if (ir->opts.ref_t[i] != re->temp) + { + fprintf(fplog, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n"); + fprintf(stderr, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n"); + } + } + + re->type = -1; + bTemp = repl_quantity(ms, re, ereTEMP, re->temp); + if (ir->efep != efepNO) + { + bLambda = repl_quantity(ms, re, ereLAMBDA, (real)ir->fepvals->init_fep_state); + } + if (re->type == -1) /* nothing was assigned */ + { + gmx_fatal(FARGS, "The properties of the %d systems are all the same, there is nothing to exchange", re->nrepl); + } + if (bLambda && bTemp) + { + re->type = ereTL; + } + + if (bTemp) + { + please_cite(fplog, "Sugita1999a"); + if (ir->epc != epcNO) + { + re->bNPT = TRUE; + fprintf(fplog, "Repl Using Constant Pressure REMD.\n"); + please_cite(fplog, "Okabe2001a"); + } + if (ir->etc == etcBERENDSEN) + { + gmx_fatal(FARGS, "REMD with the %s thermostat does not produce correct potential energy distributions, consider using the %s thermostat instead", + ETCOUPLTYPE(ir->etc), ETCOUPLTYPE(etcVRESCALE)); + } + } + if (bLambda) + { + if (ir->fepvals->delta_lambda != 0) /* check this? 
*/ + { + gmx_fatal(FARGS, "delta_lambda is not zero"); + } + } + if (re->bNPT) + { + snew(re->pres, re->nrepl); + if (ir->epct == epctSURFACETENSION) + { + pres = ir->ref_p[ZZ][ZZ]; + } + else + { + pres = 0; + j = 0; + for (i = 0; i < DIM; i++) + { + if (ir->compress[i][i] != 0) + { + pres += ir->ref_p[i][i]; + j++; + } + } + pres /= j; + } + re->pres[re->repl] = pres; + gmx_sum_sim(re->nrepl, re->pres, ms); + } + + /* Make an index for increasing replica order */ + /* only makes sense if one or the other is varying, not both! + if both are varying, we trust the order the person gave. */ + snew(re->ind, re->nrepl); + for (i = 0; i < re->nrepl; i++) + { + re->ind[i] = i; + } + + /* PLUMED */ + // plumed2: check if we want alternative patterns (i.e. for bias-exchange metaD) + // in those cases replicas can share the same temperature. + /* + if (re->type < ereENDSINGLE) + { + + for (i = 0; i < re->nrepl; i++) + { + for (j = i+1; j < re->nrepl; j++) + { + if (re->q[re->type][re->ind[j]] < re->q[re->type][re->ind[i]]) + {*/ + /* Unordered replicas are supposed to work, but there + * is still an issues somewhere. + * Note that at this point still re->ind[i]=i. + */ + /* + gmx_fatal(FARGS, "Replicas with indices %d < %d have %ss %g > %g, please order your replicas on increasing %s", + i, j, + erename[re->type], + re->q[re->type][i], re->q[re->type][j], + erename[re->type]); + + k = re->ind[i]; + re->ind[i] = re->ind[j]; + re->ind[j] = k; + } + else if (re->q[re->type][re->ind[j]] == re->q[re->type][re->ind[i]]) + { + gmx_fatal(FARGS, "Two replicas have identical %ss", erename[re->type]); + } + } + } + } + */ + /* END PLUMED */ + + /* keep track of all the swaps, starting with the initial placement. */ + snew(re->allswaps, re->nrepl); + for (i = 0; i < re->nrepl; i++) + { + re->allswaps[i] = re->ind[i]; + } + + switch (re->type) + { + case ereTEMP: + fprintf(fplog, "\nReplica exchange in temperature\n"); + for (i = 0; i < re->nrepl; i++) + { + fprintf(fplog, " %5.1f", re->q[re->type][re->ind[i]]); + } + fprintf(fplog, "\n"); + break; + case ereLAMBDA: + fprintf(fplog, "\nReplica exchange in lambda\n"); + for (i = 0; i < re->nrepl; i++) + { + fprintf(fplog, " %3d", (int)re->q[re->type][re->ind[i]]); + } + fprintf(fplog, "\n"); + break; + case ereTL: + fprintf(fplog, "\nReplica exchange in temperature and lambda state\n"); + for (i = 0; i < re->nrepl; i++) + { + fprintf(fplog, " %5.1f", re->q[ereTEMP][re->ind[i]]); + } + fprintf(fplog, "\n"); + for (i = 0; i < re->nrepl; i++) + { + fprintf(fplog, " %5d", (int)re->q[ereLAMBDA][re->ind[i]]); + } + fprintf(fplog, "\n"); + break; + default: + gmx_incons("Unknown replica exchange quantity"); + } + if (re->bNPT) + { + fprintf(fplog, "\nRepl p"); + for (i = 0; i < re->nrepl; i++) + { + fprintf(fplog, " %5.2f", re->pres[re->ind[i]]); + } + + for (i = 0; i < re->nrepl; i++) + { + if ((i > 0) && (re->pres[re->ind[i]] < re->pres[re->ind[i-1]])) + { + fprintf(fplog, "\nWARNING: The reference pressures decrease with increasing temperatures\n\n"); + fprintf(stderr, "\nWARNING: The reference pressures decrease with increasing temperatures\n\n"); + } + } + } + re->nst = nst; + if (init_seed == -1) + { + if (MASTERSIM(ms)) + { + re->seed = static_cast<int>(gmx::makeRandomSeed()); + } + else + { + re->seed = 0; + } + gmx_sumi_sim(1, &(re->seed), ms); + } + else + { + re->seed = init_seed; + } + fprintf(fplog, "\nReplica exchange interval: %d\n", re->nst); + fprintf(fplog, "\nReplica random seed: %d\n", re->seed); + + re->nattempt[0] = 0; + re->nattempt[1] = 0; + + 
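+    /* Note on the seed handling above: when init_seed == -1 only the
+     * master simulation draws a random seed and all the other replicas
+     * contribute 0, so the gmx_sumi_sim() call is effectively a broadcast;
+     * e.g. with three replicas, 12345 + 0 + 0 = 12345 on every replica. */
+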
snew(re->prob_sum, re->nrepl); + snew(re->nexchange, re->nrepl); + snew(re->nmoves, re->nrepl); + for (i = 0; i < re->nrepl; i++) + { + snew(re->nmoves[i], re->nrepl); + } + fprintf(fplog, "Replica exchange information below: ex and x = exchange, pr = probability\n"); + + /* generate space for the helper functions so we don't have to snew each time */ + + snew(re->destinations, re->nrepl); + snew(re->incycle, re->nrepl); + snew(re->tmpswap, re->nrepl); + snew(re->cyclic, re->nrepl); + snew(re->order, re->nrepl); + for (i = 0; i < re->nrepl; i++) + { + snew(re->cyclic[i], re->nrepl+1); + snew(re->order[i], re->nrepl); + } + /* allocate space for the functions storing the data for the replicas */ + /* not all of these arrays needed in all cases, but they don't take + up much space, since the max size is nrepl**2 */ + snew(re->prob, re->nrepl); + snew(re->bEx, re->nrepl); + snew(re->beta, re->nrepl); + snew(re->Vol, re->nrepl); + snew(re->Epot, re->nrepl); + snew(re->de, re->nrepl); + for (i = 0; i < re->nrepl; i++) + { + snew(re->de[i], re->nrepl); + } + re->nex = nex; + return re; +} + +static void exchange_reals(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, real *v, int n) +{ + real *buf; + int i; + + if (v) + { + snew(buf, n); +#if GMX_MPI + /* + MPI_Sendrecv(v, n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0, + buf,n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0, + ms->mpi_comm_masters,MPI_STATUS_IGNORE); + */ + { + MPI_Request mpi_req; + + MPI_Isend(v, n*sizeof(real), MPI_BYTE, MSRANK(ms, b), 0, + ms->mpi_comm_masters, &mpi_req); + MPI_Recv(buf, n*sizeof(real), MPI_BYTE, MSRANK(ms, b), 0, + ms->mpi_comm_masters, MPI_STATUS_IGNORE); + MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); + } +#endif + for (i = 0; i < n; i++) + { + v[i] = buf[i]; + } + sfree(buf); + } +} + + +static void exchange_doubles(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, double *v, int n) +{ + double *buf; + int i; + + if (v) + { + snew(buf, n); +#if GMX_MPI + /* + MPI_Sendrecv(v, n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0, + buf,n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0, + ms->mpi_comm_masters,MPI_STATUS_IGNORE); + */ + { + MPI_Request mpi_req; + + MPI_Isend(v, n*sizeof(double), MPI_BYTE, MSRANK(ms, b), 0, + ms->mpi_comm_masters, &mpi_req); + MPI_Recv(buf, n*sizeof(double), MPI_BYTE, MSRANK(ms, b), 0, + ms->mpi_comm_masters, MPI_STATUS_IGNORE); + MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); + } +#endif + for (i = 0; i < n; i++) + { + v[i] = buf[i]; + } + sfree(buf); + } +} + +static void exchange_rvecs(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, rvec *v, int n) +{ + rvec *buf; + int i; + + if (v) + { + snew(buf, n); +#if GMX_MPI + /* + MPI_Sendrecv(v[0], n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0, + buf[0],n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0, + ms->mpi_comm_masters,MPI_STATUS_IGNORE); + */ + { + MPI_Request mpi_req; + + MPI_Isend(v[0], n*sizeof(rvec), MPI_BYTE, MSRANK(ms, b), 0, + ms->mpi_comm_masters, &mpi_req); + MPI_Recv(buf[0], n*sizeof(rvec), MPI_BYTE, MSRANK(ms, b), 0, + ms->mpi_comm_masters, MPI_STATUS_IGNORE); + MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); + } +#endif + for (i = 0; i < n; i++) + { + copy_rvec(buf[i], v[i]); + } + sfree(buf); + } +} + +static void exchange_state(const gmx_multisim_t *ms, int b, t_state *state) +{ + /* When t_state changes, this code should be updated. 
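+       Note that each field below must be exchanged by both partners in the
+       same order: the exchange_* helpers above pair a nonblocking MPI_Isend
+       with a blocking MPI_Recv, so the message sizes have to match on the
+       two ranks.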
*/ + int ngtc, nnhpres; + ngtc = state->ngtc * state->nhchainlength; + nnhpres = state->nnhpres* state->nhchainlength; + exchange_rvecs(ms, b, state->box, DIM); + exchange_rvecs(ms, b, state->box_rel, DIM); + exchange_rvecs(ms, b, state->boxv, DIM); + exchange_reals(ms, b, &(state->veta), 1); + exchange_reals(ms, b, &(state->vol0), 1); + exchange_rvecs(ms, b, state->svir_prev, DIM); + exchange_rvecs(ms, b, state->fvir_prev, DIM); + exchange_rvecs(ms, b, state->pres_prev, DIM); + exchange_doubles(ms, b, state->nosehoover_xi, ngtc); + exchange_doubles(ms, b, state->nosehoover_vxi, ngtc); + exchange_doubles(ms, b, state->nhpres_xi, nnhpres); + exchange_doubles(ms, b, state->nhpres_vxi, nnhpres); + exchange_doubles(ms, b, state->therm_integral, state->ngtc); + exchange_rvecs(ms, b, state->x, state->natoms); + exchange_rvecs(ms, b, state->v, state->natoms); +} + +static void copy_rvecs(rvec *s, rvec *d, int n) +{ + int i; + + if (d != NULL) + { + for (i = 0; i < n; i++) + { + copy_rvec(s[i], d[i]); + } + } +} + +static void copy_doubles(const double *s, double *d, int n) +{ + int i; + + if (d != NULL) + { + for (i = 0; i < n; i++) + { + d[i] = s[i]; + } + } +} + +static void copy_reals(const real *s, real *d, int n) +{ + int i; + + if (d != NULL) + { + for (i = 0; i < n; i++) + { + d[i] = s[i]; + } + } +} + +static void copy_ints(const int *s, int *d, int n) +{ + int i; + + if (d != NULL) + { + for (i = 0; i < n; i++) + { + d[i] = s[i]; + } + } +} + +#define scopy_rvecs(v, n) copy_rvecs(state->v, state_local->v, n); +#define scopy_doubles(v, n) copy_doubles(state->v, state_local->v, n); +#define scopy_reals(v, n) copy_reals(state->v, state_local->v, n); +#define scopy_ints(v, n) copy_ints(state->v, state_local->v, n); + +static void copy_state_nonatomdata(t_state *state, t_state *state_local) +{ + /* When t_state changes, this code should be updated. 
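+       The scopy_* macros above copy the named field from state into
+       state_local, so a new t_state member normally needs just one
+       extra line here.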
*/ + int ngtc, nnhpres; + ngtc = state->ngtc * state->nhchainlength; + nnhpres = state->nnhpres* state->nhchainlength; + scopy_rvecs(box, DIM); + scopy_rvecs(box_rel, DIM); + scopy_rvecs(boxv, DIM); + state_local->veta = state->veta; + state_local->vol0 = state->vol0; + scopy_rvecs(svir_prev, DIM); + scopy_rvecs(fvir_prev, DIM); + scopy_rvecs(pres_prev, DIM); + scopy_doubles(nosehoover_xi, ngtc); + scopy_doubles(nosehoover_vxi, ngtc); + scopy_doubles(nhpres_xi, nnhpres); + scopy_doubles(nhpres_vxi, nnhpres); + scopy_doubles(therm_integral, state->ngtc); + scopy_rvecs(x, state->natoms); + scopy_rvecs(v, state->natoms); + copy_ints(&(state->fep_state), &(state_local->fep_state), 1); + scopy_reals(lambda, efptNR); +} + +static void scale_velocities(t_state *state, real fac) +{ + int i; + + if (state->v) + { + for (i = 0; i < state->natoms; i++) + { + svmul(fac, state->v[i], state->v[i]); + } + } +} + +static void print_transition_matrix(FILE *fplog, int n, int **nmoves, int *nattempt) +{ + int i, j, ntot; + float Tprint; + + ntot = nattempt[0] + nattempt[1]; + fprintf(fplog, "\n"); + fprintf(fplog, "Repl"); + for (i = 0; i < n; i++) + { + fprintf(fplog, " "); /* put the title closer to the center */ + } + fprintf(fplog, "Empirical Transition Matrix\n"); + + fprintf(fplog, "Repl"); + for (i = 0; i < n; i++) + { + fprintf(fplog, "%8d", (i+1)); + } + fprintf(fplog, "\n"); + + for (i = 0; i < n; i++) + { + fprintf(fplog, "Repl"); + for (j = 0; j < n; j++) + { + Tprint = 0.0; + if (nmoves[i][j] > 0) + { + Tprint = nmoves[i][j]/(2.0*ntot); + } + fprintf(fplog, "%8.4f", Tprint); + } + fprintf(fplog, "%3d\n", i); + } +} + +static void print_ind(FILE *fplog, const char *leg, int n, int *ind, gmx_bool *bEx) +{ + int i; + + fprintf(fplog, "Repl %2s %2d", leg, ind[0]); + for (i = 1; i < n; i++) + { + fprintf(fplog, " %c %2d", (bEx != 0 && bEx[i]) ? 'x' : ' ', ind[i]); + } + fprintf(fplog, "\n"); +} + +static void print_allswitchind(FILE *fplog, int n, int *pind, int *allswaps, int *tmpswap) +{ + int i; + + for (i = 0; i < n; i++) + { + tmpswap[i] = allswaps[i]; + } + for (i = 0; i < n; i++) + { + allswaps[i] = tmpswap[pind[i]]; + } + + fprintf(fplog, "\nAccepted Exchanges: "); + for (i = 0; i < n; i++) + { + fprintf(fplog, "%d ", pind[i]); + } + fprintf(fplog, "\n"); + + /* the "Order After Exchange" is the state label corresponding to the configuration that + started in state listed in order, i.e. + + 3 0 1 2 + + means that the: + configuration starting in simulation 3 is now in simulation 0, + configuration starting in simulation 0 is now in simulation 1, + configuration starting in simulation 1 is now in simulation 2, + configuration starting in simulation 2 is now in simulation 3 + */ + fprintf(fplog, "Order After Exchange: "); + for (i = 0; i < n; i++) + { + fprintf(fplog, "%d ", allswaps[i]); + } + fprintf(fplog, "\n\n"); +} + +static void print_prob(FILE *fplog, const char *leg, int n, real *prob) +{ + int i; + char buf[8]; + + fprintf(fplog, "Repl %2s ", leg); + for (i = 1; i < n; i++) + { + if (prob[i] >= 0) + { + sprintf(buf, "%4.2f", prob[i]); + fprintf(fplog, " %3s", buf[0] == '1' ? 
"1.0" : buf+1); + } + else + { + fprintf(fplog, " "); + } + } + fprintf(fplog, "\n"); +} + +static void print_count(FILE *fplog, const char *leg, int n, int *count) +{ + int i; + + fprintf(fplog, "Repl %2s ", leg); + for (i = 1; i < n; i++) + { + fprintf(fplog, " %4d", count[i]); + } + fprintf(fplog, "\n"); +} + +static real calc_delta(FILE *fplog, gmx_bool bPrint, struct gmx_repl_ex *re, int a, int b, int ap, int bp) +{ + + real ediff, dpV, delta = 0; + real *Epot = re->Epot; + real *Vol = re->Vol; + real **de = re->de; + real *beta = re->beta; + + /* Two cases; we are permuted and not. In all cases, setting ap = a and bp = b will reduce + to the non permuted case */ + + switch (re->type) + { + case ereTEMP: + /* + * Okabe et. al. Chem. Phys. Lett. 335 (2001) 435-439 + */ + ediff = Epot[b] - Epot[a]; + delta = -(beta[bp] - beta[ap])*ediff; + break; + case ereLAMBDA: + /* two cases: when we are permuted, and not. */ + /* non-permuted: + ediff = E_new - E_old + = [H_b(x_a) + H_a(x_b)] - [H_b(x_b) + H_a(x_a)] + = [H_b(x_a) - H_a(x_a)] + [H_a(x_b) - H_b(x_b)] + = de[b][a] + de[a][b] */ + + /* permuted: + ediff = E_new - E_old + = [H_bp(x_a) + H_ap(x_b)] - [H_bp(x_b) + H_ap(x_a)] + = [H_bp(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_bp(x_b)] + = [H_bp(x_a) - H_a(x_a) + H_a(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_b(x_b) + H_b(x_b) - H_bp(x_b)] + = [H_bp(x_a) - H_a(x_a)] - [H_ap(x_a) - H_a(x_a)] + [H_ap(x_b) - H_b(x_b)] - H_bp(x_b) - H_b(x_b)] + = (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b]) */ + /* but, in the current code implementation, we flip configurations, not indices . . . + So let's examine that. + = [H_b(x_ap) - H_a(x_a)] - [H_a(x_ap) - H_a(x_a)] + [H_a(x_bp) - H_b(x_b)] - H_b(x_bp) - H_b(x_b)] + = [H_b(x_ap) - H_a(x_ap)] + [H_a(x_bp) - H_b(x_pb)] + = (de[b][ap] - de[a][ap]) + (de[a][bp] - de[b][bp] + So, if we exchange b<=> bp and a<=> ap, we return to the same result. + So the simple solution is to flip the + position of perturbed and original indices in the tests. + */ + + ediff = (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b]); + delta = ediff*beta[a]; /* assume all same temperature in this case */ + break; + case ereTL: + /* not permuted: */ + /* delta = reduced E_new - reduced E_old + = [beta_b H_b(x_a) + beta_a H_a(x_b)] - [beta_b H_b(x_b) + beta_a H_a(x_a)] + = [beta_b H_b(x_a) - beta_a H_a(x_a)] + [beta_a H_a(x_b) - beta_b H_b(x_b)] + = [beta_b dH_b(x_a) + beta_b H_a(x_a) - beta_a H_a(x_a)] + + [beta_a dH_a(x_b) + beta_a H_b(x_b) - beta_b H_b(x_b)] + = [beta_b dH_b(x_a) + [beta_a dH_a(x_b) + + beta_b (H_a(x_a) - H_b(x_b)]) - beta_a (H_a(x_a) - H_b(x_b)) + = beta_b dH_b(x_a) + beta_a dH_a(x_b) - (beta_b - beta_a)(H_b(x_b) - H_a(x_a) */ + /* delta = beta[b]*de[b][a] + beta[a]*de[a][b] - (beta[b] - beta[a])*(Epot[b] - Epot[a]; */ + /* permuted (big breath!) 
+static void
+test_for_replica_exchange(FILE *fplog,
+                          const gmx_multisim_t *ms,
+                          struct gmx_repl_ex *re,
+                          gmx_enerdata_t *enerd,
+                          real vol,
+                          gmx_int64_t step,
+                          real time)
+{
+    int       m, i, j, a, b, ap, bp, i0, i1, tmp;
+    real      delta = 0;
+    gmx_bool  bPrint, bMultiEx;
+    gmx_bool *bEx      = re->bEx;
+    real     *prob     = re->prob;
+    int      *pind     = re->destinations; /* permuted index */
+    gmx_bool  bEpot    = FALSE;
+    gmx_bool  bDLambda = FALSE;
+    gmx_bool  bVol     = FALSE;
+    gmx::ThreeFry2x64<64>               rng(re->seed, gmx::RandomDomain::ReplicaExchange);
+    gmx::UniformRealDistribution<real>  uniformRealDist;
+    gmx::UniformIntDistribution<int>    uniformNreplDist(0, re->nrepl-1);
+
+    bMultiEx = (re->nex > 1); /* multiple exchanges at each state */
+    fprintf(fplog, "Replica exchange at step %" GMX_PRId64 " time %.5f\n", step, time);
+
+    if (re->bNPT)
+    {
+        for (i = 0; i < re->nrepl; i++)
+        {
+            re->Vol[i] = 0;
+        }
+        bVol              = TRUE;
+        re->Vol[re->repl] = vol;
+    }
+    if ((re->type == ereTEMP || re->type == ereTL))
+    {
+        for (i = 0; i < re->nrepl; i++)
+        {
+            re->Epot[i] = 0;
+        }
+        bEpot              = TRUE;
+        re->Epot[re->repl] = enerd->term[F_EPOT];
+        /* temperatures of the different states */
+        for (i = 0; i < re->nrepl; i++)
+        {
+            re->beta[i] = 1.0/(re->q[ereTEMP][i]*BOLTZ);
+        }
+    }
+    else
+    {
+        for (i = 0; i < re->nrepl; i++)
+        {
+            re->beta[i] = 1.0/(re->temp*BOLTZ); /* we have a single temperature */
+        }
+    }
+    if (re->type == ereLAMBDA || re->type == ereTL)
+    {
+        bDLambda = TRUE;
+        /* lambda differences. */
+        /* de[i][j] is the energy of the jth simulation in the ith Hamiltonian
+           minus the energy of the jth simulation in the jth Hamiltonian */
+        for (i = 0; i < re->nrepl; i++)
+        {
+            for (j = 0; j < re->nrepl; j++)
+            {
+                re->de[i][j] = 0;
+            }
+        }
+        for (i = 0; i < re->nrepl; i++)
+        {
+            re->de[i][re->repl] = (enerd->enerpart_lambda[(int)re->q[ereLAMBDA][i]+1]-enerd->enerpart_lambda[0]);
+        }
+    }
+
+    /* now actually do the communication */
+    if (bVol)
+    {
+        gmx_sum_sim(re->nrepl, re->Vol, ms);
+    }
+    if (bEpot)
+    {
+        gmx_sum_sim(re->nrepl, re->Epot, ms);
+    }
+    if (bDLambda)
+    {
+        for (i = 0; i < re->nrepl; i++)
+        {
+            gmx_sum_sim(re->nrepl, re->de[i], ms);
+        }
+    }
+
+    /* make a duplicate set of indices for shuffling */
+    for (i = 0; i < re->nrepl; i++)
+    {
+        pind[i] = re->ind[i];
+    }
+
+    /* PLUMED */
+    int plumed_test_exchange_pattern = 0;
+    /* END PLUMED */
+
+    if (bMultiEx)
+    {
+        /* multiple random switch exchange */
+        int nself = 0;
+
+        rng.restart( step, 0 );
+
+        for (i = 0; i < re->nex + nself; i++)
+        {
+            /* randomly select a pair */
+            /* in theory, could reduce this by identifying only which switches had a nonnegligible
+               probability of occurring (log p > -100) and only operate on those switches */
+            /* find out which state it is from, and what label that state currently has. Likely
+               more work than useful. */
+            i0 = uniformNreplDist(rng);
+            i1 = uniformNreplDist(rng);
+            if (i0 == i1)
+            {
+                nself++;
+                continue;  /* self-exchange, back up and do it again */
+            }
+
+            a  = re->ind[i0]; /* what are the indices of these states? */
+            b  = re->ind[i1];
+            ap = pind[i0];
+            bp = pind[i1];
+
+            bPrint = FALSE; /* too noisy */
+            /* calculate the energy difference */
+            /* if the code changes to flip the STATES, rather than the configurations,
+               use the commented version of the code */
+            /* delta = calc_delta(fplog,bPrint,re,a,b,ap,bp); */
+            delta = calc_delta(fplog, bPrint, re, ap, bp, a, b);
+
+            /* we actually only use the first space in the prob and bEx array,
+               since there are actually many switches between pairs.
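+               Only prob[0] and bEx[0] are filled in here: the pairs are
+               drawn at random, so per-pair statistics would not mean much,
+               and prob_sum[0] just accumulates the average acceptance
+               probability over all attempted swaps.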
*/ + + if (delta <= 0) + { + /* accepted */ + prob[0] = 1; + bEx[0] = TRUE; + } + else + { + if (delta > PROBABILITYCUTOFF) + { + prob[0] = 0; + } + else + { + prob[0] = exp(-delta); + } + /* roll a number to determine if accepted */ + bEx[0] = uniformRealDist(rng) < prob[0]; + } + re->prob_sum[0] += prob[0]; + + if (bEx[0]) + { + /* swap the states */ + tmp = pind[i0]; + pind[i0] = pind[i1]; + pind[i1] = tmp; + } + } + re->nattempt[0]++; /* keep track of total permutation trials here */ + print_allswitchind(fplog, re->nrepl, pind, re->allswaps, re->tmpswap); + } + else + { + /* standard nearest neighbor replica exchange */ + + m = (step / re->nst) % 2; + /* PLUMED */ + if(plumedswitch){ + int partner=re->repl; + plumed_cmd(plumedmain,"getExchangesFlag",&plumed_test_exchange_pattern); + if(plumed_test_exchange_pattern>0){ + int *list; + snew(list,re->nrepl); + plumed_cmd(plumedmain,"setNumberOfReplicas",&(re->nrepl)); + plumed_cmd(plumedmain,"getExchangesList",list); + for(i=0; i<re->nrepl; i++) re->ind[i]=list[i]; + sfree(list); + } + + for(i=1; i<re->nrepl; i++) { + if (i % 2 != m) continue; + a = re->ind[i-1]; + b = re->ind[i]; + if(re->repl==a) partner=b; + if(re->repl==b) partner=a; + } + plumed_cmd(plumedmain,"GREX setPartner",&partner); + plumed_cmd(plumedmain,"GREX calculate",NULL); + plumed_cmd(plumedmain,"GREX shareAllDeltaBias",NULL); + } + /* END PLUMED */ + for (i = 1; i < re->nrepl; i++) + { + a = re->ind[i-1]; + b = re->ind[i]; + + bPrint = (re->repl == a || re->repl == b); + if (i % 2 == m) + { + delta = calc_delta(fplog, bPrint, re, a, b, a, b); + /* PLUMED */ + if(plumedswitch){ + real adb,bdb,dplumed; + char buf[300]; + sprintf(buf,"GREX getDeltaBias %d",a); plumed_cmd(plumedmain,buf,&adb); + sprintf(buf,"GREX getDeltaBias %d",b); plumed_cmd(plumedmain,buf,&bdb); + dplumed=adb*re->beta[a]+bdb*re->beta[b]; + delta+=dplumed; + if (bPrint) + fprintf(fplog,"dplumed = %10.3e dE_Term = %10.3e (kT)\n",dplumed,delta); + } + /* END PLUMED */ + if (delta <= 0) + { + /* accepted */ + prob[i] = 1; + bEx[i] = TRUE; + } + else + { + if (delta > PROBABILITYCUTOFF) + { + prob[i] = 0; + } + else + { + prob[i] = exp(-delta); + } + /* roll a number to determine if accepted */ + bEx[i] = uniformRealDist(rng) < prob[i]; + } + re->prob_sum[i] += prob[i]; + + if (bEx[i]) + { + /* PLUMED */ + if(!plumed_test_exchange_pattern) { + /* standard neighbour swapping */ + /* swap these two */ + tmp = pind[i-1]; + pind[i-1] = pind[i]; + pind[i] = tmp; + re->nexchange[i]++; /* statistics for back compatibility */ + } else { + /* alternative swapping patterns */ + tmp = pind[a]; + pind[a] = pind[b]; + pind[b] = tmp; + re->nexchange[i]++; /* statistics for back compatibility */ + } + /* END PLUMED */ + } + } + else + { + prob[i] = -1; + bEx[i] = FALSE; + } + } + /* print some statistics */ + print_ind(fplog, "ex", re->nrepl, re->ind, bEx); + print_prob(fplog, "pr", re->nrepl, prob); + fprintf(fplog, "\n"); + re->nattempt[m]++; + } + + /* PLUMED */ + if(plumed_test_exchange_pattern>0) { + for (i = 0; i < re->nrepl; i++) + { + re->ind[i] = i; + } + } + /* END PLUMED */ + + /* record which moves were made and accepted */ + for (i = 0; i < re->nrepl; i++) + { + re->nmoves[re->ind[i]][pind[i]] += 1; + re->nmoves[pind[i]][re->ind[i]] += 1; + } + fflush(fplog); /* make sure we can see what the last exchange was */ +} + +static void +cyclic_decomposition(const int *destinations, + int **cyclic, + gmx_bool *incycle, + const int nrepl, + int *nswap) +{ + + int i, j, c, p; + int maxlen = 1; + for (i = 0; i < nrepl; 
i++) + { + incycle[i] = FALSE; + } + for (i = 0; i < nrepl; i++) /* one cycle for each replica */ + { + if (incycle[i]) + { + cyclic[i][0] = -1; + continue; + } + cyclic[i][0] = i; + incycle[i] = TRUE; + c = 1; + p = i; + for (j = 0; j < nrepl; j++) /* potentially all cycles are part, but we will break first */ + { + p = destinations[p]; /* start permuting */ + if (p == i) + { + cyclic[i][c] = -1; + if (c > maxlen) + { + maxlen = c; + } + break; /* we've reached the original element, the cycle is complete, and we marked the end. */ + } + else + { + cyclic[i][c] = p; /* each permutation gives a new member of the cycle */ + incycle[p] = TRUE; + c++; + } + } + } + *nswap = maxlen - 1; + + if (debug) + { + for (i = 0; i < nrepl; i++) + { + fprintf(debug, "Cycle %d:", i); + for (j = 0; j < nrepl; j++) + { + if (cyclic[i][j] < 0) + { + break; + } + fprintf(debug, "%2d", cyclic[i][j]); + } + fprintf(debug, "\n"); + } + fflush(debug); + } +} + +static void +compute_exchange_order(int **cyclic, + int **order, + const int nrepl, + const int maxswap) +{ + int i, j; + + for (j = 0; j < maxswap; j++) + { + for (i = 0; i < nrepl; i++) + { + if (cyclic[i][j+1] >= 0) + { + order[cyclic[i][j+1]][j] = cyclic[i][j]; + order[cyclic[i][j]][j] = cyclic[i][j+1]; + } + } + for (i = 0; i < nrepl; i++) + { + if (order[i][j] < 0) + { + order[i][j] = i; /* if it's not exchanging, it should stay this round*/ + } + } + } + + if (debug) + { + fprintf(debug, "Replica Exchange Order\n"); + for (i = 0; i < nrepl; i++) + { + fprintf(debug, "Replica %d:", i); + for (j = 0; j < maxswap; j++) + { + if (order[i][j] < 0) + { + break; + } + fprintf(debug, "%2d", order[i][j]); + } + fprintf(debug, "\n"); + } + fflush(debug); + } +} + +static void +prepare_to_do_exchange(struct gmx_repl_ex *re, + const int replica_id, + int *maxswap, + gmx_bool *bThisReplicaExchanged) +{ + int i, j; + /* Hold the cyclic decomposition of the (multiple) replica + * exchange. */ + gmx_bool bAnyReplicaExchanged = FALSE; + *bThisReplicaExchanged = FALSE; + + for (i = 0; i < re->nrepl; i++) + { + if (re->destinations[i] != re->ind[i]) + { + /* only mark as exchanged if the index has been shuffled */ + bAnyReplicaExchanged = TRUE; + break; + } + } + if (bAnyReplicaExchanged) + { + /* reinitialize the placeholder arrays */ + for (i = 0; i < re->nrepl; i++) + { + for (j = 0; j < re->nrepl; j++) + { + re->cyclic[i][j] = -1; + re->order[i][j] = -1; + } + } + + /* Identify the cyclic decomposition of the permutation (very + * fast if neighbor replica exchange). */ + cyclic_decomposition(re->destinations, re->cyclic, re->incycle, re->nrepl, maxswap); + + /* Now translate the decomposition into a replica exchange + * order at each step. */ + compute_exchange_order(re->cyclic, re->order, re->nrepl, *maxswap); + + /* Did this replica do any exchange at any point? */ + for (j = 0; j < *maxswap; j++) + { + if (replica_id != re->order[replica_id][j]) + { + *bThisReplicaExchanged = TRUE; + break; + } + } + } +} + +gmx_bool replica_exchange(FILE *fplog, const t_commrec *cr, struct gmx_repl_ex *re, + t_state *state, gmx_enerdata_t *enerd, + t_state *state_local, gmx_int64_t step, real time) +{ + int j; + int replica_id = 0; + int exchange_partner; + int maxswap = 0; + /* Number of rounds of exchanges needed to deal with any multiple + * exchanges. */ + /* Where each replica ends up after the exchange attempt(s). */ + /* The order in which multiple exchanges will occur. 
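+     * For example, destinations {1, 0, 3, 2} decomposes into the two
+     * 2-cycles (0 1) and (2 3), so maxswap = 1 and all the exchanges
+     * complete in a single round.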
*/ + gmx_bool bThisReplicaExchanged = FALSE; + + /* PLUMED */ + if(plumedswitch)plumed_cmd(plumedmain,"GREX prepare",NULL); + /* END PLUMED */ + + if (MASTER(cr)) + { + replica_id = re->repl; + test_for_replica_exchange(fplog, cr->ms, re, enerd, det(state_local->box), step, time); + prepare_to_do_exchange(re, replica_id, &maxswap, &bThisReplicaExchanged); + } + /* Do intra-simulation broadcast so all processors belonging to + * each simulation know whether they need to participate in + * collecting the state. Otherwise, they might as well get on with + * the next thing to do. */ + if (DOMAINDECOMP(cr)) + { +#if GMX_MPI + MPI_Bcast(&bThisReplicaExchanged, sizeof(gmx_bool), MPI_BYTE, MASTERRANK(cr), + cr->mpi_comm_mygroup); +#endif + } + + if (bThisReplicaExchanged) + { + /* Exchange the states */ + /* Collect the global state on the master node */ + if (DOMAINDECOMP(cr)) + { + dd_collect_state(cr->dd, state_local, state); + } + else + { + copy_state_nonatomdata(state_local, state); + } + + if (MASTER(cr)) + { + /* There will be only one swap cycle with standard replica + * exchange, but there may be multiple swap cycles if we + * allow multiple swaps. */ + + for (j = 0; j < maxswap; j++) + { + exchange_partner = re->order[replica_id][j]; + + if (exchange_partner != replica_id) + { + /* Exchange the global states between the master nodes */ + if (debug) + { + fprintf(debug, "Exchanging %d with %d\n", replica_id, exchange_partner); + } + exchange_state(cr->ms, exchange_partner, state); + } + } + /* For temperature-type replica exchange, we need to scale + * the velocities. */ + if (re->type == ereTEMP || re->type == ereTL) + { + scale_velocities(state, sqrt(re->q[ereTEMP][replica_id]/re->q[ereTEMP][re->destinations[replica_id]])); + } + + } + + /* With domain decomposition the global state is distributed later */ + if (!DOMAINDECOMP(cr)) + { + /* Copy the global state to the local state data structure */ + copy_state_nonatomdata(state, state_local); + } + } + + return bThisReplicaExchanged; +} + +void print_replica_exchange_statistics(FILE *fplog, struct gmx_repl_ex *re) +{ + int i; + + fprintf(fplog, "\nReplica exchange statistics\n"); + + if (re->nex == 0) + { + fprintf(fplog, "Repl %d attempts, %d odd, %d even\n", + re->nattempt[0]+re->nattempt[1], re->nattempt[1], re->nattempt[0]); + + fprintf(fplog, "Repl average probabilities:\n"); + for (i = 1; i < re->nrepl; i++) + { + if (re->nattempt[i%2] == 0) + { + re->prob[i] = 0; + } + else + { + re->prob[i] = re->prob_sum[i]/re->nattempt[i%2]; + } + } + print_ind(fplog, "", re->nrepl, re->ind, NULL); + print_prob(fplog, "", re->nrepl, re->prob); + + fprintf(fplog, "Repl number of exchanges:\n"); + print_ind(fplog, "", re->nrepl, re->ind, NULL); + print_count(fplog, "", re->nrepl, re->nexchange); + + fprintf(fplog, "Repl average number of exchanges:\n"); + for (i = 1; i < re->nrepl; i++) + { + if (re->nattempt[i%2] == 0) + { + re->prob[i] = 0; + } + else + { + re->prob[i] = ((real)re->nexchange[i])/re->nattempt[i%2]; + } + } + print_ind(fplog, "", re->nrepl, re->ind, NULL); + print_prob(fplog, "", re->nrepl, re->prob); + + fprintf(fplog, "\n"); + } + /* print the transition matrix */ + print_transition_matrix(fplog, re->nrepl, re->nmoves, re->nattempt); +} diff --git a/patches/gromacs-2016-beta1.diff/src/programs/mdrun/repl_ex.cpp.preplumed b/patches/gromacs-2016-beta1.diff/src/programs/mdrun/repl_ex.cpp.preplumed new file mode 100644 index 0000000000000000000000000000000000000000..ab12fc4f1789ddbc42674ed56b6268069fdc369a --- /dev/null +++ 
b/patches/gromacs-2016-beta1.diff/src/programs/mdrun/repl_ex.cpp.preplumed @@ -0,0 +1,1399 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2011,2012,2013,2014,2015,2016, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ + +#include "gmxpre.h" + +#include "repl_ex.h" + +#include "config.h" + +#include <math.h> + +#include <random> + +#include "gromacs/domdec/domdec.h" +#include "gromacs/gmxlib/network.h" +#include "gromacs/math/units.h" +#include "gromacs/math/vec.h" +#include "gromacs/mdlib/main.h" +#include "gromacs/mdtypes/commrec.h" +#include "gromacs/mdtypes/inputrec.h" +#include "gromacs/mdtypes/md_enums.h" +#include "gromacs/random/threefry.h" +#include "gromacs/random/uniformintdistribution.h" +#include "gromacs/random/uniformrealdistribution.h" +#include "gromacs/utility/fatalerror.h" +#include "gromacs/utility/pleasecite.h" +#include "gromacs/utility/smalloc.h" + + +#define PROBABILITYCUTOFF 100 +/* we don't bother evaluating if events are more rare than exp(-100) = 3.7x10^-44 */ + +//! Rank in the multisimulaiton +#define MSRANK(ms, nodeid) (nodeid) + +enum { + ereTEMP, ereLAMBDA, ereENDSINGLE, ereTL, ereNR +}; +const char *erename[ereNR] = { "temperature", "lambda", "end_single_marker", "temperature and lambda"}; +/* end_single_marker merely notes the end of single variable replica exchange. All types higher than + it are multiple replica exchange methods */ +/* Eventually, should add 'pressure', 'temperature and pressure', 'lambda_and_pressure', 'temperature_lambda_pressure'?; + Let's wait until we feel better about the pressure control methods giving exact ensembles. 
Right now, we assume constant pressure */ + +typedef struct gmx_repl_ex +{ + int repl; /* replica ID */ + int nrepl; /* total number of replica */ + real temp; /* temperature */ + int type; /* replica exchange type from ere enum */ + real **q; /* quantity, e.g. temperature or lambda; first index is ere, second index is replica ID */ + gmx_bool bNPT; /* use constant pressure and temperature */ + real *pres; /* replica pressures */ + int *ind; /* replica indices */ + int *allswaps; /* used for keeping track of all the replica swaps */ + int nst; /* replica exchange interval (number of steps) */ + int nex; /* number of exchanges per interval */ + int seed; /* random seed */ + int nattempt[2]; /* number of even and odd replica change attempts */ + real *prob_sum; /* sum of probabilities */ + int **nmoves; /* number of moves between replicas i and j */ + int *nexchange; /* i-th element of the array is the number of exchanges between replica i-1 and i */ + + /* these are helper arrays for replica exchange; allocated here so they + don't have to be allocated each time */ + int *destinations; + int **cyclic; + int **order; + int *tmpswap; + gmx_bool *incycle; + gmx_bool *bEx; + + /* helper arrays to hold the quantities that are exchanged */ + real *prob; + real *Epot; + real *beta; + real *Vol; + real **de; + +} t_gmx_repl_ex; + +static gmx_bool repl_quantity(const gmx_multisim_t *ms, + struct gmx_repl_ex *re, int ere, real q) +{ + real *qall; + gmx_bool bDiff; + int s; + + snew(qall, ms->nsim); + qall[re->repl] = q; + gmx_sum_sim(ms->nsim, qall, ms); + + bDiff = FALSE; + for (s = 1; s < ms->nsim; s++) + { + if (qall[s] != qall[0]) + { + bDiff = TRUE; + } + } + + if (bDiff) + { + /* Set the replica exchange type and quantities */ + re->type = ere; + + snew(re->q[ere], re->nrepl); + for (s = 0; s < ms->nsim; s++) + { + re->q[ere][s] = qall[s]; + } + } + sfree(qall); + return bDiff; +} + +gmx_repl_ex_t init_replica_exchange(FILE *fplog, + const gmx_multisim_t *ms, + const t_state *state, + const t_inputrec *ir, + int nst, int nex, int init_seed) +{ + real pres; + int i, j, k; + struct gmx_repl_ex *re; + gmx_bool bTemp; + gmx_bool bLambda = FALSE; + + fprintf(fplog, "\nInitializing Replica Exchange\n"); + + if (ms == NULL || ms->nsim == 1) + { + gmx_fatal(FARGS, "Nothing to exchange with only one replica, maybe you forgot to set the -multi option of mdrun?"); + } + if (!EI_DYNAMICS(ir->eI)) + { + gmx_fatal(FARGS, "Replica exchange is only supported by dynamical simulations"); + /* Note that PAR(cr) is defined by cr->nnodes > 1, which is + * distinct from MULTISIM(cr). A multi-simulation only runs + * with real MPI parallelism, but this does not imply PAR(cr) + * is true! + * + * Since we are using a dynamical integrator, the only + * decomposition is DD, so PAR(cr) and DOMAINDECOMP(cr) are + * synonymous. The only way for cr->nnodes > 1 to be true is + * if we are using DD. 
*/ + } + + snew(re, 1); + + re->repl = ms->sim; + re->nrepl = ms->nsim; + snew(re->q, ereENDSINGLE); + + fprintf(fplog, "Repl There are %d replicas:\n", re->nrepl); + + check_multi_int(fplog, ms, state->natoms, "the number of atoms", FALSE); + check_multi_int(fplog, ms, ir->eI, "the integrator", FALSE); + check_multi_int64(fplog, ms, ir->init_step+ir->nsteps, "init_step+nsteps", FALSE); + check_multi_int64(fplog, ms, (ir->init_step+nst-1)/nst, + "first exchange step: init_step/-replex", FALSE); + check_multi_int(fplog, ms, ir->etc, "the temperature coupling", FALSE); + check_multi_int(fplog, ms, ir->opts.ngtc, + "the number of temperature coupling groups", FALSE); + check_multi_int(fplog, ms, ir->epc, "the pressure coupling", FALSE); + check_multi_int(fplog, ms, ir->efep, "free energy", FALSE); + check_multi_int(fplog, ms, ir->fepvals->n_lambda, "number of lambda states", FALSE); + + re->temp = ir->opts.ref_t[0]; + for (i = 1; (i < ir->opts.ngtc); i++) + { + if (ir->opts.ref_t[i] != re->temp) + { + fprintf(fplog, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n"); + fprintf(stderr, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n"); + } + } + + re->type = -1; + bTemp = repl_quantity(ms, re, ereTEMP, re->temp); + if (ir->efep != efepNO) + { + bLambda = repl_quantity(ms, re, ereLAMBDA, (real)ir->fepvals->init_fep_state); + } + if (re->type == -1) /* nothing was assigned */ + { + gmx_fatal(FARGS, "The properties of the %d systems are all the same, there is nothing to exchange", re->nrepl); + } + if (bLambda && bTemp) + { + re->type = ereTL; + } + + if (bTemp) + { + please_cite(fplog, "Sugita1999a"); + if (ir->epc != epcNO) + { + re->bNPT = TRUE; + fprintf(fplog, "Repl Using Constant Pressure REMD.\n"); + please_cite(fplog, "Okabe2001a"); + } + if (ir->etc == etcBERENDSEN) + { + gmx_fatal(FARGS, "REMD with the %s thermostat does not produce correct potential energy distributions, consider using the %s thermostat instead", + ETCOUPLTYPE(ir->etc), ETCOUPLTYPE(etcVRESCALE)); + } + } + if (bLambda) + { + if (ir->fepvals->delta_lambda != 0) /* check this? */ + { + gmx_fatal(FARGS, "delta_lambda is not zero"); + } + } + if (re->bNPT) + { + snew(re->pres, re->nrepl); + if (ir->epct == epctSURFACETENSION) + { + pres = ir->ref_p[ZZ][ZZ]; + } + else + { + pres = 0; + j = 0; + for (i = 0; i < DIM; i++) + { + if (ir->compress[i][i] != 0) + { + pres += ir->ref_p[i][i]; + j++; + } + } + pres /= j; + } + re->pres[re->repl] = pres; + gmx_sum_sim(re->nrepl, re->pres, ms); + } + + /* Make an index for increasing replica order */ + /* only makes sense if one or the other is varying, not both! + if both are varying, we trust the order the person gave. */ + snew(re->ind, re->nrepl); + for (i = 0; i < re->nrepl; i++) + { + re->ind[i] = i; + } + + if (re->type < ereENDSINGLE) + { + + for (i = 0; i < re->nrepl; i++) + { + for (j = i+1; j < re->nrepl; j++) + { + if (re->q[re->type][re->ind[j]] < re->q[re->type][re->ind[i]]) + { + /* Unordered replicas are supposed to work, but there + * is still an issues somewhere. + * Note that at this point still re->ind[i]=i. 
+ */ + gmx_fatal(FARGS, "Replicas with indices %d < %d have %ss %g > %g, please order your replicas on increasing %s", + i, j, + erename[re->type], + re->q[re->type][i], re->q[re->type][j], + erename[re->type]); + + k = re->ind[i]; + re->ind[i] = re->ind[j]; + re->ind[j] = k; + } + else if (re->q[re->type][re->ind[j]] == re->q[re->type][re->ind[i]]) + { + gmx_fatal(FARGS, "Two replicas have identical %ss", erename[re->type]); + } + } + } + } + + /* keep track of all the swaps, starting with the initial placement. */ + snew(re->allswaps, re->nrepl); + for (i = 0; i < re->nrepl; i++) + { + re->allswaps[i] = re->ind[i]; + } + + switch (re->type) + { + case ereTEMP: + fprintf(fplog, "\nReplica exchange in temperature\n"); + for (i = 0; i < re->nrepl; i++) + { + fprintf(fplog, " %5.1f", re->q[re->type][re->ind[i]]); + } + fprintf(fplog, "\n"); + break; + case ereLAMBDA: + fprintf(fplog, "\nReplica exchange in lambda\n"); + for (i = 0; i < re->nrepl; i++) + { + fprintf(fplog, " %3d", (int)re->q[re->type][re->ind[i]]); + } + fprintf(fplog, "\n"); + break; + case ereTL: + fprintf(fplog, "\nReplica exchange in temperature and lambda state\n"); + for (i = 0; i < re->nrepl; i++) + { + fprintf(fplog, " %5.1f", re->q[ereTEMP][re->ind[i]]); + } + fprintf(fplog, "\n"); + for (i = 0; i < re->nrepl; i++) + { + fprintf(fplog, " %5d", (int)re->q[ereLAMBDA][re->ind[i]]); + } + fprintf(fplog, "\n"); + break; + default: + gmx_incons("Unknown replica exchange quantity"); + } + if (re->bNPT) + { + fprintf(fplog, "\nRepl p"); + for (i = 0; i < re->nrepl; i++) + { + fprintf(fplog, " %5.2f", re->pres[re->ind[i]]); + } + + for (i = 0; i < re->nrepl; i++) + { + if ((i > 0) && (re->pres[re->ind[i]] < re->pres[re->ind[i-1]])) + { + fprintf(fplog, "\nWARNING: The reference pressures decrease with increasing temperatures\n\n"); + fprintf(stderr, "\nWARNING: The reference pressures decrease with increasing temperatures\n\n"); + } + } + } + re->nst = nst; + if (init_seed == -1) + { + if (MASTERSIM(ms)) + { + re->seed = static_cast<int>(gmx::makeRandomSeed()); + } + else + { + re->seed = 0; + } + gmx_sumi_sim(1, &(re->seed), ms); + } + else + { + re->seed = init_seed; + } + fprintf(fplog, "\nReplica exchange interval: %d\n", re->nst); + fprintf(fplog, "\nReplica random seed: %d\n", re->seed); + + re->nattempt[0] = 0; + re->nattempt[1] = 0; + + snew(re->prob_sum, re->nrepl); + snew(re->nexchange, re->nrepl); + snew(re->nmoves, re->nrepl); + for (i = 0; i < re->nrepl; i++) + { + snew(re->nmoves[i], re->nrepl); + } + fprintf(fplog, "Replica exchange information below: ex and x = exchange, pr = probability\n"); + + /* generate space for the helper functions so we don't have to snew each time */ + + snew(re->destinations, re->nrepl); + snew(re->incycle, re->nrepl); + snew(re->tmpswap, re->nrepl); + snew(re->cyclic, re->nrepl); + snew(re->order, re->nrepl); + for (i = 0; i < re->nrepl; i++) + { + snew(re->cyclic[i], re->nrepl+1); + snew(re->order[i], re->nrepl); + } + /* allocate space for the functions storing the data for the replicas */ + /* not all of these arrays needed in all cases, but they don't take + up much space, since the max size is nrepl**2 */ + snew(re->prob, re->nrepl); + snew(re->bEx, re->nrepl); + snew(re->beta, re->nrepl); + snew(re->Vol, re->nrepl); + snew(re->Epot, re->nrepl); + snew(re->de, re->nrepl); + for (i = 0; i < re->nrepl; i++) + { + snew(re->de[i], re->nrepl); + } + re->nex = nex; + return re; +} + +static void exchange_reals(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, real *v, int n) +{ 
+ real *buf; + int i; + + if (v) + { + snew(buf, n); +#if GMX_MPI + /* + MPI_Sendrecv(v, n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0, + buf,n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0, + ms->mpi_comm_masters,MPI_STATUS_IGNORE); + */ + { + MPI_Request mpi_req; + + MPI_Isend(v, n*sizeof(real), MPI_BYTE, MSRANK(ms, b), 0, + ms->mpi_comm_masters, &mpi_req); + MPI_Recv(buf, n*sizeof(real), MPI_BYTE, MSRANK(ms, b), 0, + ms->mpi_comm_masters, MPI_STATUS_IGNORE); + MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); + } +#endif + for (i = 0; i < n; i++) + { + v[i] = buf[i]; + } + sfree(buf); + } +} + + +static void exchange_doubles(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, double *v, int n) +{ + double *buf; + int i; + + if (v) + { + snew(buf, n); +#if GMX_MPI + /* + MPI_Sendrecv(v, n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0, + buf,n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0, + ms->mpi_comm_masters,MPI_STATUS_IGNORE); + */ + { + MPI_Request mpi_req; + + MPI_Isend(v, n*sizeof(double), MPI_BYTE, MSRANK(ms, b), 0, + ms->mpi_comm_masters, &mpi_req); + MPI_Recv(buf, n*sizeof(double), MPI_BYTE, MSRANK(ms, b), 0, + ms->mpi_comm_masters, MPI_STATUS_IGNORE); + MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); + } +#endif + for (i = 0; i < n; i++) + { + v[i] = buf[i]; + } + sfree(buf); + } +} + +static void exchange_rvecs(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, rvec *v, int n) +{ + rvec *buf; + int i; + + if (v) + { + snew(buf, n); +#if GMX_MPI + /* + MPI_Sendrecv(v[0], n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0, + buf[0],n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0, + ms->mpi_comm_masters,MPI_STATUS_IGNORE); + */ + { + MPI_Request mpi_req; + + MPI_Isend(v[0], n*sizeof(rvec), MPI_BYTE, MSRANK(ms, b), 0, + ms->mpi_comm_masters, &mpi_req); + MPI_Recv(buf[0], n*sizeof(rvec), MPI_BYTE, MSRANK(ms, b), 0, + ms->mpi_comm_masters, MPI_STATUS_IGNORE); + MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); + } +#endif + for (i = 0; i < n; i++) + { + copy_rvec(buf[i], v[i]); + } + sfree(buf); + } +} + +static void exchange_state(const gmx_multisim_t *ms, int b, t_state *state) +{ + /* When t_state changes, this code should be updated. 
*/ + int ngtc, nnhpres; + ngtc = state->ngtc * state->nhchainlength; + nnhpres = state->nnhpres* state->nhchainlength; + exchange_rvecs(ms, b, state->box, DIM); + exchange_rvecs(ms, b, state->box_rel, DIM); + exchange_rvecs(ms, b, state->boxv, DIM); + exchange_reals(ms, b, &(state->veta), 1); + exchange_reals(ms, b, &(state->vol0), 1); + exchange_rvecs(ms, b, state->svir_prev, DIM); + exchange_rvecs(ms, b, state->fvir_prev, DIM); + exchange_rvecs(ms, b, state->pres_prev, DIM); + exchange_doubles(ms, b, state->nosehoover_xi, ngtc); + exchange_doubles(ms, b, state->nosehoover_vxi, ngtc); + exchange_doubles(ms, b, state->nhpres_xi, nnhpres); + exchange_doubles(ms, b, state->nhpres_vxi, nnhpres); + exchange_doubles(ms, b, state->therm_integral, state->ngtc); + exchange_rvecs(ms, b, state->x, state->natoms); + exchange_rvecs(ms, b, state->v, state->natoms); +} + +static void copy_rvecs(rvec *s, rvec *d, int n) +{ + int i; + + if (d != NULL) + { + for (i = 0; i < n; i++) + { + copy_rvec(s[i], d[i]); + } + } +} + +static void copy_doubles(const double *s, double *d, int n) +{ + int i; + + if (d != NULL) + { + for (i = 0; i < n; i++) + { + d[i] = s[i]; + } + } +} + +static void copy_reals(const real *s, real *d, int n) +{ + int i; + + if (d != NULL) + { + for (i = 0; i < n; i++) + { + d[i] = s[i]; + } + } +} + +static void copy_ints(const int *s, int *d, int n) +{ + int i; + + if (d != NULL) + { + for (i = 0; i < n; i++) + { + d[i] = s[i]; + } + } +} + +#define scopy_rvecs(v, n) copy_rvecs(state->v, state_local->v, n); +#define scopy_doubles(v, n) copy_doubles(state->v, state_local->v, n); +#define scopy_reals(v, n) copy_reals(state->v, state_local->v, n); +#define scopy_ints(v, n) copy_ints(state->v, state_local->v, n); + +static void copy_state_nonatomdata(t_state *state, t_state *state_local) +{ + /* When t_state changes, this code should be updated. 
*/ + int ngtc, nnhpres; + ngtc = state->ngtc * state->nhchainlength; + nnhpres = state->nnhpres* state->nhchainlength; + scopy_rvecs(box, DIM); + scopy_rvecs(box_rel, DIM); + scopy_rvecs(boxv, DIM); + state_local->veta = state->veta; + state_local->vol0 = state->vol0; + scopy_rvecs(svir_prev, DIM); + scopy_rvecs(fvir_prev, DIM); + scopy_rvecs(pres_prev, DIM); + scopy_doubles(nosehoover_xi, ngtc); + scopy_doubles(nosehoover_vxi, ngtc); + scopy_doubles(nhpres_xi, nnhpres); + scopy_doubles(nhpres_vxi, nnhpres); + scopy_doubles(therm_integral, state->ngtc); + scopy_rvecs(x, state->natoms); + scopy_rvecs(v, state->natoms); + copy_ints(&(state->fep_state), &(state_local->fep_state), 1); + scopy_reals(lambda, efptNR); +} + +static void scale_velocities(t_state *state, real fac) +{ + int i; + + if (state->v) + { + for (i = 0; i < state->natoms; i++) + { + svmul(fac, state->v[i], state->v[i]); + } + } +} + +static void print_transition_matrix(FILE *fplog, int n, int **nmoves, int *nattempt) +{ + int i, j, ntot; + float Tprint; + + ntot = nattempt[0] + nattempt[1]; + fprintf(fplog, "\n"); + fprintf(fplog, "Repl"); + for (i = 0; i < n; i++) + { + fprintf(fplog, " "); /* put the title closer to the center */ + } + fprintf(fplog, "Empirical Transition Matrix\n"); + + fprintf(fplog, "Repl"); + for (i = 0; i < n; i++) + { + fprintf(fplog, "%8d", (i+1)); + } + fprintf(fplog, "\n"); + + for (i = 0; i < n; i++) + { + fprintf(fplog, "Repl"); + for (j = 0; j < n; j++) + { + Tprint = 0.0; + if (nmoves[i][j] > 0) + { + Tprint = nmoves[i][j]/(2.0*ntot); + } + fprintf(fplog, "%8.4f", Tprint); + } + fprintf(fplog, "%3d\n", i); + } +} + +static void print_ind(FILE *fplog, const char *leg, int n, int *ind, gmx_bool *bEx) +{ + int i; + + fprintf(fplog, "Repl %2s %2d", leg, ind[0]); + for (i = 1; i < n; i++) + { + fprintf(fplog, " %c %2d", (bEx != 0 && bEx[i]) ? 'x' : ' ', ind[i]); + } + fprintf(fplog, "\n"); +} + +static void print_allswitchind(FILE *fplog, int n, int *pind, int *allswaps, int *tmpswap) +{ + int i; + + for (i = 0; i < n; i++) + { + tmpswap[i] = allswaps[i]; + } + for (i = 0; i < n; i++) + { + allswaps[i] = tmpswap[pind[i]]; + } + + fprintf(fplog, "\nAccepted Exchanges: "); + for (i = 0; i < n; i++) + { + fprintf(fplog, "%d ", pind[i]); + } + fprintf(fplog, "\n"); + + /* the "Order After Exchange" is the state label corresponding to the configuration that + started in state listed in order, i.e. + + 3 0 1 2 + + means that the: + configuration starting in simulation 3 is now in simulation 0, + configuration starting in simulation 0 is now in simulation 1, + configuration starting in simulation 1 is now in simulation 2, + configuration starting in simulation 2 is now in simulation 3 + */ + fprintf(fplog, "Order After Exchange: "); + for (i = 0; i < n; i++) + { + fprintf(fplog, "%d ", allswaps[i]); + } + fprintf(fplog, "\n\n"); +} + +static void print_prob(FILE *fplog, const char *leg, int n, real *prob) +{ + int i; + char buf[8]; + + fprintf(fplog, "Repl %2s ", leg); + for (i = 1; i < n; i++) + { + if (prob[i] >= 0) + { + sprintf(buf, "%4.2f", prob[i]); + fprintf(fplog, " %3s", buf[0] == '1' ? 
"1.0" : buf+1); + } + else + { + fprintf(fplog, " "); + } + } + fprintf(fplog, "\n"); +} + +static void print_count(FILE *fplog, const char *leg, int n, int *count) +{ + int i; + + fprintf(fplog, "Repl %2s ", leg); + for (i = 1; i < n; i++) + { + fprintf(fplog, " %4d", count[i]); + } + fprintf(fplog, "\n"); +} + +static real calc_delta(FILE *fplog, gmx_bool bPrint, struct gmx_repl_ex *re, int a, int b, int ap, int bp) +{ + + real ediff, dpV, delta = 0; + real *Epot = re->Epot; + real *Vol = re->Vol; + real **de = re->de; + real *beta = re->beta; + + /* Two cases; we are permuted and not. In all cases, setting ap = a and bp = b will reduce + to the non permuted case */ + + switch (re->type) + { + case ereTEMP: + /* + * Okabe et. al. Chem. Phys. Lett. 335 (2001) 435-439 + */ + ediff = Epot[b] - Epot[a]; + delta = -(beta[bp] - beta[ap])*ediff; + break; + case ereLAMBDA: + /* two cases: when we are permuted, and not. */ + /* non-permuted: + ediff = E_new - E_old + = [H_b(x_a) + H_a(x_b)] - [H_b(x_b) + H_a(x_a)] + = [H_b(x_a) - H_a(x_a)] + [H_a(x_b) - H_b(x_b)] + = de[b][a] + de[a][b] */ + + /* permuted: + ediff = E_new - E_old + = [H_bp(x_a) + H_ap(x_b)] - [H_bp(x_b) + H_ap(x_a)] + = [H_bp(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_bp(x_b)] + = [H_bp(x_a) - H_a(x_a) + H_a(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_b(x_b) + H_b(x_b) - H_bp(x_b)] + = [H_bp(x_a) - H_a(x_a)] - [H_ap(x_a) - H_a(x_a)] + [H_ap(x_b) - H_b(x_b)] - H_bp(x_b) - H_b(x_b)] + = (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b]) */ + /* but, in the current code implementation, we flip configurations, not indices . . . + So let's examine that. + = [H_b(x_ap) - H_a(x_a)] - [H_a(x_ap) - H_a(x_a)] + [H_a(x_bp) - H_b(x_b)] - H_b(x_bp) - H_b(x_b)] + = [H_b(x_ap) - H_a(x_ap)] + [H_a(x_bp) - H_b(x_pb)] + = (de[b][ap] - de[a][ap]) + (de[a][bp] - de[b][bp] + So, if we exchange b<=> bp and a<=> ap, we return to the same result. + So the simple solution is to flip the + position of perturbed and original indices in the tests. + */ + + ediff = (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b]); + delta = ediff*beta[a]; /* assume all same temperature in this case */ + break; + case ereTL: + /* not permuted: */ + /* delta = reduced E_new - reduced E_old + = [beta_b H_b(x_a) + beta_a H_a(x_b)] - [beta_b H_b(x_b) + beta_a H_a(x_a)] + = [beta_b H_b(x_a) - beta_a H_a(x_a)] + [beta_a H_a(x_b) - beta_b H_b(x_b)] + = [beta_b dH_b(x_a) + beta_b H_a(x_a) - beta_a H_a(x_a)] + + [beta_a dH_a(x_b) + beta_a H_b(x_b) - beta_b H_b(x_b)] + = [beta_b dH_b(x_a) + [beta_a dH_a(x_b) + + beta_b (H_a(x_a) - H_b(x_b)]) - beta_a (H_a(x_a) - H_b(x_b)) + = beta_b dH_b(x_a) + beta_a dH_a(x_b) - (beta_b - beta_a)(H_b(x_b) - H_a(x_a) */ + /* delta = beta[b]*de[b][a] + beta[a]*de[a][b] - (beta[b] - beta[a])*(Epot[b] - Epot[a]; */ + /* permuted (big breath!) 
*/ + /* delta = reduced E_new - reduced E_old + = [beta_bp H_bp(x_a) + beta_ap H_ap(x_b)] - [beta_bp H_bp(x_b) + beta_ap H_ap(x_a)] + = [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)] + = [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)] + - beta_pb H_a(x_a) + beta_ap H_a(x_a) + beta_pb H_a(x_a) - beta_ap H_a(x_a) + - beta_ap H_b(x_b) + beta_bp H_b(x_b) + beta_ap H_b(x_b) - beta_bp H_b(x_b) + = [(beta_bp H_bp(x_a) - beta_bp H_a(x_a)) - (beta_ap H_ap(x_a) - beta_ap H_a(x_a))] + + [(beta_ap H_ap(x_b) - beta_ap H_b(x_b)) - (beta_bp H_bp(x_b) - beta_bp H_b(x_b))] + + beta_pb H_a(x_a) - beta_ap H_a(x_a) + beta_ap H_b(x_b) - beta_bp H_b(x_b) + = [beta_bp (H_bp(x_a) - H_a(x_a)) - beta_ap (H_ap(x_a) - H_a(x_a))] + + [beta_ap (H_ap(x_b) - H_b(x_b)) - beta_bp (H_bp(x_b) - H_b(x_b))] + + beta_pb (H_a(x_a) - H_b(x_b)) - beta_ap (H_a(x_a) - H_b(x_b)) + = ([beta_bp de[bp][a] - beta_ap de[ap][a]) + beta_ap de[ap][b] - beta_bp de[bp][b]) + + (beta_pb-beta_ap)(H_a(x_a) - H_b(x_b)) */ + delta = beta[bp]*(de[bp][a] - de[bp][b]) + beta[ap]*(de[ap][b] - de[ap][a]) - (beta[bp]-beta[ap])*(Epot[b]-Epot[a]); + break; + default: + gmx_incons("Unknown replica exchange quantity"); + } + if (bPrint) + { + fprintf(fplog, "Repl %d <-> %d dE_term = %10.3e (kT)\n", a, b, delta); + } + if (re->bNPT) + { + /* revist the calculation for 5.0. Might be some improvements. */ + dpV = (beta[ap]*re->pres[ap]-beta[bp]*re->pres[bp])*(Vol[b]-Vol[a])/PRESFAC; + if (bPrint) + { + fprintf(fplog, " dpV = %10.3e d = %10.3e\n", dpV, delta + dpV); + } + delta += dpV; + } + return delta; +} + +static void +test_for_replica_exchange(FILE *fplog, + const gmx_multisim_t *ms, + struct gmx_repl_ex *re, + gmx_enerdata_t *enerd, + real vol, + gmx_int64_t step, + real time) +{ + int m, i, j, a, b, ap, bp, i0, i1, tmp; + real delta = 0; + gmx_bool bPrint, bMultiEx; + gmx_bool *bEx = re->bEx; + real *prob = re->prob; + int *pind = re->destinations; /* permuted index */ + gmx_bool bEpot = FALSE; + gmx_bool bDLambda = FALSE; + gmx_bool bVol = FALSE; + gmx::ThreeFry2x64<64> rng(re->seed, gmx::RandomDomain::ReplicaExchange); + gmx::UniformRealDistribution<real> uniformRealDist; + gmx::UniformIntDistribution<int> uniformNreplDist(0, re->nrepl-1); + + bMultiEx = (re->nex > 1); /* multiple exchanges at each state */ + fprintf(fplog, "Replica exchange at step %" GMX_PRId64 " time %.5f\n", step, time); + + if (re->bNPT) + { + for (i = 0; i < re->nrepl; i++) + { + re->Vol[i] = 0; + } + bVol = TRUE; + re->Vol[re->repl] = vol; + } + if ((re->type == ereTEMP || re->type == ereTL)) + { + for (i = 0; i < re->nrepl; i++) + { + re->Epot[i] = 0; + } + bEpot = TRUE; + re->Epot[re->repl] = enerd->term[F_EPOT]; + /* temperatures of different states*/ + for (i = 0; i < re->nrepl; i++) + { + re->beta[i] = 1.0/(re->q[ereTEMP][i]*BOLTZ); + } + } + else + { + for (i = 0; i < re->nrepl; i++) + { + re->beta[i] = 1.0/(re->temp*BOLTZ); /* we have a single temperature */ + } + } + if (re->type == ereLAMBDA || re->type == ereTL) + { + bDLambda = TRUE; + /* lambda differences. 
*/ + /* de[i][j] is the energy of the jth simulation in the ith Hamiltonian + minus the energy of the jth simulation in the jth Hamiltonian */ + for (i = 0; i < re->nrepl; i++) + { + for (j = 0; j < re->nrepl; j++) + { + re->de[i][j] = 0; + } + } + for (i = 0; i < re->nrepl; i++) + { + re->de[i][re->repl] = (enerd->enerpart_lambda[(int)re->q[ereLAMBDA][i]+1]-enerd->enerpart_lambda[0]); + } + } + + /* now actually do the communication */ + if (bVol) + { + gmx_sum_sim(re->nrepl, re->Vol, ms); + } + if (bEpot) + { + gmx_sum_sim(re->nrepl, re->Epot, ms); + } + if (bDLambda) + { + for (i = 0; i < re->nrepl; i++) + { + gmx_sum_sim(re->nrepl, re->de[i], ms); + } + } + + /* make a duplicate set of indices for shuffling */ + for (i = 0; i < re->nrepl; i++) + { + pind[i] = re->ind[i]; + } + + if (bMultiEx) + { + /* multiple random switch exchange */ + int nself = 0; + + rng.restart( step, 0 ); + + for (i = 0; i < re->nex + nself; i++) + { + /* randomly select a pair */ + /* in theory, could reduce this by identifying only which switches had a nonneglibible + probability of occurring (log p > -100) and only operate on those switches */ + /* find out which state it is from, and what label that state currently has. Likely + more work that useful. */ + i0 = uniformNreplDist(rng); + i1 = uniformNreplDist(rng); + if (i0 == i1) + { + nself++; + continue; /* self-exchange, back up and do it again */ + } + + a = re->ind[i0]; /* what are the indices of these states? */ + b = re->ind[i1]; + ap = pind[i0]; + bp = pind[i1]; + + bPrint = FALSE; /* too noisy */ + /* calculate the energy difference */ + /* if the code changes to flip the STATES, rather than the configurations, + use the commented version of the code */ + /* delta = calc_delta(fplog,bPrint,re,a,b,ap,bp); */ + delta = calc_delta(fplog, bPrint, re, ap, bp, a, b); + + /* we actually only use the first space in the prob and bEx array, + since there are actually many switches between pairs. 
*/ + + if (delta <= 0) + { + /* accepted */ + prob[0] = 1; + bEx[0] = TRUE; + } + else + { + if (delta > PROBABILITYCUTOFF) + { + prob[0] = 0; + } + else + { + prob[0] = exp(-delta); + } + /* roll a number to determine if accepted */ + bEx[0] = uniformRealDist(rng) < prob[0]; + } + re->prob_sum[0] += prob[0]; + + if (bEx[0]) + { + /* swap the states */ + tmp = pind[i0]; + pind[i0] = pind[i1]; + pind[i1] = tmp; + } + } + re->nattempt[0]++; /* keep track of total permutation trials here */ + print_allswitchind(fplog, re->nrepl, pind, re->allswaps, re->tmpswap); + } + else + { + /* standard nearest neighbor replica exchange */ + + m = (step / re->nst) % 2; + for (i = 1; i < re->nrepl; i++) + { + a = re->ind[i-1]; + b = re->ind[i]; + + bPrint = (re->repl == a || re->repl == b); + if (i % 2 == m) + { + delta = calc_delta(fplog, bPrint, re, a, b, a, b); + if (delta <= 0) + { + /* accepted */ + prob[i] = 1; + bEx[i] = TRUE; + } + else + { + if (delta > PROBABILITYCUTOFF) + { + prob[i] = 0; + } + else + { + prob[i] = exp(-delta); + } + /* roll a number to determine if accepted */ + bEx[i] = uniformRealDist(rng) < prob[i]; + } + re->prob_sum[i] += prob[i]; + + if (bEx[i]) + { + /* swap these two */ + tmp = pind[i-1]; + pind[i-1] = pind[i]; + pind[i] = tmp; + re->nexchange[i]++; /* statistics for back compatibility */ + } + } + else + { + prob[i] = -1; + bEx[i] = FALSE; + } + } + /* print some statistics */ + print_ind(fplog, "ex", re->nrepl, re->ind, bEx); + print_prob(fplog, "pr", re->nrepl, prob); + fprintf(fplog, "\n"); + re->nattempt[m]++; + } + + /* record which moves were made and accepted */ + for (i = 0; i < re->nrepl; i++) + { + re->nmoves[re->ind[i]][pind[i]] += 1; + re->nmoves[pind[i]][re->ind[i]] += 1; + } + fflush(fplog); /* make sure we can see what the last exchange was */ +} + +static void +cyclic_decomposition(const int *destinations, + int **cyclic, + gmx_bool *incycle, + const int nrepl, + int *nswap) +{ + + int i, j, c, p; + int maxlen = 1; + for (i = 0; i < nrepl; i++) + { + incycle[i] = FALSE; + } + for (i = 0; i < nrepl; i++) /* one cycle for each replica */ + { + if (incycle[i]) + { + cyclic[i][0] = -1; + continue; + } + cyclic[i][0] = i; + incycle[i] = TRUE; + c = 1; + p = i; + for (j = 0; j < nrepl; j++) /* potentially all cycles are part, but we will break first */ + { + p = destinations[p]; /* start permuting */ + if (p == i) + { + cyclic[i][c] = -1; + if (c > maxlen) + { + maxlen = c; + } + break; /* we've reached the original element, the cycle is complete, and we marked the end. 
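+
+                   Worked example: destinations = {1, 2, 0}, i.e. the 3-cycle
+                   0 -> 1 -> 2 -> 0, yields cyclic[0] = {0, 1, 2, -1} and
+                   maxlen = 3, while cyclic[1][0] = cyclic[2][0] = -1 because
+                   those replicas are already marked as in a cycle; the
+                   resulting *nswap = maxlen - 1 = 2 is the number of pairwise
+                   swap rounds needed to realize the cycle.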
*/ + } + else + { + cyclic[i][c] = p; /* each permutation gives a new member of the cycle */ + incycle[p] = TRUE; + c++; + } + } + } + *nswap = maxlen - 1; + + if (debug) + { + for (i = 0; i < nrepl; i++) + { + fprintf(debug, "Cycle %d:", i); + for (j = 0; j < nrepl; j++) + { + if (cyclic[i][j] < 0) + { + break; + } + fprintf(debug, "%2d", cyclic[i][j]); + } + fprintf(debug, "\n"); + } + fflush(debug); + } +} + +static void +compute_exchange_order(int **cyclic, + int **order, + const int nrepl, + const int maxswap) +{ + int i, j; + + for (j = 0; j < maxswap; j++) + { + for (i = 0; i < nrepl; i++) + { + if (cyclic[i][j+1] >= 0) + { + order[cyclic[i][j+1]][j] = cyclic[i][j]; + order[cyclic[i][j]][j] = cyclic[i][j+1]; + } + } + for (i = 0; i < nrepl; i++) + { + if (order[i][j] < 0) + { + order[i][j] = i; /* if it's not exchanging, it should stay this round*/ + } + } + } + + if (debug) + { + fprintf(debug, "Replica Exchange Order\n"); + for (i = 0; i < nrepl; i++) + { + fprintf(debug, "Replica %d:", i); + for (j = 0; j < maxswap; j++) + { + if (order[i][j] < 0) + { + break; + } + fprintf(debug, "%2d", order[i][j]); + } + fprintf(debug, "\n"); + } + fflush(debug); + } +} + +static void +prepare_to_do_exchange(struct gmx_repl_ex *re, + const int replica_id, + int *maxswap, + gmx_bool *bThisReplicaExchanged) +{ + int i, j; + /* Hold the cyclic decomposition of the (multiple) replica + * exchange. */ + gmx_bool bAnyReplicaExchanged = FALSE; + *bThisReplicaExchanged = FALSE; + + for (i = 0; i < re->nrepl; i++) + { + if (re->destinations[i] != re->ind[i]) + { + /* only mark as exchanged if the index has been shuffled */ + bAnyReplicaExchanged = TRUE; + break; + } + } + if (bAnyReplicaExchanged) + { + /* reinitialize the placeholder arrays */ + for (i = 0; i < re->nrepl; i++) + { + for (j = 0; j < re->nrepl; j++) + { + re->cyclic[i][j] = -1; + re->order[i][j] = -1; + } + } + + /* Identify the cyclic decomposition of the permutation (very + * fast if neighbor replica exchange). */ + cyclic_decomposition(re->destinations, re->cyclic, re->incycle, re->nrepl, maxswap); + + /* Now translate the decomposition into a replica exchange + * order at each step. */ + compute_exchange_order(re->cyclic, re->order, re->nrepl, *maxswap); + + /* Did this replica do any exchange at any point? */ + for (j = 0; j < *maxswap; j++) + { + if (replica_id != re->order[replica_id][j]) + { + *bThisReplicaExchanged = TRUE; + break; + } + } + } +} + +gmx_bool replica_exchange(FILE *fplog, const t_commrec *cr, struct gmx_repl_ex *re, + t_state *state, gmx_enerdata_t *enerd, + t_state *state_local, gmx_int64_t step, real time) +{ + int j; + int replica_id = 0; + int exchange_partner; + int maxswap = 0; + /* Number of rounds of exchanges needed to deal with any multiple + * exchanges. */ + /* Where each replica ends up after the exchange attempt(s). */ + /* The order in which multiple exchanges will occur. */ + gmx_bool bThisReplicaExchanged = FALSE; + + if (MASTER(cr)) + { + replica_id = re->repl; + test_for_replica_exchange(fplog, cr->ms, re, enerd, det(state_local->box), step, time); + prepare_to_do_exchange(re, replica_id, &maxswap, &bThisReplicaExchanged); + } + /* Do intra-simulation broadcast so all processors belonging to + * each simulation know whether they need to participate in + * collecting the state. Otherwise, they might as well get on with + * the next thing to do. 
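+     * Only the master rank of each simulation ran
+     * test_for_replica_exchange(), so without this broadcast the other
+     * domain-decomposition ranks could not know whether the collective
+     * dd_collect_state() below will be entered.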
*/ + if (DOMAINDECOMP(cr)) + { +#if GMX_MPI + MPI_Bcast(&bThisReplicaExchanged, sizeof(gmx_bool), MPI_BYTE, MASTERRANK(cr), + cr->mpi_comm_mygroup); +#endif + } + + if (bThisReplicaExchanged) + { + /* Exchange the states */ + /* Collect the global state on the master node */ + if (DOMAINDECOMP(cr)) + { + dd_collect_state(cr->dd, state_local, state); + } + else + { + copy_state_nonatomdata(state_local, state); + } + + if (MASTER(cr)) + { + /* There will be only one swap cycle with standard replica + * exchange, but there may be multiple swap cycles if we + * allow multiple swaps. */ + + for (j = 0; j < maxswap; j++) + { + exchange_partner = re->order[replica_id][j]; + + if (exchange_partner != replica_id) + { + /* Exchange the global states between the master nodes */ + if (debug) + { + fprintf(debug, "Exchanging %d with %d\n", replica_id, exchange_partner); + } + exchange_state(cr->ms, exchange_partner, state); + } + } + /* For temperature-type replica exchange, we need to scale + * the velocities. */ + if (re->type == ereTEMP || re->type == ereTL) + { + scale_velocities(state, sqrt(re->q[ereTEMP][replica_id]/re->q[ereTEMP][re->destinations[replica_id]])); + } + + } + + /* With domain decomposition the global state is distributed later */ + if (!DOMAINDECOMP(cr)) + { + /* Copy the global state to the local state data structure */ + copy_state_nonatomdata(state, state_local); + } + } + + return bThisReplicaExchanged; +} + +void print_replica_exchange_statistics(FILE *fplog, struct gmx_repl_ex *re) +{ + int i; + + fprintf(fplog, "\nReplica exchange statistics\n"); + + if (re->nex == 0) + { + fprintf(fplog, "Repl %d attempts, %d odd, %d even\n", + re->nattempt[0]+re->nattempt[1], re->nattempt[1], re->nattempt[0]); + + fprintf(fplog, "Repl average probabilities:\n"); + for (i = 1; i < re->nrepl; i++) + { + if (re->nattempt[i%2] == 0) + { + re->prob[i] = 0; + } + else + { + re->prob[i] = re->prob_sum[i]/re->nattempt[i%2]; + } + } + print_ind(fplog, "", re->nrepl, re->ind, NULL); + print_prob(fplog, "", re->nrepl, re->prob); + + fprintf(fplog, "Repl number of exchanges:\n"); + print_ind(fplog, "", re->nrepl, re->ind, NULL); + print_count(fplog, "", re->nrepl, re->nexchange); + + fprintf(fplog, "Repl average number of exchanges:\n"); + for (i = 1; i < re->nrepl; i++) + { + if (re->nattempt[i%2] == 0) + { + re->prob[i] = 0; + } + else + { + re->prob[i] = ((real)re->nexchange[i])/re->nattempt[i%2]; + } + } + print_ind(fplog, "", re->nrepl, re->ind, NULL); + print_prob(fplog, "", re->nrepl, re->prob); + + fprintf(fplog, "\n"); + } + /* print the transition matrix */ + print_transition_matrix(fplog, re->nrepl, re->nmoves, re->nattempt); +} diff --git a/patches/gromacs-2016-beta1.diff/src/programs/mdrun/runner.cpp b/patches/gromacs-2016-beta1.diff/src/programs/mdrun/runner.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d642a42e20b80cc6b6432ed38f566d72b16012ca --- /dev/null +++ b/patches/gromacs-2016-beta1.diff/src/programs/mdrun/runner.cpp @@ -0,0 +1,1398 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2011,2012,2013,2014,2015,2016, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. 
+ * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \internal \file + * + * \brief Implements the MD runner routine calling all integrators. + * + * \author David van der Spoel <david.vanderspoel@icm.uu.se> + * \ingroup module_mdlib + */ +#include "gmxpre.h" + +#include "runner.h" + +#include "config.h" + +#include <assert.h> +#include <signal.h> +#include <stdlib.h> +#include <string.h> + +#include <algorithm> + +#include "gromacs/commandline/filenm.h" +#include "gromacs/domdec/domdec.h" +#include "gromacs/domdec/domdec_struct.h" +#include "gromacs/essentialdynamics/edsam.h" +#include "gromacs/ewald/pme.h" +#include "gromacs/fileio/checkpoint.h" +#include "gromacs/fileio/oenv.h" +#include "gromacs/fileio/tpxio.h" +#include "gromacs/gmxlib/md_logging.h" +#include "gromacs/gmxlib/network.h" +#include "gromacs/gpu_utils/gpu_utils.h" +#include "gromacs/hardware/cpuinfo.h" +#include "gromacs/hardware/detecthardware.h" +#include "gromacs/listed-forces/disre.h" +#include "gromacs/listed-forces/orires.h" +#include "gromacs/math/calculate-ewald-splitting-coefficient.h" +#include "gromacs/math/functions.h" +#include "gromacs/math/utilities.h" +#include "gromacs/math/vec.h" +#include "gromacs/mdlib/calc_verletbuf.h" +#include "gromacs/mdlib/constr.h" +#include "gromacs/mdlib/force.h" +#include "gromacs/mdlib/forcerec.h" +#include "gromacs/mdlib/gmx_omp_nthreads.h" +#include "gromacs/mdlib/integrator.h" +#include "gromacs/mdlib/main.h" +#include "gromacs/mdlib/md_support.h" +#include "gromacs/mdlib/mdatoms.h" +#include "gromacs/mdlib/mdrun.h" +#include "gromacs/mdlib/minimize.h" +#include "gromacs/mdlib/nbnxn_search.h" +#include "gromacs/mdlib/qmmm.h" +#include "gromacs/mdlib/sighandler.h" +#include "gromacs/mdlib/sim_util.h" +#include "gromacs/mdlib/tpi.h" +#include "gromacs/mdrunutility/threadaffinity.h" +#include "gromacs/mdtypes/commrec.h" +#include "gromacs/mdtypes/inputrec.h" +#include "gromacs/mdtypes/md_enums.h" +#include "gromacs/mdtypes/state.h" +#include "gromacs/pbcutil/pbc.h" +#include "gromacs/pulling/pull.h" +#include "gromacs/pulling/pull_rotation.h" +#include "gromacs/timing/wallcycle.h" +#include "gromacs/topology/mtop_util.h" +#include 
"gromacs/trajectory/trajectoryframe.h" +#include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/exceptions.h" +#include "gromacs/utility/fatalerror.h" +#include "gromacs/utility/gmxassert.h" +#include "gromacs/utility/gmxmpi.h" +#include "gromacs/utility/pleasecite.h" +#include "gromacs/utility/smalloc.h" + +#include "deform.h" +#include "md.h" +#include "repl_ex.h" +#include "resource-division.h" + +/* PLUMED */ +#include "../../../Plumed.h" +extern int plumedswitch; +extern plumed plumedmain; +/* END PLUMED */ + +#ifdef GMX_FAHCORE +#include "corewrap.h" +#endif + +//! First step used in pressure scaling +gmx_int64_t deform_init_init_step_tpx; +//! Initial box for pressure scaling +matrix deform_init_box_tpx; +//! MPI variable for use in pressure scaling +tMPI_Thread_mutex_t deform_init_box_mutex = TMPI_THREAD_MUTEX_INITIALIZER; + +#if GMX_THREAD_MPI +/* The minimum number of atoms per tMPI thread. With fewer atoms than this, + * the number of threads will get lowered. + */ +#define MIN_ATOMS_PER_MPI_THREAD 90 +#define MIN_ATOMS_PER_GPU 900 + +struct mdrunner_arglist +{ + gmx_hw_opt_t hw_opt; + FILE *fplog; + t_commrec *cr; + int nfile; + const t_filenm *fnm; + const gmx_output_env_t *oenv; + gmx_bool bVerbose; + int nstglobalcomm; + ivec ddxyz; + int dd_rank_order; + int npme; + real rdd; + real rconstr; + const char *dddlb_opt; + real dlb_scale; + const char *ddcsx; + const char *ddcsy; + const char *ddcsz; + const char *nbpu_opt; + int nstlist_cmdline; + gmx_int64_t nsteps_cmdline; + int nstepout; + int resetstep; + int nmultisim; + int repl_ex_nst; + int repl_ex_nex; + int repl_ex_seed; + real pforce; + real cpt_period; + real max_hours; + int imdport; + unsigned long Flags; +}; + + +/* The function used for spawning threads. Extracts the mdrunner() + arguments from its one argument and calls mdrunner(), after making + a commrec. */ +static void mdrunner_start_fn(void *arg) +{ + try + { + struct mdrunner_arglist *mda = (struct mdrunner_arglist*)arg; + struct mdrunner_arglist mc = *mda; /* copy the arg list to make sure + that it's thread-local. This doesn't + copy pointed-to items, of course, + but those are all const. */ + t_commrec *cr; /* we need a local version of this */ + FILE *fplog = NULL; + t_filenm *fnm; + + fnm = dup_tfn(mc.nfile, mc.fnm); + + cr = reinitialize_commrec_for_this_thread(mc.cr); + + if (MASTER(cr)) + { + fplog = mc.fplog; + } + + gmx::mdrunner(&mc.hw_opt, fplog, cr, mc.nfile, fnm, mc.oenv, + mc.bVerbose, mc.nstglobalcomm, + mc.ddxyz, mc.dd_rank_order, mc.npme, mc.rdd, + mc.rconstr, mc.dddlb_opt, mc.dlb_scale, + mc.ddcsx, mc.ddcsy, mc.ddcsz, + mc.nbpu_opt, mc.nstlist_cmdline, + mc.nsteps_cmdline, mc.nstepout, mc.resetstep, + mc.nmultisim, mc.repl_ex_nst, mc.repl_ex_nex, mc.repl_ex_seed, mc.pforce, + mc.cpt_period, mc.max_hours, mc.imdport, mc.Flags); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; +} + + +/* called by mdrunner() to start a specific number of threads (including + the main thread) for thread-parallel runs. This in turn calls mdrunner() + for each thread. + All options besides nthreads are the same as for mdrunner(). 
*/ +static t_commrec *mdrunner_start_threads(gmx_hw_opt_t *hw_opt, + FILE *fplog, t_commrec *cr, int nfile, + const t_filenm fnm[], const gmx_output_env_t *oenv, gmx_bool bVerbose, + int nstglobalcomm, + ivec ddxyz, int dd_rank_order, int npme, + real rdd, real rconstr, + const char *dddlb_opt, real dlb_scale, + const char *ddcsx, const char *ddcsy, const char *ddcsz, + const char *nbpu_opt, int nstlist_cmdline, + gmx_int64_t nsteps_cmdline, + int nstepout, int resetstep, + int nmultisim, int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, + real pforce, real cpt_period, real max_hours, + unsigned long Flags) +{ + int ret; + struct mdrunner_arglist *mda; + t_commrec *crn; /* the new commrec */ + t_filenm *fnmn; + + /* first check whether we even need to start tMPI */ + if (hw_opt->nthreads_tmpi < 2) + { + return cr; + } + + /* a few small, one-time, almost unavoidable memory leaks: */ + snew(mda, 1); + fnmn = dup_tfn(nfile, fnm); + + /* fill the data structure to pass as void pointer to thread start fn */ + /* hw_opt contains pointers, which should all be NULL at this stage */ + mda->hw_opt = *hw_opt; + mda->fplog = fplog; + mda->cr = cr; + mda->nfile = nfile; + mda->fnm = fnmn; + mda->oenv = oenv; + mda->bVerbose = bVerbose; + mda->nstglobalcomm = nstglobalcomm; + mda->ddxyz[XX] = ddxyz[XX]; + mda->ddxyz[YY] = ddxyz[YY]; + mda->ddxyz[ZZ] = ddxyz[ZZ]; + mda->dd_rank_order = dd_rank_order; + mda->npme = npme; + mda->rdd = rdd; + mda->rconstr = rconstr; + mda->dddlb_opt = dddlb_opt; + mda->dlb_scale = dlb_scale; + mda->ddcsx = ddcsx; + mda->ddcsy = ddcsy; + mda->ddcsz = ddcsz; + mda->nbpu_opt = nbpu_opt; + mda->nstlist_cmdline = nstlist_cmdline; + mda->nsteps_cmdline = nsteps_cmdline; + mda->nstepout = nstepout; + mda->resetstep = resetstep; + mda->nmultisim = nmultisim; + mda->repl_ex_nst = repl_ex_nst; + mda->repl_ex_nex = repl_ex_nex; + mda->repl_ex_seed = repl_ex_seed; + mda->pforce = pforce; + mda->cpt_period = cpt_period; + mda->max_hours = max_hours; + mda->Flags = Flags; + + /* now spawn new threads that start mdrunner_start_fn(), while + the main thread returns, we set thread affinity later */ + ret = tMPI_Init_fn(TRUE, hw_opt->nthreads_tmpi, TMPI_AFFINITY_NONE, + mdrunner_start_fn, (void*)(mda) ); + if (ret != TMPI_SUCCESS) + { + return NULL; + } + + crn = reinitialize_commrec_for_this_thread(cr); + return crn; +} + +#endif /* GMX_THREAD_MPI */ + + +/*! \brief Cost of non-bonded kernels + * + * We determine the extra cost of the non-bonded kernels compared to + * a reference nstlist value of 10 (which is the default in grompp). + */ +static const int nbnxnReferenceNstlist = 10; +//! The values to try when switching +const int nstlist_try[] = { 20, 25, 40 }; +//! Number of elements in the neighborsearch list trials. +#define NNSTL sizeof(nstlist_try)/sizeof(nstlist_try[0]) +/* Increase nstlist until the non-bonded cost increases more than listfac_ok, + * but never more than listfac_max. + * A standard (protein+)water system at 300K with PME ewald_rtol=1e-5 + * needs 1.28 at rcoulomb=0.9 and 1.24 at rcoulomb=1.0 to get to nstlist=40. + * Note that both CPU and GPU factors are conservative. Performance should + * not go down due to this tuning, except with a relatively slow GPU. + * On the other hand, at medium/high parallelization or with fast GPUs + * nstlist will not be increased enough to reach optimal performance. + */ +/* CPU: pair-search is about a factor 1.5 slower than the non-bonded kernel */ +//! 
 Max OK performance ratio between force calc and neighbor searching
+static const float nbnxn_cpu_listfac_ok    = 1.05;
+//! Too high performance ratio between force calc and neighbor searching
+static const float nbnxn_cpu_listfac_max   = 1.09;
+/* CPU: pair-search is about a factor 2-3 slower than the non-bonded kernel */
+//! Max OK performance ratio between force calc and neighbor searching
+static const float nbnxn_knl_listfac_ok    = 1.22;
+//! Too high performance ratio between force calc and neighbor searching
+static const float nbnxn_knl_listfac_max   = 1.3;
+/* GPU: pair-search is a factor 1.5-3 slower than the non-bonded kernel */
+//! Max OK performance ratio between force calc and neighbor searching
+static const float nbnxn_gpu_listfac_ok    = 1.20;
+//! Too high performance ratio between force calc and neighbor searching
+static const float nbnxn_gpu_listfac_max   = 1.30;
+
+/*! \brief Try to increase nstlist when using the Verlet cut-off scheme */
+static void increase_nstlist(FILE *fp, t_commrec *cr,
+                             t_inputrec *ir, int nstlist_cmdline,
+                             const gmx_mtop_t *mtop, matrix box,
+                             gmx_bool bGPU, const gmx::CpuInfo &cpuinfo)
+{
+    float                  listfac_ok, listfac_max;
+    int                    nstlist_orig, nstlist_prev;
+    verletbuf_list_setup_t ls;
+    real                   rlistWithReferenceNstlist, rlist_inc, rlist_ok, rlist_max;
+    real                   rlist_new, rlist_prev;
+    size_t                 nstlist_ind = 0;
+    t_state                state_tmp;
+    gmx_bool               bBox, bDD, bCont;
+    const char            *nstl_gpu = "\nFor optimal performance with a GPU nstlist (now %d) should be larger.\nThe optimum depends on your CPU and GPU resources.\nYou might want to try several nstlist values.\n";
+    const char            *nve_err  = "Can not increase nstlist because an NVE ensemble is used";
+    const char            *vbd_err  = "Can not increase nstlist because verlet-buffer-tolerance is not set or used";
+    const char            *box_err  = "Can not increase nstlist because the box is too small";
+    const char            *dd_err   = "Can not increase nstlist because of domain decomposition limitations";
+    char                   buf[STRLEN];
+
+    if (nstlist_cmdline <= 0)
+    {
+        if (ir->nstlist == 1)
+        {
+            /* The user probably set nstlist=1 for a reason,
+             * don't mess with the settings.
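+             * (nstlist = 1 means the pair list is rebuilt every step with a
+             * minimal buffer; nstlist_cmdline <= 0 just means that no value
+             * was passed with -nstlist on the command line.)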
+ */ + return; + } + + if (fp != NULL && bGPU && ir->nstlist < nstlist_try[0]) + { + fprintf(fp, nstl_gpu, ir->nstlist); + } + nstlist_ind = 0; + while (nstlist_ind < NNSTL && ir->nstlist >= nstlist_try[nstlist_ind]) + { + nstlist_ind++; + } + if (nstlist_ind == NNSTL) + { + /* There are no larger nstlist value to try */ + return; + } + } + + if (EI_MD(ir->eI) && ir->etc == etcNO) + { + if (MASTER(cr)) + { + fprintf(stderr, "%s\n", nve_err); + } + if (fp != NULL) + { + fprintf(fp, "%s\n", nve_err); + } + + return; + } + + if (ir->verletbuf_tol == 0 && bGPU) + { + gmx_fatal(FARGS, "You are using an old tpr file with a GPU, please generate a new tpr file with an up to date version of grompp"); + } + + if (ir->verletbuf_tol < 0) + { + if (MASTER(cr)) + { + fprintf(stderr, "%s\n", vbd_err); + } + if (fp != NULL) + { + fprintf(fp, "%s\n", vbd_err); + } + + return; + } + + if (bGPU) + { + listfac_ok = nbnxn_gpu_listfac_ok; + listfac_max = nbnxn_gpu_listfac_max; + } + else if (cpuinfo.feature(gmx::CpuInfo::Feature::X86_Avx512ER)) + { + listfac_ok = nbnxn_knl_listfac_ok; + listfac_max = nbnxn_knl_listfac_max; + } + else + { + listfac_ok = nbnxn_cpu_listfac_ok; + listfac_max = nbnxn_cpu_listfac_max; + } + + nstlist_orig = ir->nstlist; + if (nstlist_cmdline > 0) + { + if (fp) + { + sprintf(buf, "Getting nstlist=%d from command line option", + nstlist_cmdline); + } + ir->nstlist = nstlist_cmdline; + } + + verletbuf_get_list_setup(TRUE, bGPU, &ls); + + /* Allow rlist to make the list a given factor larger than the list + * would be with the reference value for nstlist (10). + */ + nstlist_prev = ir->nstlist; + ir->nstlist = nbnxnReferenceNstlist; + calc_verlet_buffer_size(mtop, det(box), ir, -1, &ls, NULL, + &rlistWithReferenceNstlist); + ir->nstlist = nstlist_prev; + + /* Determine the pair list size increase due to zero interactions */ + rlist_inc = nbnxn_get_rlist_effective_inc(ls.cluster_size_j, + mtop->natoms/det(box)); + rlist_ok = (rlistWithReferenceNstlist + rlist_inc)*std::cbrt(listfac_ok) - rlist_inc; + rlist_max = (rlistWithReferenceNstlist + rlist_inc)*std::cbrt(listfac_max) - rlist_inc; + if (debug) + { + fprintf(debug, "nstlist tuning: rlist_inc %.3f rlist_ok %.3f rlist_max %.3f\n", + rlist_inc, rlist_ok, rlist_max); + } + + nstlist_prev = nstlist_orig; + rlist_prev = ir->rlist; + do + { + if (nstlist_cmdline <= 0) + { + ir->nstlist = nstlist_try[nstlist_ind]; + } + + /* Set the pair-list buffer size in ir */ + calc_verlet_buffer_size(mtop, det(box), ir, -1, &ls, NULL, &rlist_new); + + /* Does rlist fit in the box? 
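+           With periodic boundaries the cut-off must not exceed roughly half
+           of the shortest box extent; max_cutoff2() returns the exact square
+           of that limit for the given ePBC and (possibly triclinic) box.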
*/ + bBox = (gmx::square(rlist_new) < max_cutoff2(ir->ePBC, box)); + bDD = TRUE; + if (bBox && DOMAINDECOMP(cr)) + { + /* Check if rlist fits in the domain decomposition */ + if (inputrec2nboundeddim(ir) < DIM) + { + gmx_incons("Changing nstlist with domain decomposition and unbounded dimensions is not implemented yet"); + } + copy_mat(box, state_tmp.box); + bDD = change_dd_cutoff(cr, &state_tmp, ir, rlist_new); + } + + if (debug) + { + fprintf(debug, "nstlist %d rlist %.3f bBox %d bDD %d\n", + ir->nstlist, rlist_new, bBox, bDD); + } + + bCont = FALSE; + + if (nstlist_cmdline <= 0) + { + if (bBox && bDD && rlist_new <= rlist_max) + { + /* Increase nstlist */ + nstlist_prev = ir->nstlist; + rlist_prev = rlist_new; + bCont = (nstlist_ind+1 < NNSTL && rlist_new < rlist_ok); + } + else + { + /* Stick with the previous nstlist */ + ir->nstlist = nstlist_prev; + rlist_new = rlist_prev; + bBox = TRUE; + bDD = TRUE; + } + } + + nstlist_ind++; + } + while (bCont); + + if (!bBox || !bDD) + { + gmx_warning(!bBox ? box_err : dd_err); + if (fp != NULL) + { + fprintf(fp, "\n%s\n", bBox ? box_err : dd_err); + } + ir->nstlist = nstlist_orig; + } + else if (ir->nstlist != nstlist_orig || rlist_new != ir->rlist) + { + sprintf(buf, "Changing nstlist from %d to %d, rlist from %g to %g", + nstlist_orig, ir->nstlist, + ir->rlist, rlist_new); + if (MASTER(cr)) + { + fprintf(stderr, "%s\n\n", buf); + } + if (fp != NULL) + { + fprintf(fp, "%s\n\n", buf); + } + ir->rlist = rlist_new; + } +} + +/*! \brief Initialize variables for Verlet scheme simulation */ +static void prepare_verlet_scheme(FILE *fplog, + t_commrec *cr, + t_inputrec *ir, + int nstlist_cmdline, + const gmx_mtop_t *mtop, + matrix box, + gmx_bool bUseGPU, + const gmx::CpuInfo &cpuinfo) +{ + /* For NVE simulations, we will retain the initial list buffer */ + if (EI_DYNAMICS(ir->eI) && + ir->verletbuf_tol > 0 && + !(EI_MD(ir->eI) && ir->etc == etcNO)) + { + /* Update the Verlet buffer size for the current run setup */ + verletbuf_list_setup_t ls; + real rlist_new; + + /* Here we assume SIMD-enabled kernels are being used. But as currently + * calc_verlet_buffer_size gives the same results for 4x8 and 4x4 + * and 4x2 gives a larger buffer than 4x4, this is ok. + */ + verletbuf_get_list_setup(TRUE, bUseGPU, &ls); + + calc_verlet_buffer_size(mtop, det(box), ir, -1, &ls, NULL, &rlist_new); + + if (rlist_new != ir->rlist) + { + if (fplog != NULL) + { + fprintf(fplog, "\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n", + ir->rlist, rlist_new, + ls.cluster_size_i, ls.cluster_size_j); + } + ir->rlist = rlist_new; + } + } + + if (nstlist_cmdline > 0 && (!EI_DYNAMICS(ir->eI) || ir->verletbuf_tol <= 0)) + { + gmx_fatal(FARGS, "Can not set nstlist without %s", + !EI_DYNAMICS(ir->eI) ? "dynamics" : "verlet-buffer-tolerance"); + } + + if (EI_DYNAMICS(ir->eI)) + { + /* Set or try nstlist values */ + increase_nstlist(fplog, cr, ir, nstlist_cmdline, mtop, box, bUseGPU, cpuinfo); + } +} + +/*! 
\brief Override the nslist value in inputrec + * + * with value passed on the command line (if any) + */ +static void override_nsteps_cmdline(FILE *fplog, + gmx_int64_t nsteps_cmdline, + t_inputrec *ir, + const t_commrec *cr) +{ + assert(ir); + assert(cr); + + /* override with anything else than the default -2 */ + if (nsteps_cmdline > -2) + { + char sbuf_steps[STEPSTRSIZE]; + char sbuf_msg[STRLEN]; + + ir->nsteps = nsteps_cmdline; + if (EI_DYNAMICS(ir->eI) && nsteps_cmdline != -1) + { + sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps, %.3g ps", + gmx_step_str(nsteps_cmdline, sbuf_steps), + fabs(nsteps_cmdline*ir->delta_t)); + } + else + { + sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps", + gmx_step_str(nsteps_cmdline, sbuf_steps)); + } + + md_print_warn(cr, fplog, "%s\n", sbuf_msg); + } + else if (nsteps_cmdline < -2) + { + gmx_fatal(FARGS, "Invalid nsteps value passed on the command line: %d", + nsteps_cmdline); + } + /* Do nothing if nsteps_cmdline == -2 */ +} + +namespace gmx +{ + +//! \brief Return the correct integrator function. +static integrator_t *my_integrator(unsigned int ei) +{ + switch (ei) + { + case eiMD: + case eiBD: + case eiSD1: + case eiVV: + case eiVVAK: + if (!EI_DYNAMICS(ei)) + { + GMX_THROW(APIError("do_md integrator would be called for a non-dynamical integrator")); + } + return do_md; + case eiSteep: + return do_steep; + case eiCG: + return do_cg; + case eiNM: + return do_nm; + case eiLBFGS: + return do_lbfgs; + case eiTPI: + case eiTPIC: + if (!EI_TPI(ei)) + { + GMX_THROW(APIError("do_tpi integrator would be called for a non-TPI integrator")); + } + return do_tpi; + case eiSD2_REMOVED: + GMX_THROW(NotImplementedError("SD2 integrator has been removed")); + default: + GMX_THROW(APIError("Non existing integrator selected")); + } +} + +int mdrunner(gmx_hw_opt_t *hw_opt, + FILE *fplog, t_commrec *cr, int nfile, + const t_filenm fnm[], const gmx_output_env_t *oenv, gmx_bool bVerbose, + int nstglobalcomm, + ivec ddxyz, int dd_rank_order, int npme, real rdd, real rconstr, + const char *dddlb_opt, real dlb_scale, + const char *ddcsx, const char *ddcsy, const char *ddcsz, + const char *nbpu_opt, int nstlist_cmdline, + gmx_int64_t nsteps_cmdline, int nstepout, int resetstep, + int gmx_unused nmultisim, int repl_ex_nst, int repl_ex_nex, + int repl_ex_seed, real pforce, real cpt_period, real max_hours, + int imdport, unsigned long Flags) +{ + gmx_bool bForceUseGPU, bTryUseGPU, bRerunMD; + t_inputrec *inputrec; + t_state *state = NULL; + matrix box; + gmx_ddbox_t ddbox = {0}; + int npme_major, npme_minor; + t_nrnb *nrnb; + gmx_mtop_t *mtop = NULL; + t_mdatoms *mdatoms = NULL; + t_forcerec *fr = NULL; + t_fcdata *fcd = NULL; + real ewaldcoeff_q = 0; + real ewaldcoeff_lj = 0; + struct gmx_pme_t **pmedata = NULL; + gmx_vsite_t *vsite = NULL; + gmx_constr_t constr; + int nChargePerturbed = -1, nTypePerturbed = 0, status; + gmx_wallcycle_t wcycle; + gmx_walltime_accounting_t walltime_accounting = NULL; + int rc; + gmx_int64_t reset_counters; + gmx_edsam_t ed = NULL; + int nthreads_pme = 1; + gmx_hw_info_t *hwinfo = NULL; + /* The master rank decides early on bUseGPU and broadcasts this later */ + gmx_bool bUseGPU = FALSE; + + /* CAUTION: threads may be started later on in this function, so + cr doesn't reflect the final parallel state right now */ + snew(inputrec, 1); + snew(mtop, 1); + + if (Flags & MD_APPENDFILES) + { + fplog = NULL; + } + + bRerunMD = (Flags & MD_RERUN); + bForceUseGPU = 
(strncmp(nbpu_opt, "gpu", 3) == 0); + bTryUseGPU = (strncmp(nbpu_opt, "auto", 4) == 0) || bForceUseGPU; + + /* Detect hardware, gather information. This is an operation that is + * global for this process (MPI rank). */ + hwinfo = gmx_detect_hardware(fplog, cr, bTryUseGPU); + + gmx_print_detected_hardware(fplog, cr, hwinfo); + + if (fplog != NULL) + { + /* Print references after all software/hardware printing */ + please_cite(fplog, "Abraham2015"); + please_cite(fplog, "Pall2015"); + please_cite(fplog, "Pronk2013"); + please_cite(fplog, "Hess2008b"); + please_cite(fplog, "Spoel2005a"); + please_cite(fplog, "Lindahl2001a"); + please_cite(fplog, "Berendsen95a"); + } + + snew(state, 1); + if (SIMMASTER(cr)) + { + /* Read (nearly) all data required for the simulation */ + read_tpx_state(ftp2fn(efTPR, nfile, fnm), inputrec, state, mtop); + + if (inputrec->cutoff_scheme == ecutsVERLET) + { + /* Here the master rank decides if all ranks will use GPUs */ + bUseGPU = (hwinfo->gpu_info.n_dev_compatible > 0 || + getenv("GMX_EMULATE_GPU") != NULL); + + /* TODO add GPU kernels for this and replace this check by: + * (bUseGPU && (ir->vdwtype == evdwPME && + * ir->ljpme_combination_rule == eljpmeLB)) + * update the message text and the content of nbnxn_acceleration_supported. + */ + if (bUseGPU && + !nbnxn_gpu_acceleration_supported(fplog, cr, inputrec, bRerunMD)) + { + /* Fallback message printed by nbnxn_acceleration_supported */ + if (bForceUseGPU) + { + gmx_fatal(FARGS, "GPU acceleration requested, but not supported with the given input settings"); + } + bUseGPU = FALSE; + } + + prepare_verlet_scheme(fplog, cr, + inputrec, nstlist_cmdline, mtop, state->box, + bUseGPU, *hwinfo->cpuInfo); + } + else + { + if (nstlist_cmdline > 0) + { + gmx_fatal(FARGS, "Can not set nstlist with the group cut-off scheme"); + } + + if (hwinfo->gpu_info.n_dev_compatible > 0) + { + md_print_warn(cr, fplog, + "NOTE: GPU(s) found, but the current simulation can not use GPUs\n" + " To use a GPU, set the mdp option: cutoff-scheme = Verlet\n"); + } + + if (bForceUseGPU) + { + gmx_fatal(FARGS, "GPU requested, but can't be used without cutoff-scheme=Verlet"); + } + +#if GMX_TARGET_BGQ + md_print_warn(cr, fplog, + "NOTE: There is no SIMD implementation of the group scheme kernels on\n" + " BlueGene/Q. You will observe better performance from using the\n" + " Verlet cut-off scheme.\n"); +#endif + } + } + + /* Check and update the hardware options for internal consistency */ + check_and_update_hw_opt_1(hw_opt, cr, npme); + + /* Early check for externally set process affinity. */ + gmx_check_thread_affinity_set(fplog, cr, + hw_opt, hwinfo->nthreads_hw_avail, FALSE); + +#if GMX_THREAD_MPI + if (SIMMASTER(cr)) + { + if (npme > 0 && hw_opt->nthreads_tmpi <= 0) + { + gmx_fatal(FARGS, "You need to explicitly specify the number of MPI threads (-ntmpi) when using separate PME ranks"); + } + + /* Since the master knows the cut-off scheme, update hw_opt for this. + * This is done later for normal MPI and also once more with tMPI + * for all tMPI ranks. + */ + check_and_update_hw_opt_2(hw_opt, inputrec->cutoff_scheme); + + /* NOW the threads will be started: */ + hw_opt->nthreads_tmpi = get_nthreads_mpi(hwinfo, + hw_opt, + inputrec, mtop, + cr, fplog, bUseGPU); + + if (hw_opt->nthreads_tmpi > 1) + { + t_commrec *cr_old = cr; + /* now start the threads. 
*/ + cr = mdrunner_start_threads(hw_opt, fplog, cr_old, nfile, fnm, + oenv, bVerbose, nstglobalcomm, + ddxyz, dd_rank_order, npme, rdd, rconstr, + dddlb_opt, dlb_scale, ddcsx, ddcsy, ddcsz, + nbpu_opt, nstlist_cmdline, + nsteps_cmdline, nstepout, resetstep, nmultisim, + repl_ex_nst, repl_ex_nex, repl_ex_seed, pforce, + cpt_period, max_hours, + Flags); + /* the main thread continues here with a new cr. We don't deallocate + the old cr because other threads may still be reading it. */ + if (cr == NULL) + { + gmx_comm("Failed to spawn threads"); + } + } + } +#endif + /* END OF CAUTION: cr is now reliable */ + + if (PAR(cr)) + { + /* now broadcast everything to the non-master nodes/threads: */ + init_parallel(cr, inputrec, mtop); + + /* The master rank decided on the use of GPUs, + * broadcast this information to all ranks. + */ + gmx_bcast_sim(sizeof(bUseGPU), &bUseGPU, cr); + } + + if (fplog != NULL) + { + pr_inputrec(fplog, 0, "Input Parameters", inputrec, FALSE); + fprintf(fplog, "\n"); + } + + /* now make sure the state is initialized and propagated */ + set_state_entries(state, inputrec); + + /* A parallel command line option consistency check that we can + only do after any threads have started. */ + if (!PAR(cr) && + (ddxyz[XX] > 1 || ddxyz[YY] > 1 || ddxyz[ZZ] > 1 || npme > 0)) + { + gmx_fatal(FARGS, + "The -dd or -npme option request a parallel simulation, " +#if !GMX_MPI + "but %s was compiled without threads or MPI enabled" +#else +#if GMX_THREAD_MPI + "but the number of MPI-threads (option -ntmpi) is not set or is 1" +#else + "but %s was not started through mpirun/mpiexec or only one rank was requested through mpirun/mpiexec" +#endif +#endif + , output_env_get_program_display_name(oenv) + ); + } + + if (bRerunMD && + (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI)) + { + gmx_fatal(FARGS, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun"); + } + + if (can_use_allvsall(inputrec, TRUE, cr, fplog) && DOMAINDECOMP(cr)) + { + gmx_fatal(FARGS, "All-vs-all loops do not work with domain decomposition, use a single MPI rank"); + } + + if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype))) + { + if (npme > 0) + { + gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr), + "PME-only ranks are requested, but the system does not use PME for electrostatics or LJ"); + } + + npme = 0; + } + + if (bUseGPU && npme < 0) + { + /* With GPUs we don't automatically use PME-only ranks. PME ranks can + * improve performance with many threads per GPU, since our OpenMP + * scaling is bad, but it's difficult to automate the setup. + */ + npme = 0; + } + +#ifdef GMX_FAHCORE + if (MASTER(cr)) + { + fcRegisterSteps(inputrec->nsteps, inputrec->init_step); + } +#endif + + /* NMR restraints must be initialized before load_checkpoint, + * since with time averaging the history is added to t_state. + * For proper consistency check we therefore need to extend + * t_state here. + * So the PME-only nodes (if present) will also initialize + * the distance restraints. 
+ */ + snew(fcd, 1); + + /* This needs to be called before read_checkpoint to extend the state */ + init_disres(fplog, mtop, inputrec, cr, fcd, state, repl_ex_nst > 0); + + init_orires(fplog, mtop, state->x, inputrec, cr, &(fcd->orires), + state); + + if (inputrecDeform(inputrec)) + { + /* Store the deform reference box before reading the checkpoint */ + if (SIMMASTER(cr)) + { + copy_mat(state->box, box); + } + if (PAR(cr)) + { + gmx_bcast(sizeof(box), box, cr); + } + /* Because we do not have the update struct available yet + * in which the reference values should be stored, + * we store them temporarily in static variables. + * This should be thread safe, since they are only written once + * and with identical values. + */ + tMPI_Thread_mutex_lock(&deform_init_box_mutex); + deform_init_init_step_tpx = inputrec->init_step; + copy_mat(box, deform_init_box_tpx); + tMPI_Thread_mutex_unlock(&deform_init_box_mutex); + } + + if (Flags & MD_STARTFROMCPT) + { + /* Check if checkpoint file exists before doing continuation. + * This way we can use identical input options for the first and subsequent runs... + */ + gmx_bool bReadEkin; + + load_checkpoint(opt2fn_master("-cpi", nfile, fnm, cr), &fplog, + cr, ddxyz, &npme, + inputrec, state, &bReadEkin, + (Flags & MD_APPENDFILES), + (Flags & MD_APPENDFILESSET)); + + if (bReadEkin) + { + Flags |= MD_READ_EKIN; + } + } + + if (MASTER(cr) && (Flags & MD_APPENDFILES)) + { + gmx_log_open(ftp2fn(efLOG, nfile, fnm), cr, + Flags, &fplog); + } + + /* override nsteps with value from cmdline */ + override_nsteps_cmdline(fplog, nsteps_cmdline, inputrec, cr); + + if (SIMMASTER(cr)) + { + copy_mat(state->box, box); + } + + if (PAR(cr)) + { + gmx_bcast(sizeof(box), box, cr); + } + + // TODO This should move to do_md(), because it only makes sense + // with dynamical integrators, but there is no test coverage and + // it interacts with constraints, somehow. + /* Essential dynamics */ + if (opt2bSet("-ei", nfile, fnm)) + { + /* Open input and output files, allocate space for ED data structure */ + ed = ed_open(mtop->natoms, &state->edsamstate, nfile, fnm, Flags, oenv, cr); + } + + if (PAR(cr) && !(EI_TPI(inputrec->eI) || + inputrec->eI == eiNM)) + { + cr->dd = init_domain_decomposition(fplog, cr, Flags, ddxyz, npme, + dd_rank_order, + rdd, rconstr, + dddlb_opt, dlb_scale, + ddcsx, ddcsy, ddcsz, + mtop, inputrec, + box, state->x, + &ddbox, &npme_major, &npme_minor); + } + else + { + /* PME, if used, is done on all nodes with 1D decomposition */ + cr->npmenodes = 0; + cr->duty = (DUTY_PP | DUTY_PME); + npme_major = 1; + npme_minor = 1; + + if (inputrec->ePBC == epbcSCREW) + { + gmx_fatal(FARGS, + "pbc=%s is only implemented with domain decomposition", + epbc_names[inputrec->ePBC]); + } + } + + if (PAR(cr)) + { + /* After possible communicator splitting in make_dd_communicators. + * we can set up the intra/inter node communication. + */ + gmx_setup_nodecomm(fplog, cr); + } + + /* Initialize per-physical-node MPI process/thread ID and counters. */ + gmx_init_intranode_counters(cr); +#if GMX_MPI + if (MULTISIM(cr)) + { + md_print_info(cr, fplog, + "This is simulation %d out of %d running as a composite GROMACS\n" + "multi-simulation job. Setup for this simulation:\n\n", + cr->ms->sim, cr->ms->nsim); + } + md_print_info(cr, fplog, "Using %d MPI %s\n", + cr->nnodes, +#if GMX_THREAD_MPI + cr->nnodes == 1 ? "thread" : "threads" +#else + cr->nnodes == 1 ? 
"process" : "processes" +#endif + ); + fflush(stderr); +#endif + + /* Check and update hw_opt for the cut-off scheme */ + check_and_update_hw_opt_2(hw_opt, inputrec->cutoff_scheme); + + /* Check and update hw_opt for the number of MPI ranks */ + check_and_update_hw_opt_3(hw_opt); + + gmx_omp_nthreads_init(fplog, cr, + hwinfo->nthreads_hw_avail, + hw_opt->nthreads_omp, + hw_opt->nthreads_omp_pme, + (cr->duty & DUTY_PP) == 0, + inputrec->cutoff_scheme == ecutsVERLET); + +#ifndef NDEBUG + if (EI_TPI(inputrec->eI) && + inputrec->cutoff_scheme == ecutsVERLET) + { + gmx_feenableexcept(); + } +#endif + + if (bUseGPU) + { + /* Select GPU id's to use */ + gmx_select_gpu_ids(fplog, cr, &hwinfo->gpu_info, bForceUseGPU, + &hw_opt->gpu_opt); + } + else + { + /* Ignore (potentially) manually selected GPUs */ + hw_opt->gpu_opt.n_dev_use = 0; + } + + /* check consistency across ranks of things like SIMD + * support and number of GPUs selected */ + gmx_check_hw_runconf_consistency(fplog, hwinfo, cr, hw_opt, bUseGPU); + + /* Now that we know the setup is consistent, check for efficiency */ + check_resource_division_efficiency(hwinfo, hw_opt, Flags & MD_NTOMPSET, + cr, fplog); + + if (DOMAINDECOMP(cr)) + { + /* When we share GPUs over ranks, we need to know this for the DLB */ + dd_setup_dlb_resource_sharing(cr, hwinfo, hw_opt); + } + + /* getting number of PP/PME threads + PME: env variable should be read only on one node to make sure it is + identical everywhere; + */ + nthreads_pme = gmx_omp_nthreads_get(emntPME); + + wcycle = wallcycle_init(fplog, resetstep, cr); + + if (PAR(cr)) + { + /* Master synchronizes its value of reset_counters with all nodes + * including PME only nodes */ + reset_counters = wcycle_get_reset_counters(wcycle); + gmx_bcast_sim(sizeof(reset_counters), &reset_counters, cr); + wcycle_set_reset_counters(wcycle, reset_counters); + } + + snew(nrnb, 1); + if (cr->duty & DUTY_PP) + { + bcast_state(cr, state); + + /* Initiate forcerecord */ + fr = mk_forcerec(); + fr->hwinfo = hwinfo; + fr->gpu_opt = &hw_opt->gpu_opt; + init_forcerec(fplog, fr, fcd, inputrec, mtop, cr, box, + opt2fn("-table", nfile, fnm), + opt2fn("-tablep", nfile, fnm), + opt2fn("-tableb", nfile, fnm), + nbpu_opt, + FALSE, + pforce); + + /* Initialize QM-MM */ + if (fr->bQMMM) + { + init_QMMMrec(cr, mtop, inputrec, fr); + } + + /* Initialize the mdatoms structure. + * mdatoms is not filled with atom data, + * as this can not be done now with domain decomposition. + */ + mdatoms = init_mdatoms(fplog, mtop, inputrec->efep != efepNO); + + /* Initialize the virtual site communication */ + vsite = init_vsite(mtop, cr, FALSE); + + calc_shifts(box, fr->shift_vec); + + /* With periodic molecules the charge groups should be whole at start up + * and the virtual sites should not be far from their proper positions. + */ + if (!inputrec->bContinuation && MASTER(cr) && + !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols)) + { + /* Make molecules whole at start of run */ + if (fr->ePBC != epbcNONE) + { + do_pbc_first_mtop(fplog, inputrec->ePBC, box, mtop, state->x); + } + if (vsite) + { + /* Correct initial vsite positions are required + * for the initial distribution in the domain decomposition + * and for the initial shell prediction. 
+ */ + construct_vsites_mtop(vsite, mtop, state->x); + } + } + + if (EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype)) + { + ewaldcoeff_q = fr->ewaldcoeff_q; + ewaldcoeff_lj = fr->ewaldcoeff_lj; + pmedata = &fr->pmedata; + } + else + { + pmedata = NULL; + } + } + else + { + /* This is a PME only node */ + + /* We don't need the state */ + done_state(state); + + ewaldcoeff_q = calc_ewaldcoeff_q(inputrec->rcoulomb, inputrec->ewald_rtol); + ewaldcoeff_lj = calc_ewaldcoeff_lj(inputrec->rvdw, inputrec->ewald_rtol_lj); + snew(pmedata, 1); + } + + if (hw_opt->thread_affinity != threadaffOFF) + { + /* Before setting affinity, check whether the affinity has changed + * - which indicates that probably the OpenMP library has changed it + * since we first checked). + */ + gmx_check_thread_affinity_set(fplog, cr, + hw_opt, hwinfo->nthreads_hw_avail, TRUE); + + /* Set the CPU affinity */ + gmx_set_thread_affinity(fplog, cr, hw_opt, hwinfo); + } + + /* Initiate PME if necessary, + * either on all nodes or on dedicated PME nodes only. */ + if (EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)) + { + if (mdatoms) + { + nChargePerturbed = mdatoms->nChargePerturbed; + if (EVDW_PME(inputrec->vdwtype)) + { + nTypePerturbed = mdatoms->nTypePerturbed; + } + } + if (cr->npmenodes > 0) + { + /* The PME only nodes need to know nChargePerturbed(FEP on Q) and nTypePerturbed(FEP on LJ)*/ + gmx_bcast_sim(sizeof(nChargePerturbed), &nChargePerturbed, cr); + gmx_bcast_sim(sizeof(nTypePerturbed), &nTypePerturbed, cr); + } + + if (cr->duty & DUTY_PME) + { + status = gmx_pme_init(pmedata, cr, npme_major, npme_minor, inputrec, + mtop ? mtop->natoms : 0, nChargePerturbed, nTypePerturbed, + (Flags & MD_REPRODUCIBLE), nthreads_pme); + if (status != 0) + { + gmx_fatal(FARGS, "Error %d initializing PME", status); + } + } + } + + + if (EI_DYNAMICS(inputrec->eI)) + { + /* Turn on signal handling on all nodes */ + /* + * (A user signal from the PME nodes (if any) + * is communicated to the PP nodes. + */ + signal_handler_install(); + } + + if (cr->duty & DUTY_PP) + { + /* Assumes uniform use of the number of OpenMP threads */ + walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault)); + + if (inputrec->bPull) + { + /* Initialize pull code */ + inputrec->pull_work = + init_pull(fplog, inputrec->pull, inputrec, nfile, fnm, + mtop, cr, oenv, inputrec->fepvals->init_lambda, + EI_DYNAMICS(inputrec->eI) && MASTER(cr), Flags); + } + + if (inputrec->bRot) + { + /* Initialize enforced rotation code */ + init_rot(fplog, inputrec, nfile, fnm, cr, state->x, state->box, mtop, oenv, + bVerbose, Flags); + } + + constr = init_constraints(fplog, mtop, inputrec, ed, state, cr); + + if (DOMAINDECOMP(cr)) + { + GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP"); + /* This call is not included in init_domain_decomposition mainly + * because fr->cginfo_mb is set later. + */ + dd_init_bondeds(fplog, cr->dd, mtop, vsite, inputrec, + Flags & MD_DDBONDCHECK, fr->cginfo_mb); + } + + /* Now do whatever the user wants us to do (how flexible...) 
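+           my_integrator() maps inputrec->eI onto the matching loop: do_md
+           for the dynamical integrators, do_steep/do_cg/do_nm/do_lbfgs for
+           minimization and normal-mode analysis, and do_tpi for test
+           particle insertion.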
*/ + my_integrator(inputrec->eI) (fplog, cr, nfile, fnm, + oenv, bVerbose, + nstglobalcomm, + vsite, constr, + nstepout, inputrec, mtop, + fcd, state, + mdatoms, nrnb, wcycle, ed, fr, + repl_ex_nst, repl_ex_nex, repl_ex_seed, + cpt_period, max_hours, + imdport, + Flags, + walltime_accounting); + + if (inputrec->bRot) + { + finish_rot(inputrec->rot); + } + + if (inputrec->bPull) + { + finish_pull(inputrec->pull_work); + } + + } + else + { + GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP"); + /* do PME only */ + walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME)); + gmx_pmeonly(*pmedata, cr, nrnb, wcycle, walltime_accounting, ewaldcoeff_q, ewaldcoeff_lj, inputrec); + } + + wallcycle_stop(wcycle, ewcRUN); + + /* Finish up, write some stuff + * if rerunMD, don't write last frame again + */ + finish_run(fplog, cr, + inputrec, nrnb, wcycle, walltime_accounting, + fr ? fr->nbv : NULL, + EI_DYNAMICS(inputrec->eI) && !MULTISIM(cr)); + + + /* Free GPU memory and context */ + free_gpu_resources(fr, cr, &hwinfo->gpu_info, fr ? fr->gpu_opt : NULL); + + gmx_hardware_info_free(hwinfo); + + /* Does what it says */ + print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime()); + walltime_accounting_destroy(walltime_accounting); + + /* PLUMED */ + if(plumedswitch){ + plumed_finalize(plumedmain); + } + /* END PLUMED */ + + /* Close logfile already here if we were appending to it */ + if (MASTER(cr) && (Flags & MD_APPENDFILES)) + { + gmx_log_close(fplog); + } + + rc = (int)gmx_get_stop_condition(); + + done_ed(&ed); + +#if GMX_THREAD_MPI + /* we need to join all threads. The sub-threads join when they + exit this function, but the master thread needs to be told to + wait for that. */ + if (PAR(cr) && MASTER(cr)) + { + tMPI_Finalize(); + } +#endif + + return rc; +} + +} // namespace gmx diff --git a/patches/gromacs-2016-beta1.diff/src/programs/mdrun/runner.cpp.preplumed b/patches/gromacs-2016-beta1.diff/src/programs/mdrun/runner.cpp.preplumed new file mode 100644 index 0000000000000000000000000000000000000000..3294776ff121cf4b9ee740de07be997e6636fe2d --- /dev/null +++ b/patches/gromacs-2016-beta1.diff/src/programs/mdrun/runner.cpp.preplumed @@ -0,0 +1,1386 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2011,2012,2013,2014,2015,2016, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \internal \file + * + * \brief Implements the MD runner routine calling all integrators. + * + * \author David van der Spoel <david.vanderspoel@icm.uu.se> + * \ingroup module_mdlib + */ +#include "gmxpre.h" + +#include "runner.h" + +#include "config.h" + +#include <assert.h> +#include <signal.h> +#include <stdlib.h> +#include <string.h> + +#include <algorithm> + +#include "gromacs/commandline/filenm.h" +#include "gromacs/domdec/domdec.h" +#include "gromacs/domdec/domdec_struct.h" +#include "gromacs/essentialdynamics/edsam.h" +#include "gromacs/ewald/pme.h" +#include "gromacs/fileio/checkpoint.h" +#include "gromacs/fileio/oenv.h" +#include "gromacs/fileio/tpxio.h" +#include "gromacs/gmxlib/md_logging.h" +#include "gromacs/gmxlib/network.h" +#include "gromacs/gpu_utils/gpu_utils.h" +#include "gromacs/hardware/cpuinfo.h" +#include "gromacs/hardware/detecthardware.h" +#include "gromacs/listed-forces/disre.h" +#include "gromacs/listed-forces/orires.h" +#include "gromacs/math/calculate-ewald-splitting-coefficient.h" +#include "gromacs/math/functions.h" +#include "gromacs/math/utilities.h" +#include "gromacs/math/vec.h" +#include "gromacs/mdlib/calc_verletbuf.h" +#include "gromacs/mdlib/constr.h" +#include "gromacs/mdlib/force.h" +#include "gromacs/mdlib/forcerec.h" +#include "gromacs/mdlib/gmx_omp_nthreads.h" +#include "gromacs/mdlib/integrator.h" +#include "gromacs/mdlib/main.h" +#include "gromacs/mdlib/md_support.h" +#include "gromacs/mdlib/mdatoms.h" +#include "gromacs/mdlib/mdrun.h" +#include "gromacs/mdlib/minimize.h" +#include "gromacs/mdlib/nbnxn_search.h" +#include "gromacs/mdlib/qmmm.h" +#include "gromacs/mdlib/sighandler.h" +#include "gromacs/mdlib/sim_util.h" +#include "gromacs/mdlib/tpi.h" +#include "gromacs/mdrunutility/threadaffinity.h" +#include "gromacs/mdtypes/commrec.h" +#include "gromacs/mdtypes/inputrec.h" +#include "gromacs/mdtypes/md_enums.h" +#include "gromacs/mdtypes/state.h" +#include "gromacs/pbcutil/pbc.h" +#include "gromacs/pulling/pull.h" +#include "gromacs/pulling/pull_rotation.h" +#include "gromacs/timing/wallcycle.h" +#include "gromacs/topology/mtop_util.h" +#include "gromacs/trajectory/trajectoryframe.h" +#include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/exceptions.h" +#include "gromacs/utility/fatalerror.h" +#include "gromacs/utility/gmxassert.h" +#include "gromacs/utility/gmxmpi.h" +#include "gromacs/utility/pleasecite.h" +#include "gromacs/utility/smalloc.h" + +#include "deform.h" +#include "md.h" +#include "repl_ex.h" +#include "resource-division.h" + +#ifdef GMX_FAHCORE +#include "corewrap.h" +#endif + +//! First step used in pressure scaling +gmx_int64_t deform_init_init_step_tpx; +//! Initial box for pressure scaling +matrix deform_init_box_tpx; +//! MPI variable for use in pressure scaling +tMPI_Thread_mutex_t deform_init_box_mutex = TMPI_THREAD_MUTEX_INITIALIZER; + +#if GMX_THREAD_MPI +/* The minimum number of atoms per tMPI thread. 
With fewer atoms than this, + * the number of threads will get lowered. + */ +#define MIN_ATOMS_PER_MPI_THREAD 90 +#define MIN_ATOMS_PER_GPU 900 + +struct mdrunner_arglist +{ + gmx_hw_opt_t hw_opt; + FILE *fplog; + t_commrec *cr; + int nfile; + const t_filenm *fnm; + const gmx_output_env_t *oenv; + gmx_bool bVerbose; + int nstglobalcomm; + ivec ddxyz; + int dd_rank_order; + int npme; + real rdd; + real rconstr; + const char *dddlb_opt; + real dlb_scale; + const char *ddcsx; + const char *ddcsy; + const char *ddcsz; + const char *nbpu_opt; + int nstlist_cmdline; + gmx_int64_t nsteps_cmdline; + int nstepout; + int resetstep; + int nmultisim; + int repl_ex_nst; + int repl_ex_nex; + int repl_ex_seed; + real pforce; + real cpt_period; + real max_hours; + int imdport; + unsigned long Flags; +}; + + +/* The function used for spawning threads. Extracts the mdrunner() + arguments from its one argument and calls mdrunner(), after making + a commrec. */ +static void mdrunner_start_fn(void *arg) +{ + try + { + struct mdrunner_arglist *mda = (struct mdrunner_arglist*)arg; + struct mdrunner_arglist mc = *mda; /* copy the arg list to make sure + that it's thread-local. This doesn't + copy pointed-to items, of course, + but those are all const. */ + t_commrec *cr; /* we need a local version of this */ + FILE *fplog = NULL; + t_filenm *fnm; + + fnm = dup_tfn(mc.nfile, mc.fnm); + + cr = reinitialize_commrec_for_this_thread(mc.cr); + + if (MASTER(cr)) + { + fplog = mc.fplog; + } + + gmx::mdrunner(&mc.hw_opt, fplog, cr, mc.nfile, fnm, mc.oenv, + mc.bVerbose, mc.nstglobalcomm, + mc.ddxyz, mc.dd_rank_order, mc.npme, mc.rdd, + mc.rconstr, mc.dddlb_opt, mc.dlb_scale, + mc.ddcsx, mc.ddcsy, mc.ddcsz, + mc.nbpu_opt, mc.nstlist_cmdline, + mc.nsteps_cmdline, mc.nstepout, mc.resetstep, + mc.nmultisim, mc.repl_ex_nst, mc.repl_ex_nex, mc.repl_ex_seed, mc.pforce, + mc.cpt_period, mc.max_hours, mc.imdport, mc.Flags); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; +} + + +/* called by mdrunner() to start a specific number of threads (including + the main thread) for thread-parallel runs. This in turn calls mdrunner() + for each thread. + All options besides nthreads are the same as for mdrunner(). 
*/ +static t_commrec *mdrunner_start_threads(gmx_hw_opt_t *hw_opt, + FILE *fplog, t_commrec *cr, int nfile, + const t_filenm fnm[], const gmx_output_env_t *oenv, gmx_bool bVerbose, + int nstglobalcomm, + ivec ddxyz, int dd_rank_order, int npme, + real rdd, real rconstr, + const char *dddlb_opt, real dlb_scale, + const char *ddcsx, const char *ddcsy, const char *ddcsz, + const char *nbpu_opt, int nstlist_cmdline, + gmx_int64_t nsteps_cmdline, + int nstepout, int resetstep, + int nmultisim, int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, + real pforce, real cpt_period, real max_hours, + unsigned long Flags) +{ + int ret; + struct mdrunner_arglist *mda; + t_commrec *crn; /* the new commrec */ + t_filenm *fnmn; + + /* first check whether we even need to start tMPI */ + if (hw_opt->nthreads_tmpi < 2) + { + return cr; + } + + /* a few small, one-time, almost unavoidable memory leaks: */ + snew(mda, 1); + fnmn = dup_tfn(nfile, fnm); + + /* fill the data structure to pass as void pointer to thread start fn */ + /* hw_opt contains pointers, which should all be NULL at this stage */ + mda->hw_opt = *hw_opt; + mda->fplog = fplog; + mda->cr = cr; + mda->nfile = nfile; + mda->fnm = fnmn; + mda->oenv = oenv; + mda->bVerbose = bVerbose; + mda->nstglobalcomm = nstglobalcomm; + mda->ddxyz[XX] = ddxyz[XX]; + mda->ddxyz[YY] = ddxyz[YY]; + mda->ddxyz[ZZ] = ddxyz[ZZ]; + mda->dd_rank_order = dd_rank_order; + mda->npme = npme; + mda->rdd = rdd; + mda->rconstr = rconstr; + mda->dddlb_opt = dddlb_opt; + mda->dlb_scale = dlb_scale; + mda->ddcsx = ddcsx; + mda->ddcsy = ddcsy; + mda->ddcsz = ddcsz; + mda->nbpu_opt = nbpu_opt; + mda->nstlist_cmdline = nstlist_cmdline; + mda->nsteps_cmdline = nsteps_cmdline; + mda->nstepout = nstepout; + mda->resetstep = resetstep; + mda->nmultisim = nmultisim; + mda->repl_ex_nst = repl_ex_nst; + mda->repl_ex_nex = repl_ex_nex; + mda->repl_ex_seed = repl_ex_seed; + mda->pforce = pforce; + mda->cpt_period = cpt_period; + mda->max_hours = max_hours; + mda->Flags = Flags; + + /* now spawn new threads that start mdrunner_start_fn(), while + the main thread returns, we set thread affinity later */ + ret = tMPI_Init_fn(TRUE, hw_opt->nthreads_tmpi, TMPI_AFFINITY_NONE, + mdrunner_start_fn, (void*)(mda) ); + if (ret != TMPI_SUCCESS) + { + return NULL; + } + + crn = reinitialize_commrec_for_this_thread(cr); + return crn; +} + +#endif /* GMX_THREAD_MPI */ + + +/*! \brief Cost of non-bonded kernels + * + * We determine the extra cost of the non-bonded kernels compared to + * a reference nstlist value of 10 (which is the default in grompp). + */ +static const int nbnxnReferenceNstlist = 10; +//! The values to try when switching +const int nstlist_try[] = { 20, 25, 40 }; +//! Number of elements in the neighborsearch list trials. +#define NNSTL sizeof(nstlist_try)/sizeof(nstlist_try[0]) +/* Increase nstlist until the non-bonded cost increases more than listfac_ok, + * but never more than listfac_max. + * A standard (protein+)water system at 300K with PME ewald_rtol=1e-5 + * needs 1.28 at rcoulomb=0.9 and 1.24 at rcoulomb=1.0 to get to nstlist=40. + * Note that both CPU and GPU factors are conservative. Performance should + * not go down due to this tuning, except with a relatively slow GPU. + * On the other hand, at medium/high parallelization or with fast GPUs + * nstlist will not be increased enough to reach optimal performance. + */ +/* CPU: pair-search is about a factor 1.5 slower than the non-bonded kernel */ +//! 
+static const float nbnxn_cpu_listfac_ok    = 1.05;
+//! Too high performance ratio between force calc and neighbor searching
+static const float nbnxn_cpu_listfac_max   = 1.09;
+/* KNL: pair-search is about a factor 2-3 slower than the non-bonded kernel */
+//! Max OK performance ratio between force calc and neighbor searching
+static const float nbnxn_knl_listfac_ok    = 1.22;
+//! Too high performance ratio between force calc and neighbor searching
+static const float nbnxn_knl_listfac_max   = 1.3;
+/* GPU: pair-search is a factor 1.5-3 slower than the non-bonded kernel */
+//! Max OK performance ratio between force calc and neighbor searching
+static const float nbnxn_gpu_listfac_ok    = 1.20;
+//! Too high performance ratio between force calc and neighbor searching
+static const float nbnxn_gpu_listfac_max   = 1.30;
+
+/*! \brief Try to increase nstlist when using the Verlet cut-off scheme */
+static void increase_nstlist(FILE *fp, t_commrec *cr,
+                             t_inputrec *ir, int nstlist_cmdline,
+                             const gmx_mtop_t *mtop, matrix box,
+                             gmx_bool bGPU, const gmx::CpuInfo &cpuinfo)
+{
+    float                  listfac_ok, listfac_max;
+    int                    nstlist_orig, nstlist_prev;
+    verletbuf_list_setup_t ls;
+    real                   rlistWithReferenceNstlist, rlist_inc, rlist_ok, rlist_max;
+    real                   rlist_new, rlist_prev;
+    size_t                 nstlist_ind = 0;
+    t_state                state_tmp;
+    gmx_bool               bBox, bDD, bCont;
+    const char            *nstl_gpu = "\nFor optimal performance with a GPU nstlist (now %d) should be larger.\nThe optimum depends on your CPU and GPU resources.\nYou might want to try several nstlist values.\n";
+    const char            *nve_err  = "Can not increase nstlist because an NVE ensemble is used";
+    const char            *vbd_err  = "Can not increase nstlist because verlet-buffer-tolerance is not set or used";
+    const char            *box_err  = "Can not increase nstlist because the box is too small";
+    const char            *dd_err   = "Can not increase nstlist because of domain decomposition limitations";
+    char                   buf[STRLEN];
+
+    if (nstlist_cmdline <= 0)
+    {
+        if (ir->nstlist == 1)
+        {
+            /* The user probably set nstlist=1 for a reason,
+             * don't mess with the settings.
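+             * (With the Verlet scheme, nstlist=1 means the pair list is
+             * rebuilt every single step.)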
+             */
+            return;
+        }
+
+        if (fp != NULL && bGPU && ir->nstlist < nstlist_try[0])
+        {
+            fprintf(fp, nstl_gpu, ir->nstlist);
+        }
+        nstlist_ind = 0;
+        while (nstlist_ind < NNSTL && ir->nstlist >= nstlist_try[nstlist_ind])
+        {
+            nstlist_ind++;
+        }
+        if (nstlist_ind == NNSTL)
+        {
+            /* There is no larger nstlist value to try */
+            return;
+        }
+    }
+
+    if (EI_MD(ir->eI) && ir->etc == etcNO)
+    {
+        if (MASTER(cr))
+        {
+            fprintf(stderr, "%s\n", nve_err);
+        }
+        if (fp != NULL)
+        {
+            fprintf(fp, "%s\n", nve_err);
+        }
+
+        return;
+    }
+
+    if (ir->verletbuf_tol == 0 && bGPU)
+    {
+        gmx_fatal(FARGS, "You are using an old tpr file with a GPU, please generate a new tpr file with an up-to-date version of grompp");
+    }
+
+    if (ir->verletbuf_tol < 0)
+    {
+        if (MASTER(cr))
+        {
+            fprintf(stderr, "%s\n", vbd_err);
+        }
+        if (fp != NULL)
+        {
+            fprintf(fp, "%s\n", vbd_err);
+        }
+
+        return;
+    }
+
+    if (bGPU)
+    {
+        listfac_ok  = nbnxn_gpu_listfac_ok;
+        listfac_max = nbnxn_gpu_listfac_max;
+    }
+    else if (cpuinfo.feature(gmx::CpuInfo::Feature::X86_Avx512ER))
+    {
+        listfac_ok  = nbnxn_knl_listfac_ok;
+        listfac_max = nbnxn_knl_listfac_max;
+    }
+    else
+    {
+        listfac_ok  = nbnxn_cpu_listfac_ok;
+        listfac_max = nbnxn_cpu_listfac_max;
+    }
+
+    nstlist_orig = ir->nstlist;
+    if (nstlist_cmdline > 0)
+    {
+        if (fp)
+        {
+            sprintf(buf, "Getting nstlist=%d from command line option",
+                    nstlist_cmdline);
+        }
+        ir->nstlist = nstlist_cmdline;
+    }
+
+    verletbuf_get_list_setup(TRUE, bGPU, &ls);
+
+    /* Allow rlist to make the list a given factor larger than the list
+     * would be with the reference value for nstlist (10).
+     */
+    nstlist_prev = ir->nstlist;
+    ir->nstlist  = nbnxnReferenceNstlist;
+    calc_verlet_buffer_size(mtop, det(box), ir, -1, &ls, NULL,
+                            &rlistWithReferenceNstlist);
+    ir->nstlist  = nstlist_prev;
+
+    /* Determine the pair list size increase due to zero interactions */
+    rlist_inc = nbnxn_get_rlist_effective_inc(ls.cluster_size_j,
+                                              mtop->natoms/det(box));
+    /* The pair-list cost scales roughly with the volume, i.e. with the cube
+     * of the effective list radius (rlist + rlist_inc), so an acceptable
+     * cost factor listfac translates into a cbrt(listfac) radius scaling.
+     */
+    rlist_ok  = (rlistWithReferenceNstlist + rlist_inc)*std::cbrt(listfac_ok) - rlist_inc;
+    rlist_max = (rlistWithReferenceNstlist + rlist_inc)*std::cbrt(listfac_max) - rlist_inc;
+    if (debug)
+    {
+        fprintf(debug, "nstlist tuning: rlist_inc %.3f rlist_ok %.3f rlist_max %.3f\n",
+                rlist_inc, rlist_ok, rlist_max);
+    }
+
+    nstlist_prev = nstlist_orig;
+    rlist_prev   = ir->rlist;
+    do
+    {
+        if (nstlist_cmdline <= 0)
+        {
+            ir->nstlist = nstlist_try[nstlist_ind];
+        }
+
+        /* Set the pair-list buffer size in ir */
+        calc_verlet_buffer_size(mtop, det(box), ir, -1, &ls, NULL, &rlist_new);
+
+        /* Does rlist fit in the box?
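+         * (max_cutoff2() returns the square of the largest cut-off the box
+         * and PBC type can accommodate, hence the squared comparison below.)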
*/ + bBox = (gmx::square(rlist_new) < max_cutoff2(ir->ePBC, box)); + bDD = TRUE; + if (bBox && DOMAINDECOMP(cr)) + { + /* Check if rlist fits in the domain decomposition */ + if (inputrec2nboundeddim(ir) < DIM) + { + gmx_incons("Changing nstlist with domain decomposition and unbounded dimensions is not implemented yet"); + } + copy_mat(box, state_tmp.box); + bDD = change_dd_cutoff(cr, &state_tmp, ir, rlist_new); + } + + if (debug) + { + fprintf(debug, "nstlist %d rlist %.3f bBox %d bDD %d\n", + ir->nstlist, rlist_new, bBox, bDD); + } + + bCont = FALSE; + + if (nstlist_cmdline <= 0) + { + if (bBox && bDD && rlist_new <= rlist_max) + { + /* Increase nstlist */ + nstlist_prev = ir->nstlist; + rlist_prev = rlist_new; + bCont = (nstlist_ind+1 < NNSTL && rlist_new < rlist_ok); + } + else + { + /* Stick with the previous nstlist */ + ir->nstlist = nstlist_prev; + rlist_new = rlist_prev; + bBox = TRUE; + bDD = TRUE; + } + } + + nstlist_ind++; + } + while (bCont); + + if (!bBox || !bDD) + { + gmx_warning(!bBox ? box_err : dd_err); + if (fp != NULL) + { + fprintf(fp, "\n%s\n", bBox ? box_err : dd_err); + } + ir->nstlist = nstlist_orig; + } + else if (ir->nstlist != nstlist_orig || rlist_new != ir->rlist) + { + sprintf(buf, "Changing nstlist from %d to %d, rlist from %g to %g", + nstlist_orig, ir->nstlist, + ir->rlist, rlist_new); + if (MASTER(cr)) + { + fprintf(stderr, "%s\n\n", buf); + } + if (fp != NULL) + { + fprintf(fp, "%s\n\n", buf); + } + ir->rlist = rlist_new; + } +} + +/*! \brief Initialize variables for Verlet scheme simulation */ +static void prepare_verlet_scheme(FILE *fplog, + t_commrec *cr, + t_inputrec *ir, + int nstlist_cmdline, + const gmx_mtop_t *mtop, + matrix box, + gmx_bool bUseGPU, + const gmx::CpuInfo &cpuinfo) +{ + /* For NVE simulations, we will retain the initial list buffer */ + if (EI_DYNAMICS(ir->eI) && + ir->verletbuf_tol > 0 && + !(EI_MD(ir->eI) && ir->etc == etcNO)) + { + /* Update the Verlet buffer size for the current run setup */ + verletbuf_list_setup_t ls; + real rlist_new; + + /* Here we assume SIMD-enabled kernels are being used. But as currently + * calc_verlet_buffer_size gives the same results for 4x8 and 4x4 + * and 4x2 gives a larger buffer than 4x4, this is ok. + */ + verletbuf_get_list_setup(TRUE, bUseGPU, &ls); + + calc_verlet_buffer_size(mtop, det(box), ir, -1, &ls, NULL, &rlist_new); + + if (rlist_new != ir->rlist) + { + if (fplog != NULL) + { + fprintf(fplog, "\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n", + ir->rlist, rlist_new, + ls.cluster_size_i, ls.cluster_size_j); + } + ir->rlist = rlist_new; + } + } + + if (nstlist_cmdline > 0 && (!EI_DYNAMICS(ir->eI) || ir->verletbuf_tol <= 0)) + { + gmx_fatal(FARGS, "Can not set nstlist without %s", + !EI_DYNAMICS(ir->eI) ? "dynamics" : "verlet-buffer-tolerance"); + } + + if (EI_DYNAMICS(ir->eI)) + { + /* Set or try nstlist values */ + increase_nstlist(fplog, cr, ir, nstlist_cmdline, mtop, box, bUseGPU, cpuinfo); + } +} + +/*! 
\brief Override the nsteps value in inputrec
+ *
+ * with the value passed on the command line (if any)
+ */
+static void override_nsteps_cmdline(FILE *fplog,
+                                    gmx_int64_t nsteps_cmdline,
+                                    t_inputrec *ir,
+                                    const t_commrec *cr)
+{
+    assert(ir);
+    assert(cr);
+
+    /* override with anything else than the default -2 */
+    if (nsteps_cmdline > -2)
+    {
+        char sbuf_steps[STEPSTRSIZE];
+        char sbuf_msg[STRLEN];
+
+        ir->nsteps = nsteps_cmdline;
+        if (EI_DYNAMICS(ir->eI) && nsteps_cmdline != -1)
+        {
+            sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps, %.3g ps",
+                    gmx_step_str(nsteps_cmdline, sbuf_steps),
+                    fabs(nsteps_cmdline*ir->delta_t));
+        }
+        else
+        {
+            sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps",
+                    gmx_step_str(nsteps_cmdline, sbuf_steps));
+        }
+
+        md_print_warn(cr, fplog, "%s\n", sbuf_msg);
+    }
+    else if (nsteps_cmdline < -2)
+    {
+        /* nsteps_cmdline is 64-bit, so format it with gmx_step_str()
+         * rather than %d, which would truncate it. */
+        char sbuf_steps[STEPSTRSIZE];
+
+        gmx_fatal(FARGS, "Invalid nsteps value passed on the command line: %s",
+                  gmx_step_str(nsteps_cmdline, sbuf_steps));
+    }
+    /* Do nothing if nsteps_cmdline == -2 */
+}
+
+namespace gmx
+{
+
+//! \brief Return the correct integrator function.
+static integrator_t *my_integrator(unsigned int ei)
+{
+    switch (ei)
+    {
+        case eiMD:
+        case eiBD:
+        case eiSD1:
+        case eiVV:
+        case eiVVAK:
+            if (!EI_DYNAMICS(ei))
+            {
+                GMX_THROW(APIError("do_md integrator would be called for a non-dynamical integrator"));
+            }
+            return do_md;
+        case eiSteep:
+            return do_steep;
+        case eiCG:
+            return do_cg;
+        case eiNM:
+            return do_nm;
+        case eiLBFGS:
+            return do_lbfgs;
+        case eiTPI:
+        case eiTPIC:
+            if (!EI_TPI(ei))
+            {
+                GMX_THROW(APIError("do_tpi integrator would be called for a non-TPI integrator"));
+            }
+            return do_tpi;
+        case eiSD2_REMOVED:
+            GMX_THROW(NotImplementedError("SD2 integrator has been removed"));
+        default:
+            GMX_THROW(APIError("Non-existent integrator selected"));
+    }
+}
+
+int mdrunner(gmx_hw_opt_t *hw_opt,
+             FILE *fplog, t_commrec *cr, int nfile,
+             const t_filenm fnm[], const gmx_output_env_t *oenv, gmx_bool bVerbose,
+             int nstglobalcomm,
+             ivec ddxyz, int dd_rank_order, int npme, real rdd, real rconstr,
+             const char *dddlb_opt, real dlb_scale,
+             const char *ddcsx, const char *ddcsy, const char *ddcsz,
+             const char *nbpu_opt, int nstlist_cmdline,
+             gmx_int64_t nsteps_cmdline, int nstepout, int resetstep,
+             int gmx_unused nmultisim, int repl_ex_nst, int repl_ex_nex,
+             int repl_ex_seed, real pforce, real cpt_period, real max_hours,
+             int imdport, unsigned long Flags)
+{
+    gmx_bool                  bForceUseGPU, bTryUseGPU, bRerunMD;
+    t_inputrec               *inputrec;
+    t_state                  *state = NULL;
+    matrix                    box;
+    gmx_ddbox_t               ddbox = {0};
+    int                       npme_major, npme_minor;
+    t_nrnb                   *nrnb;
+    gmx_mtop_t               *mtop = NULL;
+    t_mdatoms                *mdatoms = NULL;
+    t_forcerec               *fr = NULL;
+    t_fcdata                 *fcd = NULL;
+    real                      ewaldcoeff_q  = 0;
+    real                      ewaldcoeff_lj = 0;
+    struct gmx_pme_t        **pmedata = NULL;
+    gmx_vsite_t              *vsite = NULL;
+    gmx_constr_t              constr;
+    int                       nChargePerturbed = -1, nTypePerturbed = 0, status;
+    gmx_wallcycle_t           wcycle;
+    gmx_walltime_accounting_t walltime_accounting = NULL;
+    int                       rc;
+    gmx_int64_t               reset_counters;
+    gmx_edsam_t               ed = NULL;
+    int                       nthreads_pme = 1;
+    gmx_hw_info_t            *hwinfo = NULL;
+    /* The master rank decides early on bUseGPU and broadcasts this later */
+    gmx_bool                  bUseGPU = FALSE;
+
+    /* CAUTION: threads may be started later on in this function, so
+       cr doesn't reflect the final parallel state right now */
+    snew(inputrec, 1);
+    snew(mtop, 1);
+
+    if (Flags & MD_APPENDFILES)
+    {
+        fplog = NULL;
+    }
+
+    bRerunMD     = (Flags & MD_RERUN);
+    bForceUseGPU =
(strncmp(nbpu_opt, "gpu", 3) == 0); + bTryUseGPU = (strncmp(nbpu_opt, "auto", 4) == 0) || bForceUseGPU; + + /* Detect hardware, gather information. This is an operation that is + * global for this process (MPI rank). */ + hwinfo = gmx_detect_hardware(fplog, cr, bTryUseGPU); + + gmx_print_detected_hardware(fplog, cr, hwinfo); + + if (fplog != NULL) + { + /* Print references after all software/hardware printing */ + please_cite(fplog, "Abraham2015"); + please_cite(fplog, "Pall2015"); + please_cite(fplog, "Pronk2013"); + please_cite(fplog, "Hess2008b"); + please_cite(fplog, "Spoel2005a"); + please_cite(fplog, "Lindahl2001a"); + please_cite(fplog, "Berendsen95a"); + } + + snew(state, 1); + if (SIMMASTER(cr)) + { + /* Read (nearly) all data required for the simulation */ + read_tpx_state(ftp2fn(efTPR, nfile, fnm), inputrec, state, mtop); + + if (inputrec->cutoff_scheme == ecutsVERLET) + { + /* Here the master rank decides if all ranks will use GPUs */ + bUseGPU = (hwinfo->gpu_info.n_dev_compatible > 0 || + getenv("GMX_EMULATE_GPU") != NULL); + + /* TODO add GPU kernels for this and replace this check by: + * (bUseGPU && (ir->vdwtype == evdwPME && + * ir->ljpme_combination_rule == eljpmeLB)) + * update the message text and the content of nbnxn_acceleration_supported. + */ + if (bUseGPU && + !nbnxn_gpu_acceleration_supported(fplog, cr, inputrec, bRerunMD)) + { + /* Fallback message printed by nbnxn_acceleration_supported */ + if (bForceUseGPU) + { + gmx_fatal(FARGS, "GPU acceleration requested, but not supported with the given input settings"); + } + bUseGPU = FALSE; + } + + prepare_verlet_scheme(fplog, cr, + inputrec, nstlist_cmdline, mtop, state->box, + bUseGPU, *hwinfo->cpuInfo); + } + else + { + if (nstlist_cmdline > 0) + { + gmx_fatal(FARGS, "Can not set nstlist with the group cut-off scheme"); + } + + if (hwinfo->gpu_info.n_dev_compatible > 0) + { + md_print_warn(cr, fplog, + "NOTE: GPU(s) found, but the current simulation can not use GPUs\n" + " To use a GPU, set the mdp option: cutoff-scheme = Verlet\n"); + } + + if (bForceUseGPU) + { + gmx_fatal(FARGS, "GPU requested, but can't be used without cutoff-scheme=Verlet"); + } + +#if GMX_TARGET_BGQ + md_print_warn(cr, fplog, + "NOTE: There is no SIMD implementation of the group scheme kernels on\n" + " BlueGene/Q. You will observe better performance from using the\n" + " Verlet cut-off scheme.\n"); +#endif + } + } + + /* Check and update the hardware options for internal consistency */ + check_and_update_hw_opt_1(hw_opt, cr, npme); + + /* Early check for externally set process affinity. */ + gmx_check_thread_affinity_set(fplog, cr, + hw_opt, hwinfo->nthreads_hw_avail, FALSE); + +#if GMX_THREAD_MPI + if (SIMMASTER(cr)) + { + if (npme > 0 && hw_opt->nthreads_tmpi <= 0) + { + gmx_fatal(FARGS, "You need to explicitly specify the number of MPI threads (-ntmpi) when using separate PME ranks"); + } + + /* Since the master knows the cut-off scheme, update hw_opt for this. + * This is done later for normal MPI and also once more with tMPI + * for all tMPI ranks. + */ + check_and_update_hw_opt_2(hw_opt, inputrec->cutoff_scheme); + + /* NOW the threads will be started: */ + hw_opt->nthreads_tmpi = get_nthreads_mpi(hwinfo, + hw_opt, + inputrec, mtop, + cr, fplog, bUseGPU); + + if (hw_opt->nthreads_tmpi > 1) + { + t_commrec *cr_old = cr; + /* now start the threads. 
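+             * Each spawned thread re-enters mdrunner() via
+             * mdrunner_start_fn() with its own commrec; only the
+             * master thread keeps the original log file pointer.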
+             */
+            cr = mdrunner_start_threads(hw_opt, fplog, cr_old, nfile, fnm,
+                                        oenv, bVerbose, nstglobalcomm,
+                                        ddxyz, dd_rank_order, npme, rdd, rconstr,
+                                        dddlb_opt, dlb_scale, ddcsx, ddcsy, ddcsz,
+                                        nbpu_opt, nstlist_cmdline,
+                                        nsteps_cmdline, nstepout, resetstep, nmultisim,
+                                        repl_ex_nst, repl_ex_nex, repl_ex_seed, pforce,
+                                        cpt_period, max_hours,
+                                        Flags);
+            /* the main thread continues here with a new cr. We don't deallocate
+               the old cr because other threads may still be reading it. */
+            if (cr == NULL)
+            {
+                gmx_comm("Failed to spawn threads");
+            }
+        }
+    }
+#endif
+    /* END OF CAUTION: cr is now reliable */
+
+    if (PAR(cr))
+    {
+        /* now broadcast everything to the non-master nodes/threads: */
+        init_parallel(cr, inputrec, mtop);
+
+        /* The master rank decided on the use of GPUs,
+         * broadcast this information to all ranks.
+         */
+        gmx_bcast_sim(sizeof(bUseGPU), &bUseGPU, cr);
+    }
+
+    if (fplog != NULL)
+    {
+        pr_inputrec(fplog, 0, "Input Parameters", inputrec, FALSE);
+        fprintf(fplog, "\n");
+    }
+
+    /* now make sure the state is initialized and propagated */
+    set_state_entries(state, inputrec);
+
+    /* A parallel command line option consistency check that we can
+       only do after any threads have started. */
+    if (!PAR(cr) &&
+        (ddxyz[XX] > 1 || ddxyz[YY] > 1 || ddxyz[ZZ] > 1 || npme > 0))
+    {
+        gmx_fatal(FARGS,
+                  "The -dd or -npme option requests a parallel simulation, "
+#if !GMX_MPI
+                  "but %s was compiled without threads or MPI enabled"
+#else
+#if GMX_THREAD_MPI
+                  "but the number of MPI-threads (option -ntmpi) is not set or is 1"
+#else
+                  "but %s was not started through mpirun/mpiexec or only one rank was requested through mpirun/mpiexec"
+#endif
+#endif
+                  , output_env_get_program_display_name(oenv)
+                  );
+    }
+
+    if (bRerunMD &&
+        (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
+    {
+        gmx_fatal(FARGS, "The .mdp file specified an energy minimization or normal mode algorithm, and these are not compatible with mdrun -rerun");
+    }
+
+    if (can_use_allvsall(inputrec, TRUE, cr, fplog) && DOMAINDECOMP(cr))
+    {
+        gmx_fatal(FARGS, "All-vs-all loops do not work with domain decomposition, use a single MPI rank");
+    }
+
+    if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)))
+    {
+        if (npme > 0)
+        {
+            gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr),
+                                 "PME-only ranks are requested, but the system does not use PME for electrostatics or LJ");
+        }
+
+        npme = 0;
+    }
+
+    if (bUseGPU && npme < 0)
+    {
+        /* With GPUs we don't automatically use PME-only ranks. PME ranks can
+         * improve performance with many threads per GPU, since our OpenMP
+         * scaling is bad, but it's difficult to automate the setup.
+         */
+        npme = 0;
+    }
+
+#ifdef GMX_FAHCORE
+    if (MASTER(cr))
+    {
+        fcRegisterSteps(inputrec->nsteps, inputrec->init_step);
+    }
+#endif
+
+    /* NMR restraints must be initialized before load_checkpoint,
+     * since with time averaging the history is added to t_state.
+     * For a proper consistency check we therefore need to extend
+     * t_state here.
+     * So the PME-only nodes (if present) will also initialize
+     * the distance restraints.
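+     * (init_disres below is therefore called on every rank, not only on
+     * the PP ranks.)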
+ */ + snew(fcd, 1); + + /* This needs to be called before read_checkpoint to extend the state */ + init_disres(fplog, mtop, inputrec, cr, fcd, state, repl_ex_nst > 0); + + init_orires(fplog, mtop, state->x, inputrec, cr, &(fcd->orires), + state); + + if (inputrecDeform(inputrec)) + { + /* Store the deform reference box before reading the checkpoint */ + if (SIMMASTER(cr)) + { + copy_mat(state->box, box); + } + if (PAR(cr)) + { + gmx_bcast(sizeof(box), box, cr); + } + /* Because we do not have the update struct available yet + * in which the reference values should be stored, + * we store them temporarily in static variables. + * This should be thread safe, since they are only written once + * and with identical values. + */ + tMPI_Thread_mutex_lock(&deform_init_box_mutex); + deform_init_init_step_tpx = inputrec->init_step; + copy_mat(box, deform_init_box_tpx); + tMPI_Thread_mutex_unlock(&deform_init_box_mutex); + } + + if (Flags & MD_STARTFROMCPT) + { + /* Check if checkpoint file exists before doing continuation. + * This way we can use identical input options for the first and subsequent runs... + */ + gmx_bool bReadEkin; + + load_checkpoint(opt2fn_master("-cpi", nfile, fnm, cr), &fplog, + cr, ddxyz, &npme, + inputrec, state, &bReadEkin, + (Flags & MD_APPENDFILES), + (Flags & MD_APPENDFILESSET)); + + if (bReadEkin) + { + Flags |= MD_READ_EKIN; + } + } + + if (MASTER(cr) && (Flags & MD_APPENDFILES)) + { + gmx_log_open(ftp2fn(efLOG, nfile, fnm), cr, + Flags, &fplog); + } + + /* override nsteps with value from cmdline */ + override_nsteps_cmdline(fplog, nsteps_cmdline, inputrec, cr); + + if (SIMMASTER(cr)) + { + copy_mat(state->box, box); + } + + if (PAR(cr)) + { + gmx_bcast(sizeof(box), box, cr); + } + + // TODO This should move to do_md(), because it only makes sense + // with dynamical integrators, but there is no test coverage and + // it interacts with constraints, somehow. + /* Essential dynamics */ + if (opt2bSet("-ei", nfile, fnm)) + { + /* Open input and output files, allocate space for ED data structure */ + ed = ed_open(mtop->natoms, &state->edsamstate, nfile, fnm, Flags, oenv, cr); + } + + if (PAR(cr) && !(EI_TPI(inputrec->eI) || + inputrec->eI == eiNM)) + { + cr->dd = init_domain_decomposition(fplog, cr, Flags, ddxyz, npme, + dd_rank_order, + rdd, rconstr, + dddlb_opt, dlb_scale, + ddcsx, ddcsy, ddcsz, + mtop, inputrec, + box, state->x, + &ddbox, &npme_major, &npme_minor); + } + else + { + /* PME, if used, is done on all nodes with 1D decomposition */ + cr->npmenodes = 0; + cr->duty = (DUTY_PP | DUTY_PME); + npme_major = 1; + npme_minor = 1; + + if (inputrec->ePBC == epbcSCREW) + { + gmx_fatal(FARGS, + "pbc=%s is only implemented with domain decomposition", + epbc_names[inputrec->ePBC]); + } + } + + if (PAR(cr)) + { + /* After possible communicator splitting in make_dd_communicators. + * we can set up the intra/inter node communication. + */ + gmx_setup_nodecomm(fplog, cr); + } + + /* Initialize per-physical-node MPI process/thread ID and counters. */ + gmx_init_intranode_counters(cr); +#if GMX_MPI + if (MULTISIM(cr)) + { + md_print_info(cr, fplog, + "This is simulation %d out of %d running as a composite GROMACS\n" + "multi-simulation job. Setup for this simulation:\n\n", + cr->ms->sim, cr->ms->nsim); + } + md_print_info(cr, fplog, "Using %d MPI %s\n", + cr->nnodes, +#if GMX_THREAD_MPI + cr->nnodes == 1 ? "thread" : "threads" +#else + cr->nnodes == 1 ? 
"process" : "processes" +#endif + ); + fflush(stderr); +#endif + + /* Check and update hw_opt for the cut-off scheme */ + check_and_update_hw_opt_2(hw_opt, inputrec->cutoff_scheme); + + /* Check and update hw_opt for the number of MPI ranks */ + check_and_update_hw_opt_3(hw_opt); + + gmx_omp_nthreads_init(fplog, cr, + hwinfo->nthreads_hw_avail, + hw_opt->nthreads_omp, + hw_opt->nthreads_omp_pme, + (cr->duty & DUTY_PP) == 0, + inputrec->cutoff_scheme == ecutsVERLET); + +#ifndef NDEBUG + if (EI_TPI(inputrec->eI) && + inputrec->cutoff_scheme == ecutsVERLET) + { + gmx_feenableexcept(); + } +#endif + + if (bUseGPU) + { + /* Select GPU id's to use */ + gmx_select_gpu_ids(fplog, cr, &hwinfo->gpu_info, bForceUseGPU, + &hw_opt->gpu_opt); + } + else + { + /* Ignore (potentially) manually selected GPUs */ + hw_opt->gpu_opt.n_dev_use = 0; + } + + /* check consistency across ranks of things like SIMD + * support and number of GPUs selected */ + gmx_check_hw_runconf_consistency(fplog, hwinfo, cr, hw_opt, bUseGPU); + + /* Now that we know the setup is consistent, check for efficiency */ + check_resource_division_efficiency(hwinfo, hw_opt, Flags & MD_NTOMPSET, + cr, fplog); + + if (DOMAINDECOMP(cr)) + { + /* When we share GPUs over ranks, we need to know this for the DLB */ + dd_setup_dlb_resource_sharing(cr, hwinfo, hw_opt); + } + + /* getting number of PP/PME threads + PME: env variable should be read only on one node to make sure it is + identical everywhere; + */ + nthreads_pme = gmx_omp_nthreads_get(emntPME); + + wcycle = wallcycle_init(fplog, resetstep, cr); + + if (PAR(cr)) + { + /* Master synchronizes its value of reset_counters with all nodes + * including PME only nodes */ + reset_counters = wcycle_get_reset_counters(wcycle); + gmx_bcast_sim(sizeof(reset_counters), &reset_counters, cr); + wcycle_set_reset_counters(wcycle, reset_counters); + } + + snew(nrnb, 1); + if (cr->duty & DUTY_PP) + { + bcast_state(cr, state); + + /* Initiate forcerecord */ + fr = mk_forcerec(); + fr->hwinfo = hwinfo; + fr->gpu_opt = &hw_opt->gpu_opt; + init_forcerec(fplog, fr, fcd, inputrec, mtop, cr, box, + opt2fn("-table", nfile, fnm), + opt2fn("-tablep", nfile, fnm), + opt2fn("-tableb", nfile, fnm), + nbpu_opt, + FALSE, + pforce); + + /* Initialize QM-MM */ + if (fr->bQMMM) + { + init_QMMMrec(cr, mtop, inputrec, fr); + } + + /* Initialize the mdatoms structure. + * mdatoms is not filled with atom data, + * as this can not be done now with domain decomposition. + */ + mdatoms = init_mdatoms(fplog, mtop, inputrec->efep != efepNO); + + /* Initialize the virtual site communication */ + vsite = init_vsite(mtop, cr, FALSE); + + calc_shifts(box, fr->shift_vec); + + /* With periodic molecules the charge groups should be whole at start up + * and the virtual sites should not be far from their proper positions. + */ + if (!inputrec->bContinuation && MASTER(cr) && + !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols)) + { + /* Make molecules whole at start of run */ + if (fr->ePBC != epbcNONE) + { + do_pbc_first_mtop(fplog, inputrec->ePBC, box, mtop, state->x); + } + if (vsite) + { + /* Correct initial vsite positions are required + * for the initial distribution in the domain decomposition + * and for the initial shell prediction. 
+                 */
+                construct_vsites_mtop(vsite, mtop, state->x);
+            }
+        }
+
+        if (EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype))
+        {
+            ewaldcoeff_q  = fr->ewaldcoeff_q;
+            ewaldcoeff_lj = fr->ewaldcoeff_lj;
+            pmedata       = &fr->pmedata;
+        }
+        else
+        {
+            pmedata = NULL;
+        }
+    }
+    else
+    {
+        /* This is a PME only node */
+
+        /* We don't need the state */
+        done_state(state);
+
+        ewaldcoeff_q  = calc_ewaldcoeff_q(inputrec->rcoulomb, inputrec->ewald_rtol);
+        ewaldcoeff_lj = calc_ewaldcoeff_lj(inputrec->rvdw, inputrec->ewald_rtol_lj);
+        snew(pmedata, 1);
+    }
+
+    if (hw_opt->thread_affinity != threadaffOFF)
+    {
+        /* Before setting affinity, check whether the affinity has changed
+         * since we first checked - which would indicate that probably the
+         * OpenMP library has changed it.
+         */
+        gmx_check_thread_affinity_set(fplog, cr,
+                                      hw_opt, hwinfo->nthreads_hw_avail, TRUE);
+
+        /* Set the CPU affinity */
+        gmx_set_thread_affinity(fplog, cr, hw_opt, hwinfo);
+    }
+
+    /* Initiate PME if necessary,
+     * either on all nodes or on dedicated PME nodes only. */
+    if (EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype))
+    {
+        if (mdatoms)
+        {
+            nChargePerturbed = mdatoms->nChargePerturbed;
+            if (EVDW_PME(inputrec->vdwtype))
+            {
+                nTypePerturbed = mdatoms->nTypePerturbed;
+            }
+        }
+        if (cr->npmenodes > 0)
+        {
+            /* The PME-only nodes need to know nChargePerturbed (FEP on Q)
+             * and nTypePerturbed (FEP on LJ) */
+            gmx_bcast_sim(sizeof(nChargePerturbed), &nChargePerturbed, cr);
+            gmx_bcast_sim(sizeof(nTypePerturbed), &nTypePerturbed, cr);
+        }
+
+        if (cr->duty & DUTY_PME)
+        {
+            status = gmx_pme_init(pmedata, cr, npme_major, npme_minor, inputrec,
+                                  mtop ? mtop->natoms : 0, nChargePerturbed, nTypePerturbed,
+                                  (Flags & MD_REPRODUCIBLE), nthreads_pme);
+            if (status != 0)
+            {
+                gmx_fatal(FARGS, "Error %d initializing PME", status);
+            }
+        }
+    }
+
+
+    if (EI_DYNAMICS(inputrec->eI))
+    {
+        /* Turn on signal handling on all nodes.
+         * A user signal from the PME nodes (if any)
+         * is communicated to the PP nodes.
+         */
+        signal_handler_install();
+    }
+
+    if (cr->duty & DUTY_PP)
+    {
+        /* Assumes uniform use of the number of OpenMP threads */
+        walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault));
+
+        if (inputrec->bPull)
+        {
+            /* Initialize pull code */
+            inputrec->pull_work =
+                init_pull(fplog, inputrec->pull, inputrec, nfile, fnm,
+                          mtop, cr, oenv, inputrec->fepvals->init_lambda,
+                          EI_DYNAMICS(inputrec->eI) && MASTER(cr), Flags);
+        }
+
+        if (inputrec->bRot)
+        {
+            /* Initialize enforced rotation code */
+            init_rot(fplog, inputrec, nfile, fnm, cr, state->x, state->box, mtop, oenv,
+                     bVerbose, Flags);
+        }
+
+        constr = init_constraints(fplog, mtop, inputrec, ed, state, cr);
+
+        if (DOMAINDECOMP(cr))
+        {
+            GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP");
+            /* This call is not included in init_domain_decomposition mainly
+             * because fr->cginfo_mb is set later.
+             */
+            dd_init_bondeds(fplog, cr->dd, mtop, vsite, inputrec,
+                            Flags & MD_DDBONDCHECK, fr->cginfo_mb);
+        }
+
+        /* Now do whatever the user wants us to do (how flexible...)
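+         * my_integrator() maps inputrec->eI onto the matching integrator
+         * loop (do_md, do_steep, do_cg, ...), which we invoke directly below.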
*/ + my_integrator(inputrec->eI) (fplog, cr, nfile, fnm, + oenv, bVerbose, + nstglobalcomm, + vsite, constr, + nstepout, inputrec, mtop, + fcd, state, + mdatoms, nrnb, wcycle, ed, fr, + repl_ex_nst, repl_ex_nex, repl_ex_seed, + cpt_period, max_hours, + imdport, + Flags, + walltime_accounting); + + if (inputrec->bRot) + { + finish_rot(inputrec->rot); + } + + if (inputrec->bPull) + { + finish_pull(inputrec->pull_work); + } + + } + else + { + GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP"); + /* do PME only */ + walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME)); + gmx_pmeonly(*pmedata, cr, nrnb, wcycle, walltime_accounting, ewaldcoeff_q, ewaldcoeff_lj, inputrec); + } + + wallcycle_stop(wcycle, ewcRUN); + + /* Finish up, write some stuff + * if rerunMD, don't write last frame again + */ + finish_run(fplog, cr, + inputrec, nrnb, wcycle, walltime_accounting, + fr ? fr->nbv : NULL, + EI_DYNAMICS(inputrec->eI) && !MULTISIM(cr)); + + + /* Free GPU memory and context */ + free_gpu_resources(fr, cr, &hwinfo->gpu_info, fr ? fr->gpu_opt : NULL); + + gmx_hardware_info_free(hwinfo); + + /* Does what it says */ + print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime()); + walltime_accounting_destroy(walltime_accounting); + + /* Close logfile already here if we were appending to it */ + if (MASTER(cr) && (Flags & MD_APPENDFILES)) + { + gmx_log_close(fplog); + } + + rc = (int)gmx_get_stop_condition(); + + done_ed(&ed); + +#if GMX_THREAD_MPI + /* we need to join all threads. The sub-threads join when they + exit this function, but the master thread needs to be told to + wait for that. */ + if (PAR(cr) && MASTER(cr)) + { + tMPI_Finalize(); + } +#endif + + return rc; +} + +} // namespace gmx