diff --git a/patches/gromacs-2019.1.config b/patches/gromacs-2019.1.config new file mode 100644 index 0000000000000000000000000000000000000000..4348d93fd2f9c9db2567ee332f7ba123c58b7abc --- /dev/null +++ b/patches/gromacs-2019.1.config @@ -0,0 +1,33 @@ + + +function plumed_preliminary_test(){ +# check if the README contains the word GROMACS and if gromacs has already been configured + grep -q GROMACS README 1>/dev/null 2>/dev/null +} + +function plumed_patch_info(){ +cat << EOF +PLUMED can be incorporated into gromacs using the standard patching procedure. +Patching must be done in the gromacs root directory _before_ the cmake command is invoked. + +On clusters you may want to patch gromacs using the static version of plumed; in that case, +building gromacs can result in multiple errors. One possible solution is to configure gromacs +with these additional options: + +cmake -DBUILD_SHARED_LIBS=OFF -DGMX_PREFER_STATIC_LIBS=ON + +To enable PLUMED in a gromacs simulation, one should use +mdrun with an extra -plumed flag. The flag can be used to +specify the name of the PLUMED input file, e.g.: + +gmx mdrun -plumed plumed.dat + +For more information on gromacs, you should visit http://www.gromacs.org + +EOF +} + +plumed_before_patch(){ + plumed_patch_info +} + diff --git a/patches/gromacs-2019.1.diff/src/gromacs/CMakeLists.txt b/patches/gromacs-2019.1.diff/src/gromacs/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..80f7d30c13f328603ca75f3b37b4d1b0e97eb22c --- /dev/null +++ b/patches/gromacs-2019.1.diff/src/gromacs/CMakeLists.txt @@ -0,0 +1,446 @@ +# +# This file is part of the GROMACS molecular simulation package. +# +# Copyright (c) 2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by +# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, +# and including many others, as listed in the AUTHORS file in the +# top-level source directory and at http://www.gromacs.org. +# +# GROMACS is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public License +# as published by the Free Software Foundation; either version 2.1 +# of the License, or (at your option) any later version. +# +# GROMACS is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with GROMACS; if not, see +# http://www.gnu.org/licenses, or write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# If you want to redistribute modifications to GROMACS, please +# consider that scientific software is very special. Version +# control is crucial - bugs must be traceable. We will be happy to +# consider code for inclusion in the official distribution, but +# derived work must not be called official GROMACS. Details are found +# in the README & COPYING files - if they are missing, get the +# official version at http://www.gromacs.org. +# +# To help us fund GROMACS development, we humbly ask that you cite +# the research papers on the package. Check out http://www.gromacs.org.
+ +set(LIBGROMACS_SOURCES) + +if (GMX_CLANG_CUDA) + include(gmxClangCudaUtils) +endif() + +set_property(GLOBAL PROPERTY GMX_LIBGROMACS_SOURCES) +set_property(GLOBAL PROPERTY GMX_LIBGROMACS_GPU_IMPL_SOURCES) +set_property(GLOBAL PROPERTY GMX_INSTALLED_HEADERS) +set_property(GLOBAL PROPERTY GMX_AVX_512_SOURCE) + +add_library(libgromacs_external OBJECT "") +if(CMAKE_COMPILER_IS_GNUCXX) + # Keep quiet about e.g. linearalgebra module + target_compile_options(libgromacs_external PRIVATE ${CXXFLAGS_NO_STRINGOP_TRUNCATION}) +endif() + +add_library(libgromacs_generated OBJECT "") +if (BUILD_SHARED_LIBS) + set_target_properties(libgromacs_external PROPERTIES POSITION_INDEPENDENT_CODE true) + set_target_properties(libgromacs_generated PROPERTIES POSITION_INDEPENDENT_CODE true) +endif() + +function (_gmx_add_files_to_property PROPERTY) + foreach (_file ${ARGN}) + if (IS_ABSOLUTE "${_file}") + set_property(GLOBAL APPEND PROPERTY ${PROPERTY} ${_file}) + else() + set_property(GLOBAL APPEND PROPERTY ${PROPERTY} + ${CMAKE_CURRENT_LIST_DIR}/${_file}) + endif() + endforeach() +endfunction () + +function (gmx_add_libgromacs_sources) + _gmx_add_files_to_property(GMX_LIBGROMACS_SOURCES ${ARGN}) +endfunction () + +# TODO Reconsider this, as the CUDA driver API is probably a simpler +# approach, at least for the build system. See Redmine #2530 +function (gmx_compile_cpp_as_cuda) + _gmx_add_files_to_property(GMX_LIBGROMACS_GPU_IMPL_SOURCES ${ARGN}) +endfunction () + +function (gmx_install_headers) + if (NOT GMX_BUILD_MDRUN_ONLY) + file(RELATIVE_PATH _dest ${PROJECT_SOURCE_DIR}/src ${CMAKE_CURRENT_LIST_DIR}) + install(FILES ${ARGN} + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/${_dest}" + COMPONENT development) + endif() + _gmx_add_files_to_property(GMX_INSTALLED_HEADERS ${ARGN}) +endfunction () + +function (gmx_write_installed_header_list) + get_property(_list GLOBAL PROPERTY GMX_INSTALLED_HEADERS) + string(REPLACE ";" "\n" _list "${_list}") + # TODO: Make this only update the file timestamp if the contents actually change. 
+ file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/installed-headers.txt "${_list}") +endfunction() + +add_subdirectory(gmxlib) +add_subdirectory(mdlib) +add_subdirectory(applied-forces) +add_subdirectory(listed-forces) +add_subdirectory(commandline) +add_subdirectory(domdec) +add_subdirectory(ewald) +add_subdirectory(fft) +add_subdirectory(gpu_utils) +add_subdirectory(hardware) +add_subdirectory(linearalgebra) +add_subdirectory(math) +add_subdirectory(mdrun) +add_subdirectory(mdrunutility) +add_subdirectory(mdtypes) +add_subdirectory(onlinehelp) +add_subdirectory(options) +add_subdirectory(pbcutil) +add_subdirectory(random) +add_subdirectory(restraint) +add_subdirectory(tables) +add_subdirectory(taskassignment) +add_subdirectory(timing) +add_subdirectory(topology) +add_subdirectory(trajectory) +add_subdirectory(utility) +add_subdirectory(fileio) +add_subdirectory(swap) +add_subdirectory(essentialdynamics) +add_subdirectory(pulling) +add_subdirectory(awh) +add_subdirectory(simd) +add_subdirectory(imd) +add_subdirectory(compat) +add_subdirectory(mimic) +if (NOT GMX_BUILD_MDRUN_ONLY) + add_subdirectory(gmxana) + add_subdirectory(gmxpreprocess) + add_subdirectory(correlationfunctions) + add_subdirectory(statistics) + add_subdirectory(analysisdata) + add_subdirectory(selection) + add_subdirectory(trajectoryanalysis) + add_subdirectory(energyanalysis) + add_subdirectory(tools) +endif() + +get_property(PROPERTY_SOURCES GLOBAL PROPERTY GMX_LIBGROMACS_SOURCES) +list(APPEND LIBGROMACS_SOURCES ${GMXLIB_SOURCES} ${MDLIB_SOURCES} ${PROPERTY_SOURCES}) + +# This would be the standard way to include thread_mpi, but +# we want libgromacs to link the functions directly +#if(GMX_THREAD_MPI) +# add_subdirectory(thread_mpi) +#endif() +#target_link_libraries(gmx ${GMX_EXTRA_LIBRARIES} ${THREAD_MPI_LIB}) + +tmpi_get_source_list(THREAD_MPI_SOURCES ${CMAKE_SOURCE_DIR}/src/external/thread_mpi/src) +target_sources(libgromacs_external PRIVATE ${THREAD_MPI_SOURCES}) + +configure_file(version.h.cmakein version.h) +gmx_install_headers( + analysisdata.h + commandline.h + options.h + random.h + selection.h + trajectoryanalysis.h + utility.h + ${CMAKE_CURRENT_BINARY_DIR}/version.h + ) + +# This code is here instead of utility/CMakeLists.txt, because CMake +# custom commands and source file properties can only be set in the directory +# that contains the target that uses them. +# TODO: Generate a header instead that can be included from baseversion.c. +# That probably simplifies things somewhat. 
+set(GENERATED_VERSION_FILE utility/baseversion-gen.cpp) +gmx_configure_version_file( + utility/baseversion-gen.cpp.cmakein ${GENERATED_VERSION_FILE} + REMOTE_HASH + EXTRA_VARS + GMX_SOURCE_DOI + ) +list(APPEND LIBGROMACS_SOURCES ${GENERATED_VERSION_FILE} + $<TARGET_OBJECTS:libgromacs_external> + $<TARGET_OBJECTS:libgromacs_generated>) + +# Mark some shared GPU implementation files to compile with CUDA if needed +if (GMX_USE_CUDA) + get_property(LIBGROMACS_GPU_IMPL_SOURCES GLOBAL PROPERTY GMX_LIBGROMACS_GPU_IMPL_SOURCES) + set_source_files_properties(${LIBGROMACS_GPU_IMPL_SOURCES} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ) +endif() + +# set up CUDA compilation with clang +if (GMX_CLANG_CUDA) + foreach (_file ${LIBGROMACS_SOURCES}) + get_filename_component(_ext ${_file} EXT) + get_source_file_property(_cuda_source_format ${_file} CUDA_SOURCE_PROPERTY_FORMAT) + if ("${_ext}" STREQUAL ".cu" OR _cuda_source_format) + gmx_compile_cuda_file_with_clang(${_file}) + endif() + endforeach() +endif() + +if (GMX_USE_CUDA) + # Work around FindCUDA that prevents using target_link_libraries() + # with keywords otherwise... + set(CUDA_LIBRARIES PRIVATE ${CUDA_LIBRARIES}) + if (NOT GMX_CLANG_CUDA) + cuda_add_library(libgromacs ${LIBGROMACS_SOURCES}) + else() + add_library(libgromacs ${LIBGROMACS_SOURCES}) + endif() + target_link_libraries(libgromacs PRIVATE ${CUDA_CUFFT_LIBRARIES}) +else() + add_library(libgromacs ${LIBGROMACS_SOURCES}) +endif() + +if (GMX_USE_OPENCL) + option(GMX_EXTERNAL_CLFFT "True if an external clFFT is required to be used" FALSE) + mark_as_advanced(GMX_EXTERNAL_CLFFT) + + # Default to using clFFT found on the system + # switch to quiet at the second run. + if (DEFINED clFFT_LIBRARY) + set (clFFT_FIND_QUIETLY TRUE) + endif() + find_package(clFFT) + if (NOT clFFT_FOUND) + if (GMX_EXTERNAL_CLFFT) + message(FATAL_ERROR "Did not find required external clFFT library, consider setting clFFT_ROOT_DIR") + endif() + + if(MSVC) + message(FATAL_ERROR +"An OpenCL build was requested with Visual Studio compiler, but GROMACS +requires clFFT, which was not found on your system. GROMACS does bundle +clFFT to help with building for OpenCL, but that clFFT has not yet been +ported to the more recent versions of that compiler that GROMACS itself +requires. Thus for now, OpenCL is not available with MSVC and the internal +build of clFFT in GROMACS 2019. Either change compiler, try installing +a clFFT package, or use the latest GROMACS 2018 point release.") + endif() + + # Fall back on the internal version + set (_clFFT_dir ../external/clFFT/src) + add_subdirectory(${_clFFT_dir} clFFT-build) + target_sources(libgromacs PRIVATE + $<TARGET_OBJECTS:clFFT> + ) + target_include_directories(libgromacs SYSTEM PRIVATE ${_clFFT_dir}/include) + # Use the magic variable for how to link any library needed for + # dlopen, etc. which is -ldl where needed, and empty otherwise + # (e.g. Windows, BSD, Mac). + target_link_libraries(libgromacs PRIVATE "${CMAKE_DL_LIBS}") + else() + target_link_libraries(libgromacs PRIVATE clFFT) + endif() +endif() + +# Recent versions of gcc and clang give warnings on scanner.cpp, which +# is a generated source file. These are awkward to suppress inline, so +# we do it in the compilation command (after testing that the compiler +# supports the suppressions). Same issue exists for nonbonded kernels +# so we supress them for all generated files. 
+include(CheckCXXCompilerFlag) +check_cxx_compiler_flag("-Wno-unused -Wno-unused-parameter" HAS_NO_UNUSED) +check_cxx_compiler_flag(-Wno-missing-declarations HAS_NO_MISSING_DECL) +check_cxx_compiler_flag(-Wno-missing-prototypes HAS_NO_MISSING_PROTO) +check_cxx_compiler_flag(/wd4101 HAS_NO_MSVC_UNUSED) +if (NOT MSVC) + check_cxx_compiler_flag(-wd1419 HAS_DECL_IN_SOURCE) +endif() +if (HAS_NO_UNUSED) + target_compile_options(libgromacs_generated PRIVATE "-Wno-unused;-Wno-unused-parameter") +endif() +if (HAS_NO_MISSING_DECL) + target_compile_options(libgromacs_generated PRIVATE "-Wno-missing-declarations") +endif() +# TODO The group scheme kernels don't use proper function prototype +# declarations, and clang warns about such use, which we suppress +# rather than fix. We would prefer to use no suppressions. However +# other compilers do not support such a warning suppression for C++ +# source files, and issue warnings about that. Remove the use of +# -Wno-missing-prototypes here and above when the group scheme is +# removed. +if (HAS_NO_MISSING_PROTO AND "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") + target_compile_options(libgromacs_generated PRIVATE "-Wno-missing-prototypes") +endif() +if (HAS_NO_MSVC_UNUSED) + target_compile_options(libgromacs_generated PRIVATE "/wd4101") +endif() +if (HAS_DECL_IN_SOURCE) + target_compile_options(libgromacs_generated PRIVATE "-wd1419") +endif() + +if(SIMD_AVX_512_CXX_SUPPORTED AND NOT ("${GMX_SIMD_ACTIVE}" STREQUAL "AVX_512_KNL")) + # Since we might be overriding -march=core-avx2, add a flag so we don't warn for this specific file. + # On KNL this can cause illegal instruction because the compiler might use non KNL AVX instructions + # with the SIMD_AVX_512_CXX_FLAGS flags. + set_source_files_properties(hardware/identifyavx512fmaunits.cpp PROPERTIES COMPILE_FLAGS "${SIMD_AVX_512_CXX_FLAGS} ${CXX_NO_UNUSED_OPTION_WARNING_FLAGS}") +endif() + +gmx_setup_tng_for_libgromacs() + +target_link_libraries(libgromacs + PRIVATE + ${EXTRAE_LIBRARIES} + ${GMX_EXTRA_LIBRARIES} + ${GMX_COMMON_LIBRARIES} + ${FFT_LIBRARIES} ${LINEAR_ALGEBRA_LIBRARIES} + ${THREAD_LIB} ${GMX_SHARED_LINKER_FLAGS} + ${OpenCL_LIBRARIES} + ${GMX_STDLIB_LIBRARIES} + PUBLIC + ${GMX_PUBLIC_LIBRARIES} + ${PLUMED_LOAD} + ) +set_target_properties(libgromacs PROPERTIES + OUTPUT_NAME "gromacs${GMX_LIBS_SUFFIX}" + SOVERSION ${LIBRARY_SOVERSION_MAJOR} + VERSION ${LIBRARY_VERSION} + COMPILE_FLAGS "${OpenMP_C_FLAGS}") + +gmx_manage_lmfit() +target_link_libraries(libgromacs PRIVATE lmfit) + +if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION MATCHES "^6\.0") + target_compile_options(libgromacs PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-Weverything ${IGNORED_CLANG_ALL_WARNINGS}>) +endif() +if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + target_compile_options(libgromacs PRIVATE $<$<COMPILE_LANGUAGE:CXX>:/analyze /analyze:stacksize 70000 + #Control flow warnings are disabled because the commond line output is insufficient. There is no tool + #to convert the xml report to e.g. HTML and even in Visual Studio the viewer doesn't work with cmake support. + /wd6001 #unitialized memory + /wd6011 #derefencing NULL + /wd6053 #prior call not zero-terminate + /wd6054 #might not be zero-terminated + /wd6385 #reading invalid data + /wd6386 #buffer overrun + /wd6387 #could be '0' + /wd28199 #uninitialized memory + # For compile time constant (e.g. 
templates) the following warnings have flase postives + /wd6239 #(<non-zero> && <expr>) + /wd6240 #(<expr> && <non-zero>) + /wd6294 #Ill-defined for-loop + /wd6326 #comparison of constant with other constant + /wd28020 #expression involving paramter is not true + # Misc + /wd6330 #incorrect type to function (warns for char (instead of unsigned) for isspace/isalpha/isdigit/..)) + /wd6993 #OpenMP ignored + #TODO + /wd6031 #return value ignored (important - mostly warnigns about sscanf) + /wd6244 #hides declaration (known issue - we ingore similar warnings for other compilers) + /wd6246 #hides declaration + > + ) +endif() + +if (GMX_CLANG_TIDY) + set_target_properties(libgromacs PROPERTIES CXX_CLANG_TIDY + "${CLANG_TIDY_EXE};-warnings-as-errors=*") +endif() + +gmx_write_installed_header_list() + +# Only install the library in mdrun-only mode if it is actually necessary +# for the binary +if (NOT GMX_BUILD_MDRUN_ONLY OR BUILD_SHARED_LIBS) + install(TARGETS libgromacs + EXPORT libgromacs + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + COMPONENT libraries) +endif() + +if (NOT GMX_BUILD_MDRUN_ONLY) + include(InstallLibInfo.cmake) +endif() + +# Technically, the user could want to do this for an OpenCL build +# using the CUDA runtime, but currently there's no reason to want to +# do that. +if (INSTALL_CUDART_LIB) #can be set manual by user + if (GMX_USE_CUDA) + foreach(CUDA_LIB ${CUDA_LIBRARIES}) + string(REGEX MATCH "cudart" IS_CUDART ${CUDA_LIB}) + if(IS_CUDART) #libcuda should not be installed + #install also name-links (linker uses those) + file(GLOB CUDA_LIBS ${CUDA_LIB}*) + install(FILES ${CUDA_LIBS} DESTINATION + ${CMAKE_INSTALL_LIBDIR} COMPONENT libraries) + endif() + endforeach() + else() + message(WARNING "INSTALL_CUDART_LIB only makes sense when configuring for CUDA support") + endif() +endif() + +if(GMX_USE_OPENCL) + # Install the utility headers + file(GLOB OPENCL_INSTALLED_FILES + gpu_utils/vectype_ops.clh + gpu_utils/device_utils.clh + ) + install(FILES ${OPENCL_INSTALLED_FILES} + DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/gpu_utils + COMPONENT libraries) + file(GLOB OPENCL_INSTALLED_FILES + pbcutil/ishift.h + ) + install(FILES ${OPENCL_INSTALLED_FILES} + DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/pbcutil + COMPONENT libraries) + + # Install the NB source and headers + file(GLOB OPENCL_INSTALLED_FILES + mdlib/nbnxn_consts.h + ) + install(FILES ${OPENCL_INSTALLED_FILES} + DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/mdlib + COMPONENT libraries) + file(GLOB OPENCL_INSTALLED_FILES + mdlib/nbnxn_ocl/nbnxn_ocl_kernels.cl + mdlib/nbnxn_ocl/nbnxn_ocl_kernel.clh + mdlib/nbnxn_ocl/nbnxn_ocl_kernel_pruneonly.clh + mdlib/nbnxn_ocl/nbnxn_ocl_kernels.clh + mdlib/nbnxn_ocl/nbnxn_ocl_kernels_fastgen.clh + mdlib/nbnxn_ocl/nbnxn_ocl_kernels_fastgen_add_twincut.clh + mdlib/nbnxn_ocl/nbnxn_ocl_kernel_utils.clh + mdlib/nbnxn_ocl/nbnxn_ocl_consts.h + ) + install(FILES ${OPENCL_INSTALLED_FILES} + DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/mdlib/nbnxn_ocl + COMPONENT libraries) + + # Install the PME source and headers + file(GLOB OPENCL_INSTALLED_FILES + ewald/pme-spread.clh + ewald/pme-solve.clh + ewald/pme-gather.clh + ewald/pme-gpu-utils.clh + ewald/pme-program.cl + ewald/pme-gpu-types.h + ) + install(FILES ${OPENCL_INSTALLED_FILES} + DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/ewald + COMPONENT libraries) +endif() diff --git a/patches/gromacs-2019.1.diff/src/gromacs/CMakeLists.txt.preplumed 
b/patches/gromacs-2019.1.diff/src/gromacs/CMakeLists.txt.preplumed new file mode 100644 index 0000000000000000000000000000000000000000..f94af553a91047274f879856836782856d62abcb --- /dev/null +++ b/patches/gromacs-2019.1.diff/src/gromacs/CMakeLists.txt.preplumed @@ -0,0 +1,445 @@ +# +# This file is part of the GROMACS molecular simulation package. +# +# Copyright (c) 2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by +# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, +# and including many others, as listed in the AUTHORS file in the +# top-level source directory and at http://www.gromacs.org. +# +# GROMACS is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public License +# as published by the Free Software Foundation; either version 2.1 +# of the License, or (at your option) any later version. +# +# GROMACS is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with GROMACS; if not, see +# http://www.gnu.org/licenses, or write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# If you want to redistribute modifications to GROMACS, please +# consider that scientific software is very special. Version +# control is crucial - bugs must be traceable. We will be happy to +# consider code for inclusion in the official distribution, but +# derived work must not be called official GROMACS. Details are found +# in the README & COPYING files - if they are missing, get the +# official version at http://www.gromacs.org. +# +# To help us fund GROMACS development, we humbly ask that you cite +# the research papers on the package. Check out http://www.gromacs.org. + +set(LIBGROMACS_SOURCES) + +if (GMX_CLANG_CUDA) + include(gmxClangCudaUtils) +endif() + +set_property(GLOBAL PROPERTY GMX_LIBGROMACS_SOURCES) +set_property(GLOBAL PROPERTY GMX_LIBGROMACS_GPU_IMPL_SOURCES) +set_property(GLOBAL PROPERTY GMX_INSTALLED_HEADERS) +set_property(GLOBAL PROPERTY GMX_AVX_512_SOURCE) + +add_library(libgromacs_external OBJECT "") +if(CMAKE_COMPILER_IS_GNUCXX) + # Keep quiet about e.g. linearalgebra module + target_compile_options(libgromacs_external PRIVATE ${CXXFLAGS_NO_STRINGOP_TRUNCATION}) +endif() + +add_library(libgromacs_generated OBJECT "") +if (BUILD_SHARED_LIBS) + set_target_properties(libgromacs_external PROPERTIES POSITION_INDEPENDENT_CODE true) + set_target_properties(libgromacs_generated PROPERTIES POSITION_INDEPENDENT_CODE true) +endif() + +function (_gmx_add_files_to_property PROPERTY) + foreach (_file ${ARGN}) + if (IS_ABSOLUTE "${_file}") + set_property(GLOBAL APPEND PROPERTY ${PROPERTY} ${_file}) + else() + set_property(GLOBAL APPEND PROPERTY ${PROPERTY} + ${CMAKE_CURRENT_LIST_DIR}/${_file}) + endif() + endforeach() +endfunction () + +function (gmx_add_libgromacs_sources) + _gmx_add_files_to_property(GMX_LIBGROMACS_SOURCES ${ARGN}) +endfunction () + +# TODO Reconsider this, as the CUDA driver API is probably a simpler +# approach, at least for the build system. 
See Redmine #2530 +function (gmx_compile_cpp_as_cuda) + _gmx_add_files_to_property(GMX_LIBGROMACS_GPU_IMPL_SOURCES ${ARGN}) +endfunction () + +function (gmx_install_headers) + if (NOT GMX_BUILD_MDRUN_ONLY) + file(RELATIVE_PATH _dest ${PROJECT_SOURCE_DIR}/src ${CMAKE_CURRENT_LIST_DIR}) + install(FILES ${ARGN} + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/${_dest}" + COMPONENT development) + endif() + _gmx_add_files_to_property(GMX_INSTALLED_HEADERS ${ARGN}) +endfunction () + +function (gmx_write_installed_header_list) + get_property(_list GLOBAL PROPERTY GMX_INSTALLED_HEADERS) + string(REPLACE ";" "\n" _list "${_list}") + # TODO: Make this only update the file timestamp if the contents actually change. + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/installed-headers.txt "${_list}") +endfunction() + +add_subdirectory(gmxlib) +add_subdirectory(mdlib) +add_subdirectory(applied-forces) +add_subdirectory(listed-forces) +add_subdirectory(commandline) +add_subdirectory(domdec) +add_subdirectory(ewald) +add_subdirectory(fft) +add_subdirectory(gpu_utils) +add_subdirectory(hardware) +add_subdirectory(linearalgebra) +add_subdirectory(math) +add_subdirectory(mdrun) +add_subdirectory(mdrunutility) +add_subdirectory(mdtypes) +add_subdirectory(onlinehelp) +add_subdirectory(options) +add_subdirectory(pbcutil) +add_subdirectory(random) +add_subdirectory(restraint) +add_subdirectory(tables) +add_subdirectory(taskassignment) +add_subdirectory(timing) +add_subdirectory(topology) +add_subdirectory(trajectory) +add_subdirectory(utility) +add_subdirectory(fileio) +add_subdirectory(swap) +add_subdirectory(essentialdynamics) +add_subdirectory(pulling) +add_subdirectory(awh) +add_subdirectory(simd) +add_subdirectory(imd) +add_subdirectory(compat) +add_subdirectory(mimic) +if (NOT GMX_BUILD_MDRUN_ONLY) + add_subdirectory(gmxana) + add_subdirectory(gmxpreprocess) + add_subdirectory(correlationfunctions) + add_subdirectory(statistics) + add_subdirectory(analysisdata) + add_subdirectory(selection) + add_subdirectory(trajectoryanalysis) + add_subdirectory(energyanalysis) + add_subdirectory(tools) +endif() + +get_property(PROPERTY_SOURCES GLOBAL PROPERTY GMX_LIBGROMACS_SOURCES) +list(APPEND LIBGROMACS_SOURCES ${GMXLIB_SOURCES} ${MDLIB_SOURCES} ${PROPERTY_SOURCES}) + +# This would be the standard way to include thread_mpi, but +# we want libgromacs to link the functions directly +#if(GMX_THREAD_MPI) +# add_subdirectory(thread_mpi) +#endif() +#target_link_libraries(gmx ${GMX_EXTRA_LIBRARIES} ${THREAD_MPI_LIB}) + +tmpi_get_source_list(THREAD_MPI_SOURCES ${CMAKE_SOURCE_DIR}/src/external/thread_mpi/src) +target_sources(libgromacs_external PRIVATE ${THREAD_MPI_SOURCES}) + +configure_file(version.h.cmakein version.h) +gmx_install_headers( + analysisdata.h + commandline.h + options.h + random.h + selection.h + trajectoryanalysis.h + utility.h + ${CMAKE_CURRENT_BINARY_DIR}/version.h + ) + +# This code is here instead of utility/CMakeLists.txt, because CMake +# custom commands and source file properties can only be set in the directory +# that contains the target that uses them. +# TODO: Generate a header instead that can be included from baseversion.c. +# That probably simplifies things somewhat. 
+set(GENERATED_VERSION_FILE utility/baseversion-gen.cpp) +gmx_configure_version_file( + utility/baseversion-gen.cpp.cmakein ${GENERATED_VERSION_FILE} + REMOTE_HASH + EXTRA_VARS + GMX_SOURCE_DOI + ) +list(APPEND LIBGROMACS_SOURCES ${GENERATED_VERSION_FILE} + $<TARGET_OBJECTS:libgromacs_external> + $<TARGET_OBJECTS:libgromacs_generated>) + +# Mark some shared GPU implementation files to compile with CUDA if needed +if (GMX_USE_CUDA) + get_property(LIBGROMACS_GPU_IMPL_SOURCES GLOBAL PROPERTY GMX_LIBGROMACS_GPU_IMPL_SOURCES) + set_source_files_properties(${LIBGROMACS_GPU_IMPL_SOURCES} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ) +endif() + +# set up CUDA compilation with clang +if (GMX_CLANG_CUDA) + foreach (_file ${LIBGROMACS_SOURCES}) + get_filename_component(_ext ${_file} EXT) + get_source_file_property(_cuda_source_format ${_file} CUDA_SOURCE_PROPERTY_FORMAT) + if ("${_ext}" STREQUAL ".cu" OR _cuda_source_format) + gmx_compile_cuda_file_with_clang(${_file}) + endif() + endforeach() +endif() + +if (GMX_USE_CUDA) + # Work around FindCUDA that prevents using target_link_libraries() + # with keywords otherwise... + set(CUDA_LIBRARIES PRIVATE ${CUDA_LIBRARIES}) + if (NOT GMX_CLANG_CUDA) + cuda_add_library(libgromacs ${LIBGROMACS_SOURCES}) + else() + add_library(libgromacs ${LIBGROMACS_SOURCES}) + endif() + target_link_libraries(libgromacs PRIVATE ${CUDA_CUFFT_LIBRARIES}) +else() + add_library(libgromacs ${LIBGROMACS_SOURCES}) +endif() + +if (GMX_USE_OPENCL) + option(GMX_EXTERNAL_CLFFT "True if an external clFFT is required to be used" FALSE) + mark_as_advanced(GMX_EXTERNAL_CLFFT) + + # Default to using clFFT found on the system + # switch to quiet at the second run. + if (DEFINED clFFT_LIBRARY) + set (clFFT_FIND_QUIETLY TRUE) + endif() + find_package(clFFT) + if (NOT clFFT_FOUND) + if (GMX_EXTERNAL_CLFFT) + message(FATAL_ERROR "Did not find required external clFFT library, consider setting clFFT_ROOT_DIR") + endif() + + if(MSVC) + message(FATAL_ERROR +"An OpenCL build was requested with Visual Studio compiler, but GROMACS +requires clFFT, which was not found on your system. GROMACS does bundle +clFFT to help with building for OpenCL, but that clFFT has not yet been +ported to the more recent versions of that compiler that GROMACS itself +requires. Thus for now, OpenCL is not available with MSVC and the internal +build of clFFT in GROMACS 2019. Either change compiler, try installing +a clFFT package, or use the latest GROMACS 2018 point release.") + endif() + + # Fall back on the internal version + set (_clFFT_dir ../external/clFFT/src) + add_subdirectory(${_clFFT_dir} clFFT-build) + target_sources(libgromacs PRIVATE + $<TARGET_OBJECTS:clFFT> + ) + target_include_directories(libgromacs SYSTEM PRIVATE ${_clFFT_dir}/include) + # Use the magic variable for how to link any library needed for + # dlopen, etc. which is -ldl where needed, and empty otherwise + # (e.g. Windows, BSD, Mac). + target_link_libraries(libgromacs PRIVATE "${CMAKE_DL_LIBS}") + else() + target_link_libraries(libgromacs PRIVATE clFFT) + endif() +endif() + +# Recent versions of gcc and clang give warnings on scanner.cpp, which +# is a generated source file. These are awkward to suppress inline, so +# we do it in the compilation command (after testing that the compiler +# supports the suppressions). Same issue exists for nonbonded kernels +# so we supress them for all generated files. 
+include(CheckCXXCompilerFlag) +check_cxx_compiler_flag("-Wno-unused -Wno-unused-parameter" HAS_NO_UNUSED) +check_cxx_compiler_flag(-Wno-missing-declarations HAS_NO_MISSING_DECL) +check_cxx_compiler_flag(-Wno-missing-prototypes HAS_NO_MISSING_PROTO) +check_cxx_compiler_flag(/wd4101 HAS_NO_MSVC_UNUSED) +if (NOT MSVC) + check_cxx_compiler_flag(-wd1419 HAS_DECL_IN_SOURCE) +endif() +if (HAS_NO_UNUSED) + target_compile_options(libgromacs_generated PRIVATE "-Wno-unused;-Wno-unused-parameter") +endif() +if (HAS_NO_MISSING_DECL) + target_compile_options(libgromacs_generated PRIVATE "-Wno-missing-declarations") +endif() +# TODO The group scheme kernels don't use proper function prototype +# declarations, and clang warns about such use, which we suppress +# rather than fix. We would prefer to use no suppressions. However +# other compilers do not support such a warning suppression for C++ +# source files, and issue warnings about that. Remove the use of +# -Wno-missing-prototypes here and above when the group scheme is +# removed. +if (HAS_NO_MISSING_PROTO AND "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") + target_compile_options(libgromacs_generated PRIVATE "-Wno-missing-prototypes") +endif() +if (HAS_NO_MSVC_UNUSED) + target_compile_options(libgromacs_generated PRIVATE "/wd4101") +endif() +if (HAS_DECL_IN_SOURCE) + target_compile_options(libgromacs_generated PRIVATE "-wd1419") +endif() + +if(SIMD_AVX_512_CXX_SUPPORTED AND NOT ("${GMX_SIMD_ACTIVE}" STREQUAL "AVX_512_KNL")) + # Since we might be overriding -march=core-avx2, add a flag so we don't warn for this specific file. + # On KNL this can cause illegal instruction because the compiler might use non KNL AVX instructions + # with the SIMD_AVX_512_CXX_FLAGS flags. + set_source_files_properties(hardware/identifyavx512fmaunits.cpp PROPERTIES COMPILE_FLAGS "${SIMD_AVX_512_CXX_FLAGS} ${CXX_NO_UNUSED_OPTION_WARNING_FLAGS}") +endif() + +gmx_setup_tng_for_libgromacs() + +target_link_libraries(libgromacs + PRIVATE + ${EXTRAE_LIBRARIES} + ${GMX_EXTRA_LIBRARIES} + ${GMX_COMMON_LIBRARIES} + ${FFT_LIBRARIES} ${LINEAR_ALGEBRA_LIBRARIES} + ${THREAD_LIB} ${GMX_SHARED_LINKER_FLAGS} + ${OpenCL_LIBRARIES} + ${GMX_STDLIB_LIBRARIES} + PUBLIC + ${GMX_PUBLIC_LIBRARIES} + ) +set_target_properties(libgromacs PROPERTIES + OUTPUT_NAME "gromacs${GMX_LIBS_SUFFIX}" + SOVERSION ${LIBRARY_SOVERSION_MAJOR} + VERSION ${LIBRARY_VERSION} + COMPILE_FLAGS "${OpenMP_C_FLAGS}") + +gmx_manage_lmfit() +target_link_libraries(libgromacs PRIVATE lmfit) + +if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION MATCHES "^6\.0") + target_compile_options(libgromacs PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-Weverything ${IGNORED_CLANG_ALL_WARNINGS}>) +endif() +if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + target_compile_options(libgromacs PRIVATE $<$<COMPILE_LANGUAGE:CXX>:/analyze /analyze:stacksize 70000 + #Control flow warnings are disabled because the commond line output is insufficient. There is no tool + #to convert the xml report to e.g. HTML and even in Visual Studio the viewer doesn't work with cmake support. + /wd6001 #unitialized memory + /wd6011 #derefencing NULL + /wd6053 #prior call not zero-terminate + /wd6054 #might not be zero-terminated + /wd6385 #reading invalid data + /wd6386 #buffer overrun + /wd6387 #could be '0' + /wd28199 #uninitialized memory + # For compile time constant (e.g. 
templates) the following warnings have flase postives + /wd6239 #(<non-zero> && <expr>) + /wd6240 #(<expr> && <non-zero>) + /wd6294 #Ill-defined for-loop + /wd6326 #comparison of constant with other constant + /wd28020 #expression involving paramter is not true + # Misc + /wd6330 #incorrect type to function (warns for char (instead of unsigned) for isspace/isalpha/isdigit/..)) + /wd6993 #OpenMP ignored + #TODO + /wd6031 #return value ignored (important - mostly warnigns about sscanf) + /wd6244 #hides declaration (known issue - we ingore similar warnings for other compilers) + /wd6246 #hides declaration + > + ) +endif() + +if (GMX_CLANG_TIDY) + set_target_properties(libgromacs PROPERTIES CXX_CLANG_TIDY + "${CLANG_TIDY_EXE};-warnings-as-errors=*") +endif() + +gmx_write_installed_header_list() + +# Only install the library in mdrun-only mode if it is actually necessary +# for the binary +if (NOT GMX_BUILD_MDRUN_ONLY OR BUILD_SHARED_LIBS) + install(TARGETS libgromacs + EXPORT libgromacs + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + COMPONENT libraries) +endif() + +if (NOT GMX_BUILD_MDRUN_ONLY) + include(InstallLibInfo.cmake) +endif() + +# Technically, the user could want to do this for an OpenCL build +# using the CUDA runtime, but currently there's no reason to want to +# do that. +if (INSTALL_CUDART_LIB) #can be set manual by user + if (GMX_USE_CUDA) + foreach(CUDA_LIB ${CUDA_LIBRARIES}) + string(REGEX MATCH "cudart" IS_CUDART ${CUDA_LIB}) + if(IS_CUDART) #libcuda should not be installed + #install also name-links (linker uses those) + file(GLOB CUDA_LIBS ${CUDA_LIB}*) + install(FILES ${CUDA_LIBS} DESTINATION + ${CMAKE_INSTALL_LIBDIR} COMPONENT libraries) + endif() + endforeach() + else() + message(WARNING "INSTALL_CUDART_LIB only makes sense when configuring for CUDA support") + endif() +endif() + +if(GMX_USE_OPENCL) + # Install the utility headers + file(GLOB OPENCL_INSTALLED_FILES + gpu_utils/vectype_ops.clh + gpu_utils/device_utils.clh + ) + install(FILES ${OPENCL_INSTALLED_FILES} + DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/gpu_utils + COMPONENT libraries) + file(GLOB OPENCL_INSTALLED_FILES + pbcutil/ishift.h + ) + install(FILES ${OPENCL_INSTALLED_FILES} + DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/pbcutil + COMPONENT libraries) + + # Install the NB source and headers + file(GLOB OPENCL_INSTALLED_FILES + mdlib/nbnxn_consts.h + ) + install(FILES ${OPENCL_INSTALLED_FILES} + DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/mdlib + COMPONENT libraries) + file(GLOB OPENCL_INSTALLED_FILES + mdlib/nbnxn_ocl/nbnxn_ocl_kernels.cl + mdlib/nbnxn_ocl/nbnxn_ocl_kernel.clh + mdlib/nbnxn_ocl/nbnxn_ocl_kernel_pruneonly.clh + mdlib/nbnxn_ocl/nbnxn_ocl_kernels.clh + mdlib/nbnxn_ocl/nbnxn_ocl_kernels_fastgen.clh + mdlib/nbnxn_ocl/nbnxn_ocl_kernels_fastgen_add_twincut.clh + mdlib/nbnxn_ocl/nbnxn_ocl_kernel_utils.clh + mdlib/nbnxn_ocl/nbnxn_ocl_consts.h + ) + install(FILES ${OPENCL_INSTALLED_FILES} + DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/mdlib/nbnxn_ocl + COMPONENT libraries) + + # Install the PME source and headers + file(GLOB OPENCL_INSTALLED_FILES + ewald/pme-spread.clh + ewald/pme-solve.clh + ewald/pme-gather.clh + ewald/pme-gpu-utils.clh + ewald/pme-program.cl + ewald/pme-gpu-types.h + ) + install(FILES ${OPENCL_INSTALLED_FILES} + DESTINATION ${GMX_INSTALL_OCLDIR}/gromacs/ewald + COMPONENT libraries) +endif() diff --git a/patches/gromacs-2019.1.diff/src/gromacs/mdlib/force.cpp 
b/patches/gromacs-2019.1.diff/src/gromacs/mdlib/force.cpp new file mode 100644 index 0000000000000000000000000000000000000000..32bbd0cc08ab73c71ffa8cbfba8f0cdb32d8224e --- /dev/null +++ b/patches/gromacs-2019.1.diff/src/gromacs/mdlib/force.cpp @@ -0,0 +1,879 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. 
+ */ +#include "gmxpre.h" + +#include "force.h" + +#include "config.h" + +#include <cassert> +#include <cmath> +#include <cstring> + +#include "gromacs/domdec/domdec.h" +#include "gromacs/domdec/domdec_struct.h" +#include "gromacs/ewald/ewald.h" +#include "gromacs/ewald/long-range-correction.h" +#include "gromacs/ewald/pme.h" +#include "gromacs/gmxlib/network.h" +#include "gromacs/gmxlib/nrnb.h" +#include "gromacs/gmxlib/nonbonded/nonbonded.h" +#include "gromacs/listed-forces/listed-forces.h" +#include "gromacs/math/vec.h" +#include "gromacs/math/vecdump.h" +#include "gromacs/mdlib/force_flags.h" +#include "gromacs/mdlib/forcerec-threading.h" +#include "gromacs/mdlib/mdrun.h" +#include "gromacs/mdlib/ns.h" +#include "gromacs/mdlib/qmmm.h" +#include "gromacs/mdlib/rf_util.h" +#include "gromacs/mdlib/wall.h" +#include "gromacs/mdtypes/commrec.h" +#include "gromacs/mdtypes/enerdata.h" +#include "gromacs/mdtypes/forceoutput.h" +#include "gromacs/mdtypes/forcerec.h" +#include "gromacs/mdtypes/inputrec.h" +#include "gromacs/mdtypes/md_enums.h" +#include "gromacs/pbcutil/ishift.h" +#include "gromacs/pbcutil/mshift.h" +#include "gromacs/pbcutil/pbc.h" +#include "gromacs/timing/wallcycle.h" +#include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/exceptions.h" +#include "gromacs/utility/fatalerror.h" +#include "gromacs/utility/smalloc.h" +/* PLUMED */ +#include "../../../Plumed.h" +int plumedswitch=0; +plumed plumedmain; +void(*plumedcmd)(plumed,const char*,const void*)=NULL; +/* END PLUMED */ + +void ns(FILE *fp, + t_forcerec *fr, + matrix box, + const gmx_groups_t *groups, + gmx_localtop_t *top, + const t_mdatoms *md, + const t_commrec *cr, + t_nrnb *nrnb, + gmx_bool bFillGrid) +{ + int nsearch; + + + if (!fr->ns->nblist_initialized) + { + init_neighbor_list(fp, fr, md->homenr); + } + + nsearch = search_neighbours(fp, fr, box, top, groups, cr, nrnb, md, + bFillGrid); + if (debug) + { + fprintf(debug, "nsearch = %d\n", nsearch); + } + + /* Check whether we have to do dynamic load balancing */ + /*if ((nsb->nstDlb > 0) && (mod(step,nsb->nstDlb) == 0)) + count_nb(cr,nsb,&(top->blocks[ebCGS]),nns,fr->nlr, + &(top->idef),opts->ngener); + */ + if (fr->ns->dump_nl > 0) + { + dump_nblist(fp, cr, fr, fr->ns->dump_nl); + } +} + +static void clearEwaldThreadOutput(ewald_corr_thread_t *ewc_t) +{ + ewc_t->Vcorr_q = 0; + ewc_t->Vcorr_lj = 0; + ewc_t->dvdl[efptCOUL] = 0; + ewc_t->dvdl[efptVDW] = 0; + clear_mat(ewc_t->vir_q); + clear_mat(ewc_t->vir_lj); +} + +static void reduceEwaldThreadOuput(int nthreads, ewald_corr_thread_t *ewc_t) +{ + ewald_corr_thread_t &dest = ewc_t[0]; + + for (int t = 1; t < nthreads; t++) + { + dest.Vcorr_q += ewc_t[t].Vcorr_q; + dest.Vcorr_lj += ewc_t[t].Vcorr_lj; + dest.dvdl[efptCOUL] += ewc_t[t].dvdl[efptCOUL]; + dest.dvdl[efptVDW] += ewc_t[t].dvdl[efptVDW]; + m_add(dest.vir_q, ewc_t[t].vir_q, dest.vir_q); + m_add(dest.vir_lj, ewc_t[t].vir_lj, dest.vir_lj); + } +} + +void do_force_lowlevel(t_forcerec *fr, + const t_inputrec *ir, + const t_idef *idef, + const t_commrec *cr, + const gmx_multisim_t *ms, + t_nrnb *nrnb, + gmx_wallcycle_t wcycle, + const t_mdatoms *md, + rvec x[], + history_t *hist, + rvec *forceForUseWithShiftForces, + gmx::ForceWithVirial *forceWithVirial, + gmx_enerdata_t *enerd, + t_fcdata *fcd, + matrix box, + t_lambda *fepvals, + real *lambda, + const t_graph *graph, + const t_blocka *excl, + rvec mu_tot[], + int flags, + float *cycles_pme) +{ + int i, j; + int donb_flags; + int pme_flags; + t_pbc pbc; + real dvdl_dum[efptNR], dvdl_nb[efptNR]; + +#if 
GMX_MPI + double t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */ +#endif + + set_pbc(&pbc, fr->ePBC, box); + + /* reset free energy components */ + for (i = 0; i < efptNR; i++) + { + dvdl_nb[i] = 0; + dvdl_dum[i] = 0; + } + + /* do QMMM first if requested */ + if (fr->bQMMM) + { + enerd->term[F_EQM] = calculate_QMMM(cr, forceForUseWithShiftForces, fr); + } + + /* Call the short range functions all in one go. */ + +#if GMX_MPI + /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/ +#define TAKETIME FALSE + if (TAKETIME) + { + MPI_Barrier(cr->mpi_comm_mygroup); + t0 = MPI_Wtime(); + } +#endif + + if (ir->nwall) + { + /* foreign lambda component for walls */ + real dvdl_walls = do_walls(*ir, *fr, box, *md, x, + forceWithVirial, lambda[efptVDW], + enerd->grpp.ener[egLJSR], nrnb); + enerd->dvdl_lin[efptVDW] += dvdl_walls; + } + + /* We only do non-bonded calculation with group scheme here, the verlet + * calls are done from do_force_cutsVERLET(). */ + if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED)) + { + donb_flags = 0; + /* Add short-range interactions */ + donb_flags |= GMX_NONBONDED_DO_SR; + + /* Currently all group scheme kernels always calculate (shift-)forces */ + if (flags & GMX_FORCE_FORCES) + { + donb_flags |= GMX_NONBONDED_DO_FORCE; + } + if (flags & GMX_FORCE_VIRIAL) + { + donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE; + } + if (flags & GMX_FORCE_ENERGY) + { + donb_flags |= GMX_NONBONDED_DO_POTENTIAL; + } + + wallcycle_sub_start(wcycle, ewcsNONBONDED); + do_nonbonded(fr, x, forceForUseWithShiftForces, md, excl, + &enerd->grpp, nrnb, + lambda, dvdl_nb, -1, -1, donb_flags); + + /* If we do foreign lambda and we have soft-core interactions + * we have to recalculate the (non-linear) energies contributions. + */ + if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0) + { + for (i = 0; i < enerd->n_lambda; i++) + { + real lam_i[efptNR]; + + for (j = 0; j < efptNR; j++) + { + lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]); + } + reset_foreign_enerdata(enerd); + do_nonbonded(fr, x, forceForUseWithShiftForces, md, excl, + &(enerd->foreign_grpp), nrnb, + lam_i, dvdl_dum, -1, -1, + (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA); + sum_epot(&(enerd->foreign_grpp), enerd->foreign_term); + enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; + } + } + wallcycle_sub_stop(wcycle, ewcsNONBONDED); + } + +#if GMX_MPI + if (TAKETIME) + { + t1 = MPI_Wtime(); + fr->t_fnbf += t1-t0; + } +#endif + + if (fepvals->sc_alpha != 0) + { + enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW]; + } + else + { + enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW]; + } + + if (fepvals->sc_alpha != 0) + + /* even though coulomb part is linear, we already added it, beacuse we + need to go through the vdw calculation anyway */ + { + enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL]; + } + else + { + enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL]; + } + + if (debug) + { + pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS); + } + + /* Shift the coordinates. Must be done before listed forces and PPPM, + * but is also necessary for SHAKE and update, therefore it can NOT + * go when no listed forces have to be evaluated. + * + * The shifting and PBC code is deliberately not timed, since with + * the Verlet scheme it only takes non-zero time with triclinic + * boxes, and even then the time is around a factor of 100 less + * than the next smallest counter. 
+ */ + + + /* Here sometimes we would not need to shift with NBFonly, + * but we do so anyhow for consistency of the returned coordinates. + */ + if (graph) + { + shift_self(graph, box, x); + if (TRICLINIC(box)) + { + inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes); + } + else + { + inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes); + } + } + /* Check whether we need to do listed interactions or correct for exclusions */ + if (fr->bMolPBC && + ((flags & GMX_FORCE_LISTED) + || EEL_RF(fr->ic->eeltype) || EEL_FULL(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))) + { + /* TODO There are no electrostatics methods that require this + transformation, when using the Verlet scheme, so update the + above conditional. */ + /* Since all atoms are in the rectangular or triclinic unit-cell, + * only single box vector shifts (2 in x) are required. + */ + set_pbc_dd(&pbc, fr->ePBC, DOMAINDECOMP(cr) ? cr->dd->nc : nullptr, + TRUE, box); + } + + do_force_listed(wcycle, box, ir->fepvals, cr, ms, + idef, x, hist, + forceForUseWithShiftForces, forceWithVirial, + fr, &pbc, graph, enerd, nrnb, lambda, md, fcd, + DOMAINDECOMP(cr) ? cr->dd->globalAtomIndices.data() : nullptr, + flags); + + + *cycles_pme = 0; + + /* Do long-range electrostatics and/or LJ-PME, including related short-range + * corrections. + */ + if (EEL_FULL(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype)) + { + int status = 0; + real Vlr_q = 0, Vlr_lj = 0; + + /* We reduce all virial, dV/dlambda and energy contributions, except + * for the reciprocal energies (Vlr_q, Vlr_lj) into the same struct. + */ + ewald_corr_thread_t &ewaldOutput = fr->ewc_t[0]; + clearEwaldThreadOutput(&ewaldOutput); + + if (EEL_PME_EWALD(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype)) + { + /* With the Verlet scheme exclusion forces are calculated + * in the non-bonded kernel. + */ + /* The TPI molecule does not have exclusions with the rest + * of the system and no intra-molecular PME grid + * contributions will be calculated in + * gmx_pme_calc_energy. + */ + if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) || + ir->ewald_geometry != eewg3D || + ir->epsilon_surface != 0) + { + int nthreads, t; + + wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION); + + if (fr->n_tpi > 0) + { + gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions"); + } + + nthreads = fr->nthread_ewc; +#pragma omp parallel for num_threads(nthreads) schedule(static) + for (t = 0; t < nthreads; t++) + { + try + { + ewald_corr_thread_t &ewc_t = fr->ewc_t[t]; + if (t > 0) + { + clearEwaldThreadOutput(&ewc_t); + } + + /* Threading is only supported with the Verlet cut-off + * scheme and then only single particle forces (no + * exclusion forces) are calculated, so we can store + * the forces in the normal, single forceWithVirial->force_ array. 
+ */ + ewald_LRcorrection(md->homenr, cr, nthreads, t, fr, ir, + md->chargeA, md->chargeB, + md->sqrt_c6A, md->sqrt_c6B, + md->sigmaA, md->sigmaB, + md->sigma3A, md->sigma3B, + (md->nChargePerturbed != 0) || (md->nTypePerturbed != 0), + ir->cutoff_scheme != ecutsVERLET, + excl, x, box, mu_tot, + ir->ewald_geometry, + ir->epsilon_surface, + as_rvec_array(forceWithVirial->force_.data()), + ewc_t.vir_q, ewc_t.vir_lj, + &ewc_t.Vcorr_q, &ewc_t.Vcorr_lj, + lambda[efptCOUL], lambda[efptVDW], + &ewc_t.dvdl[efptCOUL], &ewc_t.dvdl[efptVDW]); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; + } + if (nthreads > 1) + { + reduceEwaldThreadOuput(nthreads, fr->ewc_t); + } + wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION); + } + + if (EEL_PME_EWALD(fr->ic->eeltype) && fr->n_tpi == 0) + { + /* This is not in a subcounter because it takes a + negligible and constant-sized amount of time */ + ewaldOutput.Vcorr_q += + ewald_charge_correction(cr, fr, lambda[efptCOUL], box, + &ewaldOutput.dvdl[efptCOUL], + ewaldOutput.vir_q); + } + + if ((EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype)) && + thisRankHasDuty(cr, DUTY_PME) && (pme_run_mode(fr->pmedata) == PmeRunMode::CPU)) + { + /* Do reciprocal PME for Coulomb and/or LJ. */ + assert(fr->n_tpi >= 0); + if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED)) + { + pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE; + + if (flags & GMX_FORCE_FORCES) + { + pme_flags |= GMX_PME_CALC_F; + } + if (flags & GMX_FORCE_VIRIAL) + { + pme_flags |= GMX_PME_CALC_ENER_VIR; + } + if (fr->n_tpi > 0) + { + /* We don't calculate f, but we do want the potential */ + pme_flags |= GMX_PME_CALC_POT; + } + + /* With domain decomposition we close the CPU side load + * balancing region here, because PME does global + * communication that acts as a global barrier. + */ + if (DOMAINDECOMP(cr)) + { + ddCloseBalanceRegionCpu(cr->dd); + } + + wallcycle_start(wcycle, ewcPMEMESH); + status = gmx_pme_do(fr->pmedata, + 0, md->homenr - fr->n_tpi, + x, + as_rvec_array(forceWithVirial->force_.data()), + md->chargeA, md->chargeB, + md->sqrt_c6A, md->sqrt_c6B, + md->sigmaA, md->sigmaB, + box, cr, + DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0, + DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0, + nrnb, wcycle, + ewaldOutput.vir_q, ewaldOutput.vir_lj, + &Vlr_q, &Vlr_lj, + lambda[efptCOUL], lambda[efptVDW], + &ewaldOutput.dvdl[efptCOUL], + &ewaldOutput.dvdl[efptVDW], + pme_flags); + *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH); + if (status != 0) + { + gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status); + } + + /* We should try to do as little computation after + * this as possible, because parallel PME synchronizes + * the nodes, so we want all load imbalance of the + * rest of the force calculation to be before the PME + * call. DD load balancing is done on the whole time + * of the force call (without PME). + */ + } + if (fr->n_tpi > 0) + { + if (EVDW_PME(ir->vdwtype)) + { + + gmx_fatal(FARGS, "Test particle insertion not implemented with LJ-PME"); + } + /* Determine the PME grid energy of the test molecule + * with the PME grid potential of the other charges. 
+ */ + gmx_pme_calc_energy(fr->pmedata, fr->n_tpi, + x + md->homenr - fr->n_tpi, + md->chargeA + md->homenr - fr->n_tpi, + &Vlr_q); + } + } + } + + if (!EEL_PME(fr->ic->eeltype) && EEL_PME_EWALD(fr->ic->eeltype)) + { + Vlr_q = do_ewald(ir, x, as_rvec_array(forceWithVirial->force_.data()), + md->chargeA, md->chargeB, + box, cr, md->homenr, + ewaldOutput.vir_q, fr->ic->ewaldcoeff_q, + lambda[efptCOUL], &ewaldOutput.dvdl[efptCOUL], + fr->ewald_table); + } + + /* Note that with separate PME nodes we get the real energies later */ + // TODO it would be simpler if we just accumulated a single + // long-range virial contribution. + forceWithVirial->addVirialContribution(ewaldOutput.vir_q); + forceWithVirial->addVirialContribution(ewaldOutput.vir_lj); + enerd->dvdl_lin[efptCOUL] += ewaldOutput.dvdl[efptCOUL]; + enerd->dvdl_lin[efptVDW] += ewaldOutput.dvdl[efptVDW]; + enerd->term[F_COUL_RECIP] = Vlr_q + ewaldOutput.Vcorr_q; + enerd->term[F_LJ_RECIP] = Vlr_lj + ewaldOutput.Vcorr_lj; + + if (debug) + { + fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n", + Vlr_q, ewaldOutput.Vcorr_q, enerd->term[F_COUL_RECIP]); + pr_rvecs(debug, 0, "vir_el_recip after corr", ewaldOutput.vir_q, DIM); + pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS); + fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n", + Vlr_lj, ewaldOutput.Vcorr_lj, enerd->term[F_LJ_RECIP]); + pr_rvecs(debug, 0, "vir_lj_recip after corr", ewaldOutput.vir_lj, DIM); + } + } + else + { + /* Is there a reaction-field exclusion correction needed? + * With the Verlet scheme, exclusion forces are calculated + * in the non-bonded kernel. + */ + if (ir->cutoff_scheme != ecutsVERLET && EEL_RF(fr->ic->eeltype)) + { + real dvdl_rf_excl = 0; + enerd->term[F_RF_EXCL] = + RF_excl_correction(fr, graph, md, excl, DOMAINDECOMP(cr), + x, forceForUseWithShiftForces, + fr->fshift, &pbc, lambda[efptCOUL], &dvdl_rf_excl); + + enerd->dvdl_lin[efptCOUL] += dvdl_rf_excl; + } + } + + if (debug) + { + print_nrnb(debug, nrnb); + } + +#if GMX_MPI + if (TAKETIME) + { + t2 = MPI_Wtime(); + MPI_Barrier(cr->mpi_comm_mygroup); + t3 = MPI_Wtime(); + fr->t_wait += t3-t2; + if (fr->timesteps == 11) + { + char buf[22]; + fprintf(stderr, "* PP load balancing info: rank %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n", + cr->nodeid, gmx_step_str(fr->timesteps, buf), + 100*fr->t_wait/(fr->t_wait+fr->t_fnbf), + (fr->t_fnbf+fr->t_wait)/fr->t_fnbf); + } + fr->timesteps++; + } +#endif + + if (debug) + { + pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS); + } + + /* PLUMED */ + if(plumedswitch){ + int plumedNeedsEnergy; + (*plumedcmd)(plumedmain,"isEnergyNeeded",&plumedNeedsEnergy); + if(!plumedNeedsEnergy) (*plumedcmd)(plumedmain,"performCalc",NULL); + } + /* END PLUMED */ +} + +void init_enerdata(int ngener, int n_lambda, gmx_enerdata_t *enerd) +{ + int i, n2; + + for (i = 0; i < F_NRE; i++) + { + enerd->term[i] = 0; + enerd->foreign_term[i] = 0; + } + + + for (i = 0; i < efptNR; i++) + { + enerd->dvdl_lin[i] = 0; + enerd->dvdl_nonlin[i] = 0; + } + + n2 = ngener*ngener; + if (debug) + { + fprintf(debug, "Creating %d sized group matrix for energies\n", n2); + } + enerd->grpp.nener = n2; + enerd->foreign_grpp.nener = n2; + for (i = 0; (i < egNR); i++) + { + snew(enerd->grpp.ener[i], n2); + snew(enerd->foreign_grpp.ener[i], n2); + } + + if (n_lambda) + { + enerd->n_lambda = 1 + n_lambda; + snew(enerd->enerpart_lambda, enerd->n_lambda); + } + else + { + enerd->n_lambda = 0; + } +} + +void destroy_enerdata(gmx_enerdata_t 
*enerd) +{ + int i; + + for (i = 0; (i < egNR); i++) + { + sfree(enerd->grpp.ener[i]); + } + + for (i = 0; (i < egNR); i++) + { + sfree(enerd->foreign_grpp.ener[i]); + } + + if (enerd->n_lambda) + { + sfree(enerd->enerpart_lambda); + } +} + +static real sum_v(int n, const real v[]) +{ + real t; + int i; + + t = 0.0; + for (i = 0; (i < n); i++) + { + t = t + v[i]; + } + + return t; +} + +void sum_epot(gmx_grppairener_t *grpp, real *epot) +{ + int i; + + /* Accumulate energies */ + epot[F_COUL_SR] = sum_v(grpp->nener, grpp->ener[egCOULSR]); + epot[F_LJ] = sum_v(grpp->nener, grpp->ener[egLJSR]); + epot[F_LJ14] = sum_v(grpp->nener, grpp->ener[egLJ14]); + epot[F_COUL14] = sum_v(grpp->nener, grpp->ener[egCOUL14]); + +/* lattice part of LR doesnt belong to any group + * and has been added earlier + */ + epot[F_BHAM] = sum_v(grpp->nener, grpp->ener[egBHAMSR]); + + epot[F_EPOT] = 0; + for (i = 0; (i < F_EPOT); i++) + { + if (i != F_DISRESVIOL && i != F_ORIRESDEV) + { + epot[F_EPOT] += epot[i]; + } + } +} + +void sum_dhdl(gmx_enerdata_t *enerd, gmx::ArrayRef<const real> lambda, t_lambda *fepvals) +{ + int index; + + enerd->dvdl_lin[efptVDW] += enerd->term[F_DVDL_VDW]; /* include dispersion correction */ + enerd->term[F_DVDL] = 0.0; + for (int i = 0; i < efptNR; i++) + { + if (fepvals->separate_dvdl[i]) + { + /* could this be done more readably/compactly? */ + switch (i) + { + case (efptMASS): + index = F_DKDL; + break; + case (efptCOUL): + index = F_DVDL_COUL; + break; + case (efptVDW): + index = F_DVDL_VDW; + break; + case (efptBONDED): + index = F_DVDL_BONDED; + break; + case (efptRESTRAINT): + index = F_DVDL_RESTRAINT; + break; + default: + index = F_DVDL; + break; + } + enerd->term[index] = enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i]; + if (debug) + { + fprintf(debug, "dvdl-%s[%2d]: %f: non-linear %f + linear %f\n", + efpt_names[i], i, enerd->term[index], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]); + } + } + else + { + enerd->term[F_DVDL] += enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i]; + if (debug) + { + fprintf(debug, "dvd-%sl[%2d]: %f: non-linear %f + linear %f\n", + efpt_names[0], i, enerd->term[F_DVDL], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]); + } + } + } + + if (fepvals->separate_dvdl[efptBONDED]) + { + enerd->term[F_DVDL_BONDED] += enerd->term[F_DVDL_CONSTR]; + } + else + { + enerd->term[F_DVDL] += enerd->term[F_DVDL_CONSTR]; + } + + for (int i = 0; i < fepvals->n_lambda; i++) + { + /* note we are iterating over fepvals here! + For the current lam, dlam = 0 automatically, + so we don't need to add anything to the + enerd->enerpart_lambda[0] */ + + /* we don't need to worry about dvdl_lin contributions to dE at + current lambda, because the contributions to the current + lambda are automatically zeroed */ + + double &enerpart_lambda = enerd->enerpart_lambda[i + 1]; + + for (gmx::index j = 0; j < lambda.size(); j++) + { + /* Note that this loop is over all dhdl components, not just the separated ones */ + const double dlam = fepvals->all_lambda[j][i] - lambda[j]; + + enerpart_lambda += dlam*enerd->dvdl_lin[j]; + + /* Constraints can not be evaluated at foreign lambdas, so we add + * a linear extrapolation. This is an approximation, but usually + * quite accurate since constraints change little between lambdas. 
+ */ + if ((j == efptBONDED && fepvals->separate_dvdl[efptBONDED]) || + (j == efptFEP && !fepvals->separate_dvdl[efptBONDED])) + { + enerpart_lambda += dlam*enerd->term[F_DVDL_CONSTR]; + } + + if (j == efptMASS && !fepvals->separate_dvdl[j]) + { + enerpart_lambda += dlam*enerd->term[F_DKDL]; + } + + if (debug) + { + fprintf(debug, "enerdiff lam %g: (%15s), non-linear %f linear %f*%f\n", + fepvals->all_lambda[j][i], efpt_names[j], + enerpart_lambda - enerd->enerpart_lambda[0], + dlam, enerd->dvdl_lin[j]); + } + } + } + + /* The constrain contribution is now included in other terms, so clear it */ + enerd->term[F_DVDL_CONSTR] = 0; +} + + +void reset_foreign_enerdata(gmx_enerdata_t *enerd) +{ + int i, j; + + /* First reset all foreign energy components. Foreign energies always called on + neighbor search steps */ + for (i = 0; (i < egNR); i++) + { + for (j = 0; (j < enerd->grpp.nener); j++) + { + enerd->foreign_grpp.ener[i][j] = 0.0; + } + } + + /* potential energy components */ + for (i = 0; (i <= F_EPOT); i++) + { + enerd->foreign_term[i] = 0.0; + } +} + +void reset_enerdata(gmx_enerdata_t *enerd) +{ + int i, j; + + /* First reset all energy components. */ + for (i = 0; (i < egNR); i++) + { + for (j = 0; (j < enerd->grpp.nener); j++) + { + enerd->grpp.ener[i][j] = 0.0; + } + } + for (i = 0; i < efptNR; i++) + { + enerd->dvdl_lin[i] = 0.0; + enerd->dvdl_nonlin[i] = 0.0; + } + + /* Normal potential energy components */ + for (i = 0; (i <= F_EPOT); i++) + { + enerd->term[i] = 0.0; + } + enerd->term[F_DVDL] = 0.0; + enerd->term[F_DVDL_COUL] = 0.0; + enerd->term[F_DVDL_VDW] = 0.0; + enerd->term[F_DVDL_BONDED] = 0.0; + enerd->term[F_DVDL_RESTRAINT] = 0.0; + enerd->term[F_DKDL] = 0.0; + if (enerd->n_lambda > 0) + { + for (i = 0; i < enerd->n_lambda; i++) + { + enerd->enerpart_lambda[i] = 0.0; + } + } + /* reset foreign energy data - separate function since we also call it elsewhere */ + reset_foreign_enerdata(enerd); +} diff --git a/patches/gromacs-2019.1.diff/src/gromacs/mdlib/force.cpp.preplumed b/patches/gromacs-2019.1.diff/src/gromacs/mdlib/force.cpp.preplumed new file mode 100644 index 0000000000000000000000000000000000000000..8a7615365140ca43108e1a445b360f36696a93b3 --- /dev/null +++ b/patches/gromacs-2019.1.diff/src/gromacs/mdlib/force.cpp.preplumed @@ -0,0 +1,866 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +#include "gmxpre.h" + +#include "force.h" + +#include "config.h" + +#include <cassert> +#include <cmath> +#include <cstring> + +#include "gromacs/domdec/domdec.h" +#include "gromacs/domdec/domdec_struct.h" +#include "gromacs/ewald/ewald.h" +#include "gromacs/ewald/long-range-correction.h" +#include "gromacs/ewald/pme.h" +#include "gromacs/gmxlib/network.h" +#include "gromacs/gmxlib/nrnb.h" +#include "gromacs/gmxlib/nonbonded/nonbonded.h" +#include "gromacs/listed-forces/listed-forces.h" +#include "gromacs/math/vec.h" +#include "gromacs/math/vecdump.h" +#include "gromacs/mdlib/force_flags.h" +#include "gromacs/mdlib/forcerec-threading.h" +#include "gromacs/mdlib/mdrun.h" +#include "gromacs/mdlib/ns.h" +#include "gromacs/mdlib/qmmm.h" +#include "gromacs/mdlib/rf_util.h" +#include "gromacs/mdlib/wall.h" +#include "gromacs/mdtypes/commrec.h" +#include "gromacs/mdtypes/enerdata.h" +#include "gromacs/mdtypes/forceoutput.h" +#include "gromacs/mdtypes/forcerec.h" +#include "gromacs/mdtypes/inputrec.h" +#include "gromacs/mdtypes/md_enums.h" +#include "gromacs/pbcutil/ishift.h" +#include "gromacs/pbcutil/mshift.h" +#include "gromacs/pbcutil/pbc.h" +#include "gromacs/timing/wallcycle.h" +#include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/exceptions.h" +#include "gromacs/utility/fatalerror.h" +#include "gromacs/utility/smalloc.h" + +void ns(FILE *fp, + t_forcerec *fr, + matrix box, + const gmx_groups_t *groups, + gmx_localtop_t *top, + const t_mdatoms *md, + const t_commrec *cr, + t_nrnb *nrnb, + gmx_bool bFillGrid) +{ + int nsearch; + + + if (!fr->ns->nblist_initialized) + { + init_neighbor_list(fp, fr, md->homenr); + } + + nsearch = search_neighbours(fp, fr, box, top, groups, cr, nrnb, md, + bFillGrid); + if (debug) + { + fprintf(debug, "nsearch = %d\n", nsearch); + } + + /* Check whether we have to do dynamic load balancing */ + /*if ((nsb->nstDlb > 0) && (mod(step,nsb->nstDlb) == 0)) + count_nb(cr,nsb,&(top->blocks[ebCGS]),nns,fr->nlr, + &(top->idef),opts->ngener); + */ + if (fr->ns->dump_nl > 0) + { + dump_nblist(fp, cr, fr, fr->ns->dump_nl); + } +} + +static void clearEwaldThreadOutput(ewald_corr_thread_t *ewc_t) +{ + ewc_t->Vcorr_q = 0; + ewc_t->Vcorr_lj = 0; + ewc_t->dvdl[efptCOUL] = 0; + ewc_t->dvdl[efptVDW] = 0; + clear_mat(ewc_t->vir_q); + clear_mat(ewc_t->vir_lj); +} + +static void reduceEwaldThreadOuput(int nthreads, ewald_corr_thread_t *ewc_t) +{ + ewald_corr_thread_t &dest = ewc_t[0]; + + for (int t = 1; t < nthreads; t++) + { + dest.Vcorr_q += ewc_t[t].Vcorr_q; + dest.Vcorr_lj += ewc_t[t].Vcorr_lj; + dest.dvdl[efptCOUL] += ewc_t[t].dvdl[efptCOUL]; + dest.dvdl[efptVDW] += ewc_t[t].dvdl[efptVDW]; + m_add(dest.vir_q, 
ewc_t[t].vir_q, dest.vir_q); + m_add(dest.vir_lj, ewc_t[t].vir_lj, dest.vir_lj); + } +} + +void do_force_lowlevel(t_forcerec *fr, + const t_inputrec *ir, + const t_idef *idef, + const t_commrec *cr, + const gmx_multisim_t *ms, + t_nrnb *nrnb, + gmx_wallcycle_t wcycle, + const t_mdatoms *md, + rvec x[], + history_t *hist, + rvec *forceForUseWithShiftForces, + gmx::ForceWithVirial *forceWithVirial, + gmx_enerdata_t *enerd, + t_fcdata *fcd, + matrix box, + t_lambda *fepvals, + real *lambda, + const t_graph *graph, + const t_blocka *excl, + rvec mu_tot[], + int flags, + float *cycles_pme) +{ + int i, j; + int donb_flags; + int pme_flags; + t_pbc pbc; + real dvdl_dum[efptNR], dvdl_nb[efptNR]; + +#if GMX_MPI + double t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */ +#endif + + set_pbc(&pbc, fr->ePBC, box); + + /* reset free energy components */ + for (i = 0; i < efptNR; i++) + { + dvdl_nb[i] = 0; + dvdl_dum[i] = 0; + } + + /* do QMMM first if requested */ + if (fr->bQMMM) + { + enerd->term[F_EQM] = calculate_QMMM(cr, forceForUseWithShiftForces, fr); + } + + /* Call the short range functions all in one go. */ + +#if GMX_MPI + /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/ +#define TAKETIME FALSE + if (TAKETIME) + { + MPI_Barrier(cr->mpi_comm_mygroup); + t0 = MPI_Wtime(); + } +#endif + + if (ir->nwall) + { + /* foreign lambda component for walls */ + real dvdl_walls = do_walls(*ir, *fr, box, *md, x, + forceWithVirial, lambda[efptVDW], + enerd->grpp.ener[egLJSR], nrnb); + enerd->dvdl_lin[efptVDW] += dvdl_walls; + } + + /* We only do non-bonded calculation with group scheme here, the verlet + * calls are done from do_force_cutsVERLET(). */ + if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED)) + { + donb_flags = 0; + /* Add short-range interactions */ + donb_flags |= GMX_NONBONDED_DO_SR; + + /* Currently all group scheme kernels always calculate (shift-)forces */ + if (flags & GMX_FORCE_FORCES) + { + donb_flags |= GMX_NONBONDED_DO_FORCE; + } + if (flags & GMX_FORCE_VIRIAL) + { + donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE; + } + if (flags & GMX_FORCE_ENERGY) + { + donb_flags |= GMX_NONBONDED_DO_POTENTIAL; + } + + wallcycle_sub_start(wcycle, ewcsNONBONDED); + do_nonbonded(fr, x, forceForUseWithShiftForces, md, excl, + &enerd->grpp, nrnb, + lambda, dvdl_nb, -1, -1, donb_flags); + + /* If we do foreign lambda and we have soft-core interactions + * we have to recalculate the (non-linear) energies contributions. + */ + if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0) + { + for (i = 0; i < enerd->n_lambda; i++) + { + real lam_i[efptNR]; + + for (j = 0; j < efptNR; j++) + { + lam_i[j] = (i == 0 ? 
lambda[j] : fepvals->all_lambda[j][i-1]); + } + reset_foreign_enerdata(enerd); + do_nonbonded(fr, x, forceForUseWithShiftForces, md, excl, + &(enerd->foreign_grpp), nrnb, + lam_i, dvdl_dum, -1, -1, + (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA); + sum_epot(&(enerd->foreign_grpp), enerd->foreign_term); + enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; + } + } + wallcycle_sub_stop(wcycle, ewcsNONBONDED); + } + +#if GMX_MPI + if (TAKETIME) + { + t1 = MPI_Wtime(); + fr->t_fnbf += t1-t0; + } +#endif + + if (fepvals->sc_alpha != 0) + { + enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW]; + } + else + { + enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW]; + } + + if (fepvals->sc_alpha != 0) + + /* even though coulomb part is linear, we already added it, beacuse we + need to go through the vdw calculation anyway */ + { + enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL]; + } + else + { + enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL]; + } + + if (debug) + { + pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS); + } + + /* Shift the coordinates. Must be done before listed forces and PPPM, + * but is also necessary for SHAKE and update, therefore it can NOT + * go when no listed forces have to be evaluated. + * + * The shifting and PBC code is deliberately not timed, since with + * the Verlet scheme it only takes non-zero time with triclinic + * boxes, and even then the time is around a factor of 100 less + * than the next smallest counter. + */ + + + /* Here sometimes we would not need to shift with NBFonly, + * but we do so anyhow for consistency of the returned coordinates. + */ + if (graph) + { + shift_self(graph, box, x); + if (TRICLINIC(box)) + { + inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes); + } + else + { + inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes); + } + } + /* Check whether we need to do listed interactions or correct for exclusions */ + if (fr->bMolPBC && + ((flags & GMX_FORCE_LISTED) + || EEL_RF(fr->ic->eeltype) || EEL_FULL(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))) + { + /* TODO There are no electrostatics methods that require this + transformation, when using the Verlet scheme, so update the + above conditional. */ + /* Since all atoms are in the rectangular or triclinic unit-cell, + * only single box vector shifts (2 in x) are required. + */ + set_pbc_dd(&pbc, fr->ePBC, DOMAINDECOMP(cr) ? cr->dd->nc : nullptr, + TRUE, box); + } + + do_force_listed(wcycle, box, ir->fepvals, cr, ms, + idef, x, hist, + forceForUseWithShiftForces, forceWithVirial, + fr, &pbc, graph, enerd, nrnb, lambda, md, fcd, + DOMAINDECOMP(cr) ? cr->dd->globalAtomIndices.data() : nullptr, + flags); + + + *cycles_pme = 0; + + /* Do long-range electrostatics and/or LJ-PME, including related short-range + * corrections. + */ + if (EEL_FULL(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype)) + { + int status = 0; + real Vlr_q = 0, Vlr_lj = 0; + + /* We reduce all virial, dV/dlambda and energy contributions, except + * for the reciprocal energies (Vlr_q, Vlr_lj) into the same struct. + */ + ewald_corr_thread_t &ewaldOutput = fr->ewc_t[0]; + clearEwaldThreadOutput(&ewaldOutput); + + if (EEL_PME_EWALD(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype)) + { + /* With the Verlet scheme exclusion forces are calculated + * in the non-bonded kernel. + */ + /* The TPI molecule does not have exclusions with the rest + * of the system and no intra-molecular PME grid + * contributions will be calculated in + * gmx_pme_calc_energy. 
+ */ + if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) || + ir->ewald_geometry != eewg3D || + ir->epsilon_surface != 0) + { + int nthreads, t; + + wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION); + + if (fr->n_tpi > 0) + { + gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions"); + } + + nthreads = fr->nthread_ewc; +#pragma omp parallel for num_threads(nthreads) schedule(static) + for (t = 0; t < nthreads; t++) + { + try + { + ewald_corr_thread_t &ewc_t = fr->ewc_t[t]; + if (t > 0) + { + clearEwaldThreadOutput(&ewc_t); + } + + /* Threading is only supported with the Verlet cut-off + * scheme and then only single particle forces (no + * exclusion forces) are calculated, so we can store + * the forces in the normal, single forceWithVirial->force_ array. + */ + ewald_LRcorrection(md->homenr, cr, nthreads, t, fr, ir, + md->chargeA, md->chargeB, + md->sqrt_c6A, md->sqrt_c6B, + md->sigmaA, md->sigmaB, + md->sigma3A, md->sigma3B, + (md->nChargePerturbed != 0) || (md->nTypePerturbed != 0), + ir->cutoff_scheme != ecutsVERLET, + excl, x, box, mu_tot, + ir->ewald_geometry, + ir->epsilon_surface, + as_rvec_array(forceWithVirial->force_.data()), + ewc_t.vir_q, ewc_t.vir_lj, + &ewc_t.Vcorr_q, &ewc_t.Vcorr_lj, + lambda[efptCOUL], lambda[efptVDW], + &ewc_t.dvdl[efptCOUL], &ewc_t.dvdl[efptVDW]); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; + } + if (nthreads > 1) + { + reduceEwaldThreadOuput(nthreads, fr->ewc_t); + } + wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION); + } + + if (EEL_PME_EWALD(fr->ic->eeltype) && fr->n_tpi == 0) + { + /* This is not in a subcounter because it takes a + negligible and constant-sized amount of time */ + ewaldOutput.Vcorr_q += + ewald_charge_correction(cr, fr, lambda[efptCOUL], box, + &ewaldOutput.dvdl[efptCOUL], + ewaldOutput.vir_q); + } + + if ((EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype)) && + thisRankHasDuty(cr, DUTY_PME) && (pme_run_mode(fr->pmedata) == PmeRunMode::CPU)) + { + /* Do reciprocal PME for Coulomb and/or LJ. */ + assert(fr->n_tpi >= 0); + if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED)) + { + pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE; + + if (flags & GMX_FORCE_FORCES) + { + pme_flags |= GMX_PME_CALC_F; + } + if (flags & GMX_FORCE_VIRIAL) + { + pme_flags |= GMX_PME_CALC_ENER_VIR; + } + if (fr->n_tpi > 0) + { + /* We don't calculate f, but we do want the potential */ + pme_flags |= GMX_PME_CALC_POT; + } + + /* With domain decomposition we close the CPU side load + * balancing region here, because PME does global + * communication that acts as a global barrier. + */ + if (DOMAINDECOMP(cr)) + { + ddCloseBalanceRegionCpu(cr->dd); + } + + wallcycle_start(wcycle, ewcPMEMESH); + status = gmx_pme_do(fr->pmedata, + 0, md->homenr - fr->n_tpi, + x, + as_rvec_array(forceWithVirial->force_.data()), + md->chargeA, md->chargeB, + md->sqrt_c6A, md->sqrt_c6B, + md->sigmaA, md->sigmaB, + box, cr, + DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0, + DOMAINDECOMP(cr) ? 
dd_pme_maxshift_y(cr->dd) : 0, + nrnb, wcycle, + ewaldOutput.vir_q, ewaldOutput.vir_lj, + &Vlr_q, &Vlr_lj, + lambda[efptCOUL], lambda[efptVDW], + &ewaldOutput.dvdl[efptCOUL], + &ewaldOutput.dvdl[efptVDW], + pme_flags); + *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH); + if (status != 0) + { + gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status); + } + + /* We should try to do as little computation after + * this as possible, because parallel PME synchronizes + * the nodes, so we want all load imbalance of the + * rest of the force calculation to be before the PME + * call. DD load balancing is done on the whole time + * of the force call (without PME). + */ + } + if (fr->n_tpi > 0) + { + if (EVDW_PME(ir->vdwtype)) + { + + gmx_fatal(FARGS, "Test particle insertion not implemented with LJ-PME"); + } + /* Determine the PME grid energy of the test molecule + * with the PME grid potential of the other charges. + */ + gmx_pme_calc_energy(fr->pmedata, fr->n_tpi, + x + md->homenr - fr->n_tpi, + md->chargeA + md->homenr - fr->n_tpi, + &Vlr_q); + } + } + } + + if (!EEL_PME(fr->ic->eeltype) && EEL_PME_EWALD(fr->ic->eeltype)) + { + Vlr_q = do_ewald(ir, x, as_rvec_array(forceWithVirial->force_.data()), + md->chargeA, md->chargeB, + box, cr, md->homenr, + ewaldOutput.vir_q, fr->ic->ewaldcoeff_q, + lambda[efptCOUL], &ewaldOutput.dvdl[efptCOUL], + fr->ewald_table); + } + + /* Note that with separate PME nodes we get the real energies later */ + // TODO it would be simpler if we just accumulated a single + // long-range virial contribution. + forceWithVirial->addVirialContribution(ewaldOutput.vir_q); + forceWithVirial->addVirialContribution(ewaldOutput.vir_lj); + enerd->dvdl_lin[efptCOUL] += ewaldOutput.dvdl[efptCOUL]; + enerd->dvdl_lin[efptVDW] += ewaldOutput.dvdl[efptVDW]; + enerd->term[F_COUL_RECIP] = Vlr_q + ewaldOutput.Vcorr_q; + enerd->term[F_LJ_RECIP] = Vlr_lj + ewaldOutput.Vcorr_lj; + + if (debug) + { + fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n", + Vlr_q, ewaldOutput.Vcorr_q, enerd->term[F_COUL_RECIP]); + pr_rvecs(debug, 0, "vir_el_recip after corr", ewaldOutput.vir_q, DIM); + pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS); + fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n", + Vlr_lj, ewaldOutput.Vcorr_lj, enerd->term[F_LJ_RECIP]); + pr_rvecs(debug, 0, "vir_lj_recip after corr", ewaldOutput.vir_lj, DIM); + } + } + else + { + /* Is there a reaction-field exclusion correction needed? + * With the Verlet scheme, exclusion forces are calculated + * in the non-bonded kernel. 
+ */ + if (ir->cutoff_scheme != ecutsVERLET && EEL_RF(fr->ic->eeltype)) + { + real dvdl_rf_excl = 0; + enerd->term[F_RF_EXCL] = + RF_excl_correction(fr, graph, md, excl, DOMAINDECOMP(cr), + x, forceForUseWithShiftForces, + fr->fshift, &pbc, lambda[efptCOUL], &dvdl_rf_excl); + + enerd->dvdl_lin[efptCOUL] += dvdl_rf_excl; + } + } + + if (debug) + { + print_nrnb(debug, nrnb); + } + +#if GMX_MPI + if (TAKETIME) + { + t2 = MPI_Wtime(); + MPI_Barrier(cr->mpi_comm_mygroup); + t3 = MPI_Wtime(); + fr->t_wait += t3-t2; + if (fr->timesteps == 11) + { + char buf[22]; + fprintf(stderr, "* PP load balancing info: rank %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n", + cr->nodeid, gmx_step_str(fr->timesteps, buf), + 100*fr->t_wait/(fr->t_wait+fr->t_fnbf), + (fr->t_fnbf+fr->t_wait)/fr->t_fnbf); + } + fr->timesteps++; + } +#endif + + if (debug) + { + pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS); + } + +} + +void init_enerdata(int ngener, int n_lambda, gmx_enerdata_t *enerd) +{ + int i, n2; + + for (i = 0; i < F_NRE; i++) + { + enerd->term[i] = 0; + enerd->foreign_term[i] = 0; + } + + + for (i = 0; i < efptNR; i++) + { + enerd->dvdl_lin[i] = 0; + enerd->dvdl_nonlin[i] = 0; + } + + n2 = ngener*ngener; + if (debug) + { + fprintf(debug, "Creating %d sized group matrix for energies\n", n2); + } + enerd->grpp.nener = n2; + enerd->foreign_grpp.nener = n2; + for (i = 0; (i < egNR); i++) + { + snew(enerd->grpp.ener[i], n2); + snew(enerd->foreign_grpp.ener[i], n2); + } + + if (n_lambda) + { + enerd->n_lambda = 1 + n_lambda; + snew(enerd->enerpart_lambda, enerd->n_lambda); + } + else + { + enerd->n_lambda = 0; + } +} + +void destroy_enerdata(gmx_enerdata_t *enerd) +{ + int i; + + for (i = 0; (i < egNR); i++) + { + sfree(enerd->grpp.ener[i]); + } + + for (i = 0; (i < egNR); i++) + { + sfree(enerd->foreign_grpp.ener[i]); + } + + if (enerd->n_lambda) + { + sfree(enerd->enerpart_lambda); + } +} + +static real sum_v(int n, const real v[]) +{ + real t; + int i; + + t = 0.0; + for (i = 0; (i < n); i++) + { + t = t + v[i]; + } + + return t; +} + +void sum_epot(gmx_grppairener_t *grpp, real *epot) +{ + int i; + + /* Accumulate energies */ + epot[F_COUL_SR] = sum_v(grpp->nener, grpp->ener[egCOULSR]); + epot[F_LJ] = sum_v(grpp->nener, grpp->ener[egLJSR]); + epot[F_LJ14] = sum_v(grpp->nener, grpp->ener[egLJ14]); + epot[F_COUL14] = sum_v(grpp->nener, grpp->ener[egCOUL14]); + +/* lattice part of LR doesnt belong to any group + * and has been added earlier + */ + epot[F_BHAM] = sum_v(grpp->nener, grpp->ener[egBHAMSR]); + + epot[F_EPOT] = 0; + for (i = 0; (i < F_EPOT); i++) + { + if (i != F_DISRESVIOL && i != F_ORIRESDEV) + { + epot[F_EPOT] += epot[i]; + } + } +} + +void sum_dhdl(gmx_enerdata_t *enerd, gmx::ArrayRef<const real> lambda, t_lambda *fepvals) +{ + int index; + + enerd->dvdl_lin[efptVDW] += enerd->term[F_DVDL_VDW]; /* include dispersion correction */ + enerd->term[F_DVDL] = 0.0; + for (int i = 0; i < efptNR; i++) + { + if (fepvals->separate_dvdl[i]) + { + /* could this be done more readably/compactly? 
*/ + switch (i) + { + case (efptMASS): + index = F_DKDL; + break; + case (efptCOUL): + index = F_DVDL_COUL; + break; + case (efptVDW): + index = F_DVDL_VDW; + break; + case (efptBONDED): + index = F_DVDL_BONDED; + break; + case (efptRESTRAINT): + index = F_DVDL_RESTRAINT; + break; + default: + index = F_DVDL; + break; + } + enerd->term[index] = enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i]; + if (debug) + { + fprintf(debug, "dvdl-%s[%2d]: %f: non-linear %f + linear %f\n", + efpt_names[i], i, enerd->term[index], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]); + } + } + else + { + enerd->term[F_DVDL] += enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i]; + if (debug) + { + fprintf(debug, "dvd-%sl[%2d]: %f: non-linear %f + linear %f\n", + efpt_names[0], i, enerd->term[F_DVDL], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]); + } + } + } + + if (fepvals->separate_dvdl[efptBONDED]) + { + enerd->term[F_DVDL_BONDED] += enerd->term[F_DVDL_CONSTR]; + } + else + { + enerd->term[F_DVDL] += enerd->term[F_DVDL_CONSTR]; + } + + for (int i = 0; i < fepvals->n_lambda; i++) + { + /* note we are iterating over fepvals here! + For the current lam, dlam = 0 automatically, + so we don't need to add anything to the + enerd->enerpart_lambda[0] */ + + /* we don't need to worry about dvdl_lin contributions to dE at + current lambda, because the contributions to the current + lambda are automatically zeroed */ + + double &enerpart_lambda = enerd->enerpart_lambda[i + 1]; + + for (gmx::index j = 0; j < lambda.size(); j++) + { + /* Note that this loop is over all dhdl components, not just the separated ones */ + const double dlam = fepvals->all_lambda[j][i] - lambda[j]; + + enerpart_lambda += dlam*enerd->dvdl_lin[j]; + + /* Constraints can not be evaluated at foreign lambdas, so we add + * a linear extrapolation. This is an approximation, but usually + * quite accurate since constraints change little between lambdas. + */ + if ((j == efptBONDED && fepvals->separate_dvdl[efptBONDED]) || + (j == efptFEP && !fepvals->separate_dvdl[efptBONDED])) + { + enerpart_lambda += dlam*enerd->term[F_DVDL_CONSTR]; + } + + if (j == efptMASS && !fepvals->separate_dvdl[j]) + { + enerpart_lambda += dlam*enerd->term[F_DKDL]; + } + + if (debug) + { + fprintf(debug, "enerdiff lam %g: (%15s), non-linear %f linear %f*%f\n", + fepvals->all_lambda[j][i], efpt_names[j], + enerpart_lambda - enerd->enerpart_lambda[0], + dlam, enerd->dvdl_lin[j]); + } + } + } + + /* The constrain contribution is now included in other terms, so clear it */ + enerd->term[F_DVDL_CONSTR] = 0; +} + + +void reset_foreign_enerdata(gmx_enerdata_t *enerd) +{ + int i, j; + + /* First reset all foreign energy components. Foreign energies always called on + neighbor search steps */ + for (i = 0; (i < egNR); i++) + { + for (j = 0; (j < enerd->grpp.nener); j++) + { + enerd->foreign_grpp.ener[i][j] = 0.0; + } + } + + /* potential energy components */ + for (i = 0; (i <= F_EPOT); i++) + { + enerd->foreign_term[i] = 0.0; + } +} + +void reset_enerdata(gmx_enerdata_t *enerd) +{ + int i, j; + + /* First reset all energy components. 
*/ + for (i = 0; (i < egNR); i++) + { + for (j = 0; (j < enerd->grpp.nener); j++) + { + enerd->grpp.ener[i][j] = 0.0; + } + } + for (i = 0; i < efptNR; i++) + { + enerd->dvdl_lin[i] = 0.0; + enerd->dvdl_nonlin[i] = 0.0; + } + + /* Normal potential energy components */ + for (i = 0; (i <= F_EPOT); i++) + { + enerd->term[i] = 0.0; + } + enerd->term[F_DVDL] = 0.0; + enerd->term[F_DVDL_COUL] = 0.0; + enerd->term[F_DVDL_VDW] = 0.0; + enerd->term[F_DVDL_BONDED] = 0.0; + enerd->term[F_DVDL_RESTRAINT] = 0.0; + enerd->term[F_DKDL] = 0.0; + if (enerd->n_lambda > 0) + { + for (i = 0; i < enerd->n_lambda; i++) + { + enerd->enerpart_lambda[i] = 0.0; + } + } + /* reset foreign energy data - separate function since we also call it elsewhere */ + reset_foreign_enerdata(enerd); +} diff --git a/patches/gromacs-2019.1.diff/src/gromacs/mdrun/legacymdrunoptions.cpp b/patches/gromacs-2019.1.diff/src/gromacs/mdrun/legacymdrunoptions.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4d9c29a1d5ad78296794ffa6fd2f8a6599aa560c --- /dev/null +++ b/patches/gromacs-2019.1.diff/src/gromacs/mdrun/legacymdrunoptions.cpp @@ -0,0 +1,263 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! 
\internal \file + * + * \brief This file declares helper functionality for legacy option handling for mdrun + * + * \author Berk Hess <hess@kth.se> + * \author David van der Spoel <david.vanderspoel@icm.uu.se> + * \author Erik Lindahl <erik@kth.se> + * \author Mark Abraham <mark.j.abraham@gmail.com> + * + * \ingroup module_mdrun + */ +#include "gmxpre.h" + +#include "legacymdrunoptions.h" + +#include "config.h" + +#include <cstring> + +#include "gromacs/gmxlib/network.h" +#include "gromacs/math/functions.h" +#include "gromacs/mdrun/multisim.h" +#include "gromacs/mdrunutility/handlerestart.h" +#include "gromacs/mdtypes/commrec.h" +#include "gromacs/utility/arraysize.h" +#include "gromacs/utility/fatalerror.h" + +/* PLUMED */ +#include "../../../Plumed.h" +extern int plumedswitch; +extern plumed plumedmain; +extern void(*plumedcmd)(plumed,const char*,const void*); +/* END PLUMED */ + +namespace gmx +{ + +/*! \brief Return whether the command-line parameter that + * will trigger a multi-simulation is set */ +static bool is_multisim_option_set(int argc, const char *const argv[]) +{ + for (int i = 0; i < argc; ++i) + { + if (strcmp(argv[i], "-multidir") == 0) + { + return true; + } + } + return false; +} + +int LegacyMdrunOptions::updateFromCommandLine(int argc, char **argv, ArrayRef<const char *> desc) +{ + unsigned long PCA_Flags = PCA_CAN_SET_DEFFNM; + // With -multidir, the working directory still needs to be + // changed, so we can't check for the existence of files during + // parsing. It isn't useful to do any completion based on file + // system contents, either. + if (is_multisim_option_set(argc, argv)) + { + PCA_Flags |= PCA_DISABLE_INPUT_FILE_CHECKING; + } + + if (!parse_common_args(&argc, argv, PCA_Flags, + static_cast<int>(filenames.size()), filenames.data(), asize(pa), pa, + static_cast<int>(desc.size()), desc.data(), 0, nullptr, &oenv)) + { + return 0; + } + + // Handle the options that permits the user to either declare + // which compatible GPUs are availble for use, or to select a GPU + // task assignment. Either could be in an environment variable (so + // that there is a way to customize it, when using MPI in + // heterogeneous contexts). + { + // TODO Argument parsing can't handle std::string. We should + // fix that by changing the parsing, once more of the roles of + // handling, validating and implementing defaults for user + // command-line options have been seperated. 
+ hw_opt.gpuIdsAvailable = gpuIdsAvailable; + hw_opt.userGpuTaskAssignment = userGpuTaskAssignment; + + const char *env = getenv("GMX_GPU_ID"); + if (env != nullptr) + { + if (!hw_opt.gpuIdsAvailable.empty()) + { + gmx_fatal(FARGS, "GMX_GPU_ID and -gpu_id can not be used at the same time"); + } + hw_opt.gpuIdsAvailable = env; + } + + env = getenv("GMX_GPUTASKS"); + if (env != nullptr) + { + if (!hw_opt.userGpuTaskAssignment.empty()) + { + gmx_fatal(FARGS, "GMX_GPUTASKS and -gputasks can not be used at the same time"); + } + hw_opt.userGpuTaskAssignment = env; + } + + if (!hw_opt.gpuIdsAvailable.empty() && !hw_opt.userGpuTaskAssignment.empty()) + { + gmx_fatal(FARGS, "-gpu_id and -gputasks cannot be used at the same time"); + } + } + + hw_opt.thread_affinity = nenum(thread_aff_opt_choices); + + // now check for a multi-simulation + ArrayRef<const std::string> multidir = opt2fnsIfOptionSet("-multidir", + static_cast<int>(filenames.size()), + filenames.data()); + + if (replExParams.exchangeInterval != 0 && multidir.size() < 2) + { + gmx_fatal(FARGS, "Need at least two replicas for replica exchange (use option -multidir)"); + } + + if (replExParams.numExchanges < 0) + { + gmx_fatal(FARGS, "Replica exchange number of exchanges needs to be positive"); + } + + ms = init_multisystem(MPI_COMM_WORLD, multidir); + + /* Prepare the intra-simulation communication */ + // TODO consolidate this with init_commrec, after changing the + // relative ordering of init_commrec and init_multisystem +#if GMX_MPI + if (ms != nullptr) + { + cr->nnodes = cr->nnodes / ms->nsim; + MPI_Comm_split(MPI_COMM_WORLD, ms->sim, cr->sim_nodeid, &cr->mpi_comm_mysim); + cr->mpi_comm_mygroup = cr->mpi_comm_mysim; + MPI_Comm_rank(cr->mpi_comm_mysim, &cr->sim_nodeid); + MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid); + } +#endif + + if (!opt2bSet("-cpi", + static_cast<int>(filenames.size()), filenames.data())) + { + // If we are not starting from a checkpoint we never allow files to be appended + // to, since that has caused a ton of strange behaviour and bugs in the past. + if (opt2parg_bSet("-append", asize(pa), pa)) + { + // If the user explicitly used the -append option, explain that it is not possible. + gmx_fatal(FARGS, "GROMACS can only append to files when restarting from a checkpoint."); + } + else + { + // If the user did not say anything explicit, just disable appending. 
+ bTryToAppendFiles = FALSE; + } + } + + ContinuationOptions &continuationOptions = mdrunOptions.continuationOptions; + + continuationOptions.appendFilesOptionSet = opt2parg_bSet("-append", asize(pa), pa); + + handleRestart(cr, ms, bTryToAppendFiles, + static_cast<int>(filenames.size()), + filenames.data(), + &continuationOptions.appendFiles, + &continuationOptions.startedFromCheckpoint); + + mdrunOptions.rerun = opt2bSet("-rerun", + static_cast<int>(filenames.size()), + filenames.data()); + mdrunOptions.ntompOptionIsSet = opt2parg_bSet("-ntomp", asize(pa), pa); + + domdecOptions.rankOrder = static_cast<DdRankOrder>(nenum(ddrank_opt_choices)); + domdecOptions.dlbOption = static_cast<DlbOption>(nenum(dddlb_opt_choices)); + domdecOptions.numCells[XX] = roundToInt(realddxyz[XX]); + domdecOptions.numCells[YY] = roundToInt(realddxyz[YY]); + domdecOptions.numCells[ZZ] = roundToInt(realddxyz[ZZ]); + + /* PLUMED */ + plumedswitch=0; + if (opt2bSet("-plumed", static_cast<int>(filenames.size()), filenames.data())) plumedswitch=1; + if(plumedswitch){ + plumedcmd=plumed_cmd; + int real_precision=sizeof(real); + real energyUnits=1.0; + real lengthUnits=1.0; + real timeUnits=1.0; + + if(!plumed_installed()){ + gmx_fatal(FARGS,"Plumed is not available. Check your PLUMED_KERNEL variable."); + } + plumedmain=plumed_create(); + plumed_cmd(plumedmain,"setRealPrecision",&real_precision); + // this is not necessary for gromacs units: + plumed_cmd(plumedmain,"setMDEnergyUnits",&energyUnits); + plumed_cmd(plumedmain,"setMDLengthUnits",&lengthUnits); + plumed_cmd(plumedmain,"setMDTimeUnits",&timeUnits); + // + plumed_cmd(plumedmain,"setPlumedDat",ftp2fn(efDAT,static_cast<int>(filenames.size()), filenames.data())); + plumedswitch=1; + } + /* PLUMED HREX*/ + if(getenv("PLUMED_HREX")) plumed_hrex=1; + if(plumed_hrex){ + if(!plumedswitch) gmx_fatal(FARGS,"-hrex (or PLUMED_HREX) requires -plumed"); + if(replExParams.exchangeInterval==0) gmx_fatal(FARGS,"-hrex (or PLUMED_HREX) replica exchange"); + if(replExParams.numExchanges!=0) gmx_fatal(FARGS,"-hrex (or PLUMED_HREX) not compatible with -nex"); + } + /* END PLUMED HREX */ + + /* END PLUMED */ + + return 1; +} + +LegacyMdrunOptions::~LegacyMdrunOptions() +{ + if (GMX_LIB_MPI) + { + done_commrec(cr); + } + done_multisim(ms); +} + +} // namespace gmx diff --git a/patches/gromacs-2019.1.diff/src/gromacs/mdrun/legacymdrunoptions.cpp.preplumed b/patches/gromacs-2019.1.diff/src/gromacs/mdrun/legacymdrunoptions.cpp.preplumed new file mode 100644 index 0000000000000000000000000000000000000000..de685e90348bfff377e9fe6cf20efa9fc2d516d6 --- /dev/null +++ b/patches/gromacs-2019.1.diff/src/gromacs/mdrun/legacymdrunoptions.cpp.preplumed @@ -0,0 +1,222 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. 
+ * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \internal \file + * + * \brief This file declares helper functionality for legacy option handling for mdrun + * + * \author Berk Hess <hess@kth.se> + * \author David van der Spoel <david.vanderspoel@icm.uu.se> + * \author Erik Lindahl <erik@kth.se> + * \author Mark Abraham <mark.j.abraham@gmail.com> + * + * \ingroup module_mdrun + */ +#include "gmxpre.h" + +#include "legacymdrunoptions.h" + +#include "config.h" + +#include <cstring> + +#include "gromacs/gmxlib/network.h" +#include "gromacs/math/functions.h" +#include "gromacs/mdrun/multisim.h" +#include "gromacs/mdrunutility/handlerestart.h" +#include "gromacs/mdtypes/commrec.h" +#include "gromacs/utility/arraysize.h" +#include "gromacs/utility/fatalerror.h" + +namespace gmx +{ + +/*! \brief Return whether the command-line parameter that + * will trigger a multi-simulation is set */ +static bool is_multisim_option_set(int argc, const char *const argv[]) +{ + for (int i = 0; i < argc; ++i) + { + if (strcmp(argv[i], "-multidir") == 0) + { + return true; + } + } + return false; +} + +int LegacyMdrunOptions::updateFromCommandLine(int argc, char **argv, ArrayRef<const char *> desc) +{ + unsigned long PCA_Flags = PCA_CAN_SET_DEFFNM; + // With -multidir, the working directory still needs to be + // changed, so we can't check for the existence of files during + // parsing. It isn't useful to do any completion based on file + // system contents, either. + if (is_multisim_option_set(argc, argv)) + { + PCA_Flags |= PCA_DISABLE_INPUT_FILE_CHECKING; + } + + if (!parse_common_args(&argc, argv, PCA_Flags, + static_cast<int>(filenames.size()), filenames.data(), asize(pa), pa, + static_cast<int>(desc.size()), desc.data(), 0, nullptr, &oenv)) + { + return 0; + } + + // Handle the options that permits the user to either declare + // which compatible GPUs are availble for use, or to select a GPU + // task assignment. Either could be in an environment variable (so + // that there is a way to customize it, when using MPI in + // heterogeneous contexts). + { + // TODO Argument parsing can't handle std::string. We should + // fix that by changing the parsing, once more of the roles of + // handling, validating and implementing defaults for user + // command-line options have been seperated. 
+ hw_opt.gpuIdsAvailable = gpuIdsAvailable; + hw_opt.userGpuTaskAssignment = userGpuTaskAssignment; + + const char *env = getenv("GMX_GPU_ID"); + if (env != nullptr) + { + if (!hw_opt.gpuIdsAvailable.empty()) + { + gmx_fatal(FARGS, "GMX_GPU_ID and -gpu_id can not be used at the same time"); + } + hw_opt.gpuIdsAvailable = env; + } + + env = getenv("GMX_GPUTASKS"); + if (env != nullptr) + { + if (!hw_opt.userGpuTaskAssignment.empty()) + { + gmx_fatal(FARGS, "GMX_GPUTASKS and -gputasks can not be used at the same time"); + } + hw_opt.userGpuTaskAssignment = env; + } + + if (!hw_opt.gpuIdsAvailable.empty() && !hw_opt.userGpuTaskAssignment.empty()) + { + gmx_fatal(FARGS, "-gpu_id and -gputasks cannot be used at the same time"); + } + } + + hw_opt.thread_affinity = nenum(thread_aff_opt_choices); + + // now check for a multi-simulation + ArrayRef<const std::string> multidir = opt2fnsIfOptionSet("-multidir", + static_cast<int>(filenames.size()), + filenames.data()); + + if (replExParams.exchangeInterval != 0 && multidir.size() < 2) + { + gmx_fatal(FARGS, "Need at least two replicas for replica exchange (use option -multidir)"); + } + + if (replExParams.numExchanges < 0) + { + gmx_fatal(FARGS, "Replica exchange number of exchanges needs to be positive"); + } + + ms = init_multisystem(MPI_COMM_WORLD, multidir); + + /* Prepare the intra-simulation communication */ + // TODO consolidate this with init_commrec, after changing the + // relative ordering of init_commrec and init_multisystem +#if GMX_MPI + if (ms != nullptr) + { + cr->nnodes = cr->nnodes / ms->nsim; + MPI_Comm_split(MPI_COMM_WORLD, ms->sim, cr->sim_nodeid, &cr->mpi_comm_mysim); + cr->mpi_comm_mygroup = cr->mpi_comm_mysim; + MPI_Comm_rank(cr->mpi_comm_mysim, &cr->sim_nodeid); + MPI_Comm_rank(cr->mpi_comm_mygroup, &cr->nodeid); + } +#endif + + if (!opt2bSet("-cpi", + static_cast<int>(filenames.size()), filenames.data())) + { + // If we are not starting from a checkpoint we never allow files to be appended + // to, since that has caused a ton of strange behaviour and bugs in the past. + if (opt2parg_bSet("-append", asize(pa), pa)) + { + // If the user explicitly used the -append option, explain that it is not possible. + gmx_fatal(FARGS, "GROMACS can only append to files when restarting from a checkpoint."); + } + else + { + // If the user did not say anything explicit, just disable appending. 
+ bTryToAppendFiles = FALSE; + } + } + + ContinuationOptions &continuationOptions = mdrunOptions.continuationOptions; + + continuationOptions.appendFilesOptionSet = opt2parg_bSet("-append", asize(pa), pa); + + handleRestart(cr, ms, bTryToAppendFiles, + static_cast<int>(filenames.size()), + filenames.data(), + &continuationOptions.appendFiles, + &continuationOptions.startedFromCheckpoint); + + mdrunOptions.rerun = opt2bSet("-rerun", + static_cast<int>(filenames.size()), + filenames.data()); + mdrunOptions.ntompOptionIsSet = opt2parg_bSet("-ntomp", asize(pa), pa); + + domdecOptions.rankOrder = static_cast<DdRankOrder>(nenum(ddrank_opt_choices)); + domdecOptions.dlbOption = static_cast<DlbOption>(nenum(dddlb_opt_choices)); + domdecOptions.numCells[XX] = roundToInt(realddxyz[XX]); + domdecOptions.numCells[YY] = roundToInt(realddxyz[YY]); + domdecOptions.numCells[ZZ] = roundToInt(realddxyz[ZZ]); + + return 1; +} + +LegacyMdrunOptions::~LegacyMdrunOptions() +{ + if (GMX_LIB_MPI) + { + done_commrec(cr); + } + done_multisim(ms); +} + +} // namespace gmx diff --git a/patches/gromacs-2019.1.diff/src/gromacs/mdrun/legacymdrunoptions.h b/patches/gromacs-2019.1.diff/src/gromacs/mdrun/legacymdrunoptions.h new file mode 100644 index 0000000000000000000000000000000000000000..88520e58be55f24710cedde0e53bd016d7301c35 --- /dev/null +++ b/patches/gromacs-2019.1.diff/src/gromacs/mdrun/legacymdrunoptions.h @@ -0,0 +1,301 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! 
\libinternal \file + * + * \brief This file declares helper functionality for legacy option handling for mdrun + * + * \author Berk Hess <hess@kth.se> + * \author David van der Spoel <david.vanderspoel@icm.uu.se> + * \author Erik Lindahl <erik@kth.se> + * \author Mark Abraham <mark.j.abraham@gmail.com> + * + * \ingroup module_mdrun + * \inlibraryapi + */ +#ifndef GMX_MDRUN_LEGACYMDRUNOPTIONS_H +#define GMX_MDRUN_LEGACYMDRUNOPTIONS_H + +#include "gromacs/commandline/filenm.h" +#include "gromacs/commandline/pargs.h" +#include "gromacs/domdec/domdec.h" +#include "gromacs/hardware/hw_info.h" +#include "gromacs/mdlib/mdrun.h" +#include "gromacs/mdrun/logging.h" + +#include "replicaexchange.h" + +/* PLUMED */ +#include "../../../Plumed.h" +extern int plumedswitch; +extern plumed plumedmain; +extern void(*plumedcmd)(plumed,const char*,const void*); +/* END PLUMED */ + +/* PLUMED HREX */ +int plumed_hrex; +/* END PLUMED HREX */ + +struct gmx_multisim_t; + +namespace gmx +{ + +/*! \libinternal + * \brief This class provides the same command-line option + * functionality to both CLI and API sessions. + * + * This class should not exist, but is necessary now to introduce + * support for the CLI and API without duplicating code. It should be + * eliminated following the TODOs below. + * + * \todo Modules in mdrun should acquire proper option handling so + * that all of these declarations and defaults are local to the + * modules. + * + * \todo Contextual aspects, such as working directory, MPI + * environment, and environment variable handling are more properly + * the role of SimulationContext, and should be moved there */ +class LegacyMdrunOptions +{ + public: + //! Ongoing collection of mdrun options + MdrunOptions mdrunOptions; + //! Options for the domain decomposition. + DomdecOptions domdecOptions; + //! Parallelism-related user options. + gmx_hw_opt_t hw_opt; + //! Command-line override for the duration of a neighbor list with the Verlet scheme. + int nstlist_cmdline = 0; + //! Parameters for replica-exchange simulations. + ReplicaExchangeParameters replExParams; + + //! Filename options to fill from command-line argument values. 
+ std::vector<t_filenm> filenames = + {{{ efTPR, nullptr, nullptr, ffREAD }, + { efTRN, "-o", nullptr, ffWRITE }, + { efCOMPRESSED, "-x", nullptr, ffOPTWR }, + { efCPT, "-cpi", nullptr, ffOPTRD | ffALLOW_MISSING }, + { efCPT, "-cpo", nullptr, ffOPTWR }, + { efSTO, "-c", "confout", ffWRITE }, + { efEDR, "-e", "ener", ffWRITE }, + { efLOG, "-g", "md", ffWRITE }, + { efXVG, "-dhdl", "dhdl", ffOPTWR }, + { efXVG, "-field", "field", ffOPTWR }, + { efXVG, "-table", "table", ffOPTRD }, + { efXVG, "-tablep", "tablep", ffOPTRD }, + { efXVG, "-tableb", "table", ffOPTRDMULT }, + { efTRX, "-rerun", "rerun", ffOPTRD }, + { efXVG, "-tpi", "tpi", ffOPTWR }, + { efXVG, "-tpid", "tpidist", ffOPTWR }, + { efEDI, "-ei", "sam", ffOPTRD }, + { efXVG, "-eo", "edsam", ffOPTWR }, + { efXVG, "-devout", "deviatie", ffOPTWR }, + { efXVG, "-runav", "runaver", ffOPTWR }, + { efXVG, "-px", "pullx", ffOPTWR }, + { efXVG, "-pf", "pullf", ffOPTWR }, + { efXVG, "-ro", "rotation", ffOPTWR }, + { efLOG, "-ra", "rotangles", ffOPTWR }, + { efLOG, "-rs", "rotslabs", ffOPTWR }, + { efLOG, "-rt", "rottorque", ffOPTWR }, + { efMTX, "-mtx", "nm", ffOPTWR }, + { efRND, "-multidir", nullptr, ffOPTRDMULT}, + { efXVG, "-awh", "awhinit", ffOPTRD }, + { efDAT, "-plumed", "plumed", ffOPTRD }, /* PLUMED */ + { efDAT, "-membed", "membed", ffOPTRD }, + { efTOP, "-mp", "membed", ffOPTRD }, + { efNDX, "-mn", "membed", ffOPTRD }, + { efXVG, "-if", "imdforces", ffOPTWR }, + { efXVG, "-swap", "swapions", ffOPTWR }}}; + + //! Print a warning if any force is larger than this (in kJ/mol nm). + real pforce = -1; + + /*! \brief Output context for writing text files + * + * \todo Clarify initialization, ownership, and lifetime. */ + gmx_output_env_t *oenv = nullptr; + + //! Handle to file used for logging. + LogFilePtr logFileGuard = nullptr; + + /*! \brief Command line options, defaults, docs and storage for them to fill. */ + /*! 
\{ */ + rvec realddxyz = {0, 0, 0}; + const char *ddrank_opt_choices[static_cast<int>(DdRankOrder::nr)+1] = + { nullptr, "interleave", "pp_pme", "cartesian", nullptr }; + const char *dddlb_opt_choices[static_cast<int>(DlbOption::nr)+1] = + { nullptr, "auto", "no", "yes", nullptr }; + const char *thread_aff_opt_choices[threadaffNR+1] = + { nullptr, "auto", "on", "off", nullptr }; + const char *nbpu_opt_choices[5] = + { nullptr, "auto", "cpu", "gpu", nullptr }; + const char *pme_opt_choices[5] = + { nullptr, "auto", "cpu", "gpu", nullptr }; + const char *pme_fft_opt_choices[5] = + { nullptr, "auto", "cpu", "gpu", nullptr }; + const char *bonded_opt_choices[5] = + { nullptr, "auto", "cpu", "gpu", nullptr }; + gmx_bool bTryToAppendFiles = TRUE; + const char *gpuIdsAvailable = ""; + const char *userGpuTaskAssignment = ""; + + ImdOptions &imdOptions = mdrunOptions.imdOptions; + + t_pargs pa[49] = { + + { "-dd", FALSE, etRVEC, {&realddxyz}, + "Domain decomposition grid, 0 is optimize" }, + { "-ddorder", FALSE, etENUM, {ddrank_opt_choices}, + "DD rank order" }, + { "-npme", FALSE, etINT, {&domdecOptions.numPmeRanks}, + "Number of separate ranks to be used for PME, -1 is guess" }, + { "-nt", FALSE, etINT, {&hw_opt.nthreads_tot}, + "Total number of threads to start (0 is guess)" }, + { "-ntmpi", FALSE, etINT, {&hw_opt.nthreads_tmpi}, + "Number of thread-MPI ranks to start (0 is guess)" }, + { "-ntomp", FALSE, etINT, {&hw_opt.nthreads_omp}, + "Number of OpenMP threads per MPI rank to start (0 is guess)" }, + { "-ntomp_pme", FALSE, etINT, {&hw_opt.nthreads_omp_pme}, + "Number of OpenMP threads per MPI rank to start (0 is -ntomp)" }, + { "-pin", FALSE, etENUM, {thread_aff_opt_choices}, + "Whether mdrun should try to set thread affinities" }, + { "-pinoffset", FALSE, etINT, {&hw_opt.core_pinning_offset}, + "The lowest logical core number to which mdrun should pin the first thread" }, + { "-pinstride", FALSE, etINT, {&hw_opt.core_pinning_stride}, + "Pinning distance in logical cores for threads, use 0 to minimize the number of threads per physical core" }, + { "-gpu_id", FALSE, etSTR, {&gpuIdsAvailable}, + "List of unique GPU device IDs available to use" }, + { "-gputasks", FALSE, etSTR, {&userGpuTaskAssignment}, + "List of GPU device IDs, mapping each PP task on each node to a device" }, + { "-ddcheck", FALSE, etBOOL, {&domdecOptions.checkBondedInteractions}, + "Check for all bonded interactions with DD" }, + { "-ddbondcomm", FALSE, etBOOL, {&domdecOptions.useBondedCommunication}, + "HIDDENUse special bonded atom communication when [TT]-rdd[tt] > cut-off" }, + { "-rdd", FALSE, etREAL, {&domdecOptions.minimumCommunicationRange}, + "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial coordinates" }, + { "-rcon", FALSE, etREAL, {&domdecOptions.constraintCommunicationRange}, + "Maximum distance for P-LINCS (nm), 0 is estimate" }, + { "-dlb", FALSE, etENUM, {dddlb_opt_choices}, + "Dynamic load balancing (with DD)" }, + { "-dds", FALSE, etREAL, {&domdecOptions.dlbScaling}, + "Fraction in (0,1) by whose reciprocal the initial DD cell size will be increased in order to " + "provide a margin in which dynamic load balancing can act while preserving the minimum cell size." }, + { "-ddcsx", FALSE, etSTR, {&domdecOptions.cellSizeX}, + "HIDDENA string containing a vector of the relative sizes in the x " + "direction of the corresponding DD cells. Only effective with static " + "load balancing." 
}, + { "-ddcsy", FALSE, etSTR, {&domdecOptions.cellSizeY}, + "HIDDENA string containing a vector of the relative sizes in the y " + "direction of the corresponding DD cells. Only effective with static " + "load balancing." }, + { "-ddcsz", FALSE, etSTR, {&domdecOptions.cellSizeZ}, + "HIDDENA string containing a vector of the relative sizes in the z " + "direction of the corresponding DD cells. Only effective with static " + "load balancing." }, + { "-gcom", FALSE, etINT, {&mdrunOptions.globalCommunicationInterval}, + "Global communication frequency" }, + { "-nb", FALSE, etENUM, {nbpu_opt_choices}, + "Calculate non-bonded interactions on" }, + { "-nstlist", FALSE, etINT, {&nstlist_cmdline}, + "Set nstlist when using a Verlet buffer tolerance (0 is guess)" }, + { "-tunepme", FALSE, etBOOL, {&mdrunOptions.tunePme}, + "Optimize PME load between PP/PME ranks or GPU/CPU (only with the Verlet cut-off scheme)" }, + { "-pme", FALSE, etENUM, {pme_opt_choices}, + "Perform PME calculations on" }, + { "-pmefft", FALSE, etENUM, {pme_fft_opt_choices}, + "Perform PME FFT calculations on" }, + { "-bonded", FALSE, etENUM, {bonded_opt_choices}, + "Perform bonded calculations on" }, + { "-v", FALSE, etBOOL, {&mdrunOptions.verbose}, + "Be loud and noisy" }, + { "-pforce", FALSE, etREAL, {&pforce}, + "Print all forces larger than this (kJ/mol nm)" }, + { "-reprod", FALSE, etBOOL, {&mdrunOptions.reproducible}, + "Try to avoid optimizations that affect binary reproducibility" }, + { "-cpt", FALSE, etREAL, {&mdrunOptions.checkpointOptions.period}, + "Checkpoint interval (minutes)" }, + { "-cpnum", FALSE, etBOOL, {&mdrunOptions.checkpointOptions.keepAndNumberCheckpointFiles}, + "Keep and number checkpoint files" }, + { "-append", FALSE, etBOOL, {&bTryToAppendFiles}, + "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names" }, + { "-nsteps", FALSE, etINT64, {&mdrunOptions.numStepsCommandline}, + "Run this number of steps, overrides .mdp file option (-1 means infinite, -2 means use mdp option, smaller is invalid)" }, + { "-maxh", FALSE, etREAL, {&mdrunOptions.maximumHoursToRun}, + "Terminate after 0.99 times this time (hours)" }, + { "-replex", FALSE, etINT, {&replExParams.exchangeInterval}, + "Attempt replica exchange periodically with this period (steps)" }, + { "-nex", FALSE, etINT, {&replExParams.numExchanges}, + "Number of random exchanges to carry out each exchange interval (N^3 is one suggestion). -nex zero or not specified gives neighbor replica exchange." 
}, + { "-reseed", FALSE, etINT, {&replExParams.randomSeed}, + "Seed for replica exchange, -1 is generate a seed" }, + { "-hrex", FALSE, etBOOL, {&plumed_hrex}, /* PLUMED HREX */ + "Enable hamiltonian replica exchange" }, + { "-imdport", FALSE, etINT, {&imdOptions.port}, + "HIDDENIMD listening port" }, + { "-imdwait", FALSE, etBOOL, {&imdOptions.wait}, + "HIDDENPause the simulation while no IMD client is connected" }, + { "-imdterm", FALSE, etBOOL, {&imdOptions.terminatable}, + "HIDDENAllow termination of the simulation from IMD client" }, + { "-imdpull", FALSE, etBOOL, {&imdOptions.pull}, + "HIDDENAllow pulling in the simulation from IMD client" }, + { "-rerunvsite", FALSE, etBOOL, {&mdrunOptions.rerunConstructVsites}, + "HIDDENRecalculate virtual site coordinates with [TT]-rerun[tt]" }, + { "-confout", FALSE, etBOOL, {&mdrunOptions.writeConfout}, + "HIDDENWrite the last configuration with [TT]-c[tt] and force checkpointing at the last step" }, + { "-stepout", FALSE, etINT, {&mdrunOptions.verboseStepPrintInterval}, + "HIDDENFrequency of writing the remaining wall clock time for the run" }, + { "-resetstep", FALSE, etINT, {&mdrunOptions.timingOptions.resetStep}, + "HIDDENReset cycle counters after these many time steps" }, + { "-resethway", FALSE, etBOOL, {&mdrunOptions.timingOptions.resetHalfway}, + "HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt]" } + }; + /*! \} */ + + //! Handle to communication object. + t_commrec *cr = nullptr; + //! Multi-simulation object. + gmx_multisim_t *ms = nullptr; + + //! Parses the command-line input and prepares to start mdrun. + int updateFromCommandLine(int argc, char **argv, ArrayRef<const char *> desc); + + ~LegacyMdrunOptions(); +}; + +} // end namespace gmx + +#endif diff --git a/patches/gromacs-2019.1.diff/src/gromacs/mdrun/legacymdrunoptions.h.preplumed b/patches/gromacs-2019.1.diff/src/gromacs/mdrun/legacymdrunoptions.h.preplumed new file mode 100644 index 0000000000000000000000000000000000000000..637cd2a6b93dabe9fd632d92e30794922c3a8fec --- /dev/null +++ b/patches/gromacs-2019.1.diff/src/gromacs/mdrun/legacymdrunoptions.h.preplumed @@ -0,0 +1,287 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \libinternal \file + * + * \brief This file declares helper functionality for legacy option handling for mdrun + * + * \author Berk Hess <hess@kth.se> + * \author David van der Spoel <david.vanderspoel@icm.uu.se> + * \author Erik Lindahl <erik@kth.se> + * \author Mark Abraham <mark.j.abraham@gmail.com> + * + * \ingroup module_mdrun + * \inlibraryapi + */ +#ifndef GMX_MDRUN_LEGACYMDRUNOPTIONS_H +#define GMX_MDRUN_LEGACYMDRUNOPTIONS_H + +#include "gromacs/commandline/filenm.h" +#include "gromacs/commandline/pargs.h" +#include "gromacs/domdec/domdec.h" +#include "gromacs/hardware/hw_info.h" +#include "gromacs/mdlib/mdrun.h" +#include "gromacs/mdrun/logging.h" + +#include "replicaexchange.h" + +struct gmx_multisim_t; + +namespace gmx +{ + +/*! \libinternal + * \brief This class provides the same command-line option + * functionality to both CLI and API sessions. + * + * This class should not exist, but is necessary now to introduce + * support for the CLI and API without duplicating code. It should be + * eliminated following the TODOs below. + * + * \todo Modules in mdrun should acquire proper option handling so + * that all of these declarations and defaults are local to the + * modules. + * + * \todo Contextual aspects, such as working directory, MPI + * environment, and environment variable handling are more properly + * the role of SimulationContext, and should be moved there */ +class LegacyMdrunOptions +{ + public: + //! Ongoing collection of mdrun options + MdrunOptions mdrunOptions; + //! Options for the domain decomposition. + DomdecOptions domdecOptions; + //! Parallelism-related user options. + gmx_hw_opt_t hw_opt; + //! Command-line override for the duration of a neighbor list with the Verlet scheme. + int nstlist_cmdline = 0; + //! Parameters for replica-exchange simulations. + ReplicaExchangeParameters replExParams; + + //! Filename options to fill from command-line argument values. 
+ std::vector<t_filenm> filenames = + {{{ efTPR, nullptr, nullptr, ffREAD }, + { efTRN, "-o", nullptr, ffWRITE }, + { efCOMPRESSED, "-x", nullptr, ffOPTWR }, + { efCPT, "-cpi", nullptr, ffOPTRD | ffALLOW_MISSING }, + { efCPT, "-cpo", nullptr, ffOPTWR }, + { efSTO, "-c", "confout", ffWRITE }, + { efEDR, "-e", "ener", ffWRITE }, + { efLOG, "-g", "md", ffWRITE }, + { efXVG, "-dhdl", "dhdl", ffOPTWR }, + { efXVG, "-field", "field", ffOPTWR }, + { efXVG, "-table", "table", ffOPTRD }, + { efXVG, "-tablep", "tablep", ffOPTRD }, + { efXVG, "-tableb", "table", ffOPTRDMULT }, + { efTRX, "-rerun", "rerun", ffOPTRD }, + { efXVG, "-tpi", "tpi", ffOPTWR }, + { efXVG, "-tpid", "tpidist", ffOPTWR }, + { efEDI, "-ei", "sam", ffOPTRD }, + { efXVG, "-eo", "edsam", ffOPTWR }, + { efXVG, "-devout", "deviatie", ffOPTWR }, + { efXVG, "-runav", "runaver", ffOPTWR }, + { efXVG, "-px", "pullx", ffOPTWR }, + { efXVG, "-pf", "pullf", ffOPTWR }, + { efXVG, "-ro", "rotation", ffOPTWR }, + { efLOG, "-ra", "rotangles", ffOPTWR }, + { efLOG, "-rs", "rotslabs", ffOPTWR }, + { efLOG, "-rt", "rottorque", ffOPTWR }, + { efMTX, "-mtx", "nm", ffOPTWR }, + { efRND, "-multidir", nullptr, ffOPTRDMULT}, + { efXVG, "-awh", "awhinit", ffOPTRD }, + { efDAT, "-membed", "membed", ffOPTRD }, + { efTOP, "-mp", "membed", ffOPTRD }, + { efNDX, "-mn", "membed", ffOPTRD }, + { efXVG, "-if", "imdforces", ffOPTWR }, + { efXVG, "-swap", "swapions", ffOPTWR }}}; + + //! Print a warning if any force is larger than this (in kJ/mol nm). + real pforce = -1; + + /*! \brief Output context for writing text files + * + * \todo Clarify initialization, ownership, and lifetime. */ + gmx_output_env_t *oenv = nullptr; + + //! Handle to file used for logging. + LogFilePtr logFileGuard = nullptr; + + /*! \brief Command line options, defaults, docs and storage for them to fill. */ + /*! 
\{ */ + rvec realddxyz = {0, 0, 0}; + const char *ddrank_opt_choices[static_cast<int>(DdRankOrder::nr)+1] = + { nullptr, "interleave", "pp_pme", "cartesian", nullptr }; + const char *dddlb_opt_choices[static_cast<int>(DlbOption::nr)+1] = + { nullptr, "auto", "no", "yes", nullptr }; + const char *thread_aff_opt_choices[threadaffNR+1] = + { nullptr, "auto", "on", "off", nullptr }; + const char *nbpu_opt_choices[5] = + { nullptr, "auto", "cpu", "gpu", nullptr }; + const char *pme_opt_choices[5] = + { nullptr, "auto", "cpu", "gpu", nullptr }; + const char *pme_fft_opt_choices[5] = + { nullptr, "auto", "cpu", "gpu", nullptr }; + const char *bonded_opt_choices[5] = + { nullptr, "auto", "cpu", "gpu", nullptr }; + gmx_bool bTryToAppendFiles = TRUE; + const char *gpuIdsAvailable = ""; + const char *userGpuTaskAssignment = ""; + + ImdOptions &imdOptions = mdrunOptions.imdOptions; + + t_pargs pa[48] = { + + { "-dd", FALSE, etRVEC, {&realddxyz}, + "Domain decomposition grid, 0 is optimize" }, + { "-ddorder", FALSE, etENUM, {ddrank_opt_choices}, + "DD rank order" }, + { "-npme", FALSE, etINT, {&domdecOptions.numPmeRanks}, + "Number of separate ranks to be used for PME, -1 is guess" }, + { "-nt", FALSE, etINT, {&hw_opt.nthreads_tot}, + "Total number of threads to start (0 is guess)" }, + { "-ntmpi", FALSE, etINT, {&hw_opt.nthreads_tmpi}, + "Number of thread-MPI ranks to start (0 is guess)" }, + { "-ntomp", FALSE, etINT, {&hw_opt.nthreads_omp}, + "Number of OpenMP threads per MPI rank to start (0 is guess)" }, + { "-ntomp_pme", FALSE, etINT, {&hw_opt.nthreads_omp_pme}, + "Number of OpenMP threads per MPI rank to start (0 is -ntomp)" }, + { "-pin", FALSE, etENUM, {thread_aff_opt_choices}, + "Whether mdrun should try to set thread affinities" }, + { "-pinoffset", FALSE, etINT, {&hw_opt.core_pinning_offset}, + "The lowest logical core number to which mdrun should pin the first thread" }, + { "-pinstride", FALSE, etINT, {&hw_opt.core_pinning_stride}, + "Pinning distance in logical cores for threads, use 0 to minimize the number of threads per physical core" }, + { "-gpu_id", FALSE, etSTR, {&gpuIdsAvailable}, + "List of unique GPU device IDs available to use" }, + { "-gputasks", FALSE, etSTR, {&userGpuTaskAssignment}, + "List of GPU device IDs, mapping each PP task on each node to a device" }, + { "-ddcheck", FALSE, etBOOL, {&domdecOptions.checkBondedInteractions}, + "Check for all bonded interactions with DD" }, + { "-ddbondcomm", FALSE, etBOOL, {&domdecOptions.useBondedCommunication}, + "HIDDENUse special bonded atom communication when [TT]-rdd[tt] > cut-off" }, + { "-rdd", FALSE, etREAL, {&domdecOptions.minimumCommunicationRange}, + "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial coordinates" }, + { "-rcon", FALSE, etREAL, {&domdecOptions.constraintCommunicationRange}, + "Maximum distance for P-LINCS (nm), 0 is estimate" }, + { "-dlb", FALSE, etENUM, {dddlb_opt_choices}, + "Dynamic load balancing (with DD)" }, + { "-dds", FALSE, etREAL, {&domdecOptions.dlbScaling}, + "Fraction in (0,1) by whose reciprocal the initial DD cell size will be increased in order to " + "provide a margin in which dynamic load balancing can act while preserving the minimum cell size." }, + { "-ddcsx", FALSE, etSTR, {&domdecOptions.cellSizeX}, + "HIDDENA string containing a vector of the relative sizes in the x " + "direction of the corresponding DD cells. Only effective with static " + "load balancing." 
}, + { "-ddcsy", FALSE, etSTR, {&domdecOptions.cellSizeY}, + "HIDDENA string containing a vector of the relative sizes in the y " + "direction of the corresponding DD cells. Only effective with static " + "load balancing." }, + { "-ddcsz", FALSE, etSTR, {&domdecOptions.cellSizeZ}, + "HIDDENA string containing a vector of the relative sizes in the z " + "direction of the corresponding DD cells. Only effective with static " + "load balancing." }, + { "-gcom", FALSE, etINT, {&mdrunOptions.globalCommunicationInterval}, + "Global communication frequency" }, + { "-nb", FALSE, etENUM, {nbpu_opt_choices}, + "Calculate non-bonded interactions on" }, + { "-nstlist", FALSE, etINT, {&nstlist_cmdline}, + "Set nstlist when using a Verlet buffer tolerance (0 is guess)" }, + { "-tunepme", FALSE, etBOOL, {&mdrunOptions.tunePme}, + "Optimize PME load between PP/PME ranks or GPU/CPU (only with the Verlet cut-off scheme)" }, + { "-pme", FALSE, etENUM, {pme_opt_choices}, + "Perform PME calculations on" }, + { "-pmefft", FALSE, etENUM, {pme_fft_opt_choices}, + "Perform PME FFT calculations on" }, + { "-bonded", FALSE, etENUM, {bonded_opt_choices}, + "Perform bonded calculations on" }, + { "-v", FALSE, etBOOL, {&mdrunOptions.verbose}, + "Be loud and noisy" }, + { "-pforce", FALSE, etREAL, {&pforce}, + "Print all forces larger than this (kJ/mol nm)" }, + { "-reprod", FALSE, etBOOL, {&mdrunOptions.reproducible}, + "Try to avoid optimizations that affect binary reproducibility" }, + { "-cpt", FALSE, etREAL, {&mdrunOptions.checkpointOptions.period}, + "Checkpoint interval (minutes)" }, + { "-cpnum", FALSE, etBOOL, {&mdrunOptions.checkpointOptions.keepAndNumberCheckpointFiles}, + "Keep and number checkpoint files" }, + { "-append", FALSE, etBOOL, {&bTryToAppendFiles}, + "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names" }, + { "-nsteps", FALSE, etINT64, {&mdrunOptions.numStepsCommandline}, + "Run this number of steps, overrides .mdp file option (-1 means infinite, -2 means use mdp option, smaller is invalid)" }, + { "-maxh", FALSE, etREAL, {&mdrunOptions.maximumHoursToRun}, + "Terminate after 0.99 times this time (hours)" }, + { "-replex", FALSE, etINT, {&replExParams.exchangeInterval}, + "Attempt replica exchange periodically with this period (steps)" }, + { "-nex", FALSE, etINT, {&replExParams.numExchanges}, + "Number of random exchanges to carry out each exchange interval (N^3 is one suggestion). -nex zero or not specified gives neighbor replica exchange." 
}, + { "-reseed", FALSE, etINT, {&replExParams.randomSeed}, + "Seed for replica exchange, -1 is generate a seed" }, + { "-imdport", FALSE, etINT, {&imdOptions.port}, + "HIDDENIMD listening port" }, + { "-imdwait", FALSE, etBOOL, {&imdOptions.wait}, + "HIDDENPause the simulation while no IMD client is connected" }, + { "-imdterm", FALSE, etBOOL, {&imdOptions.terminatable}, + "HIDDENAllow termination of the simulation from IMD client" }, + { "-imdpull", FALSE, etBOOL, {&imdOptions.pull}, + "HIDDENAllow pulling in the simulation from IMD client" }, + { "-rerunvsite", FALSE, etBOOL, {&mdrunOptions.rerunConstructVsites}, + "HIDDENRecalculate virtual site coordinates with [TT]-rerun[tt]" }, + { "-confout", FALSE, etBOOL, {&mdrunOptions.writeConfout}, + "HIDDENWrite the last configuration with [TT]-c[tt] and force checkpointing at the last step" }, + { "-stepout", FALSE, etINT, {&mdrunOptions.verboseStepPrintInterval}, + "HIDDENFrequency of writing the remaining wall clock time for the run" }, + { "-resetstep", FALSE, etINT, {&mdrunOptions.timingOptions.resetStep}, + "HIDDENReset cycle counters after these many time steps" }, + { "-resethway", FALSE, etBOOL, {&mdrunOptions.timingOptions.resetHalfway}, + "HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt]" } + }; + /*! \} */ + + //! Handle to communication object. + t_commrec *cr = nullptr; + //! Multi-simulation object. + gmx_multisim_t *ms = nullptr; + + //! Parses the command-line input and prepares to start mdrun. + int updateFromCommandLine(int argc, char **argv, ArrayRef<const char *> desc); + + ~LegacyMdrunOptions(); +}; + +} // end namespace gmx + +#endif diff --git a/patches/gromacs-2019.1.diff/src/gromacs/mdrun/md.cpp b/patches/gromacs-2019.1.diff/src/gromacs/mdrun/md.cpp new file mode 100644 index 0000000000000000000000000000000000000000..887526a8d1dff7bdb05905ed784c1e500086501c --- /dev/null +++ b/patches/gromacs-2019.1.diff/src/gromacs/mdrun/md.cpp @@ -0,0 +1,1728 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. 
We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \internal \file + * + * \brief Implements the integrator for normal molecular dynamics simulations + * + * \author David van der Spoel <david.vanderspoel@icm.uu.se> + * \ingroup module_mdrun + */ +#include "gmxpre.h" + +#include <cinttypes> +#include <cmath> +#include <cstdio> +#include <cstdlib> + +#include <algorithm> +#include <memory> + +#include "gromacs/awh/awh.h" +#include "gromacs/commandline/filenm.h" +#include "gromacs/compat/make_unique.h" +#include "gromacs/domdec/collect.h" +#include "gromacs/domdec/domdec.h" +#include "gromacs/domdec/domdec_network.h" +#include "gromacs/domdec/domdec_struct.h" +#include "gromacs/domdec/partition.h" +#include "gromacs/essentialdynamics/edsam.h" +#include "gromacs/ewald/pme.h" +#include "gromacs/ewald/pme-load-balancing.h" +#include "gromacs/fileio/trxio.h" +#include "gromacs/gmxlib/network.h" +#include "gromacs/gmxlib/nrnb.h" +#include "gromacs/gpu_utils/gpu_utils.h" +#include "gromacs/imd/imd.h" +#include "gromacs/listed-forces/manage-threading.h" +#include "gromacs/math/functions.h" +#include "gromacs/math/units.h" +#include "gromacs/math/utilities.h" +#include "gromacs/math/vec.h" +#include "gromacs/math/vectypes.h" +#include "gromacs/mdlib/checkpointhandler.h" +#include "gromacs/mdlib/compute_io.h" +#include "gromacs/mdlib/constr.h" +#include "gromacs/mdlib/ebin.h" +#include "gromacs/mdlib/expanded.h" +#include "gromacs/mdlib/force.h" +#include "gromacs/mdlib/force_flags.h" +#include "gromacs/mdlib/forcerec.h" +#include "gromacs/mdlib/md_support.h" +#include "gromacs/mdlib/mdatoms.h" +#include "gromacs/mdlib/mdebin.h" +#include "gromacs/mdlib/mdoutf.h" +#include "gromacs/mdlib/mdrun.h" +#include "gromacs/mdlib/mdsetup.h" +#include "gromacs/mdlib/membed.h" +#include "gromacs/mdlib/nb_verlet.h" +#include "gromacs/mdlib/nbnxn_gpu_data_mgmt.h" +#include "gromacs/mdlib/ns.h" +#include "gromacs/mdlib/resethandler.h" +#include "gromacs/mdlib/shellfc.h" +#include "gromacs/mdlib/sighandler.h" +#include "gromacs/mdlib/sim_util.h" +#include "gromacs/mdlib/simulationsignal.h" +#include "gromacs/mdlib/stophandler.h" +#include "gromacs/mdlib/tgroup.h" +#include "gromacs/mdlib/trajectory_writing.h" +#include "gromacs/mdlib/update.h" +#include "gromacs/mdlib/vcm.h" +#include "gromacs/mdlib/vsite.h" +#include "gromacs/mdtypes/awh-history.h" +#include "gromacs/mdtypes/awh-params.h" +#include "gromacs/mdtypes/commrec.h" +#include "gromacs/mdtypes/df_history.h" +#include "gromacs/mdtypes/energyhistory.h" +#include "gromacs/mdtypes/fcdata.h" +#include "gromacs/mdtypes/forcerec.h" +#include "gromacs/mdtypes/group.h" +#include "gromacs/mdtypes/inputrec.h" +#include "gromacs/mdtypes/interaction_const.h" +#include "gromacs/mdtypes/md_enums.h" +#include "gromacs/mdtypes/mdatom.h" +#include "gromacs/mdtypes/observableshistory.h" +#include "gromacs/mdtypes/pullhistory.h" +#include "gromacs/mdtypes/state.h" +#include "gromacs/pbcutil/mshift.h" +#include "gromacs/pbcutil/pbc.h" +#include "gromacs/pulling/output.h" +#include "gromacs/pulling/pull.h" +#include "gromacs/swap/swapcoords.h" +#include "gromacs/timing/wallcycle.h" +#include 
"gromacs/timing/walltime_accounting.h" +#include "gromacs/topology/atoms.h" +#include "gromacs/topology/idef.h" +#include "gromacs/topology/mtop_util.h" +#include "gromacs/topology/topology.h" +#include "gromacs/trajectory/trajectoryframe.h" +#include "gromacs/utility/basedefinitions.h" +#include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/fatalerror.h" +#include "gromacs/utility/logger.h" +#include "gromacs/utility/real.h" +#include "gromacs/utility/smalloc.h" + +#include "integrator.h" +#include "replicaexchange.h" + +#if GMX_FAHCORE +#include "corewrap.h" +#endif + +/* PLUMED */ +#include "../../../Plumed.h" +extern int plumedswitch; +extern plumed plumedmain; +/* END PLUMED */ + +/* PLUMED HREX */ +extern int plumed_hrex; +/* END PLUMED HREX */ + +using gmx::SimulationSignaller; + +void gmx::Integrator::do_md() +{ + // TODO Historically, the EM and MD "integrators" used different + // names for the t_inputrec *parameter, but these must have the + // same name, now that it's a member of a struct. We use this ir + // alias to avoid a large ripple of nearly useless changes. + // t_inputrec is being replaced by IMdpOptionsProvider, so this + // will go away eventually. + t_inputrec *ir = inputrec; + gmx_mdoutf *outf = nullptr; + int64_t step, step_rel; + double t, t0, lam0[efptNR]; + gmx_bool bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner; + gmx_bool bNS, bNStList, bSimAnn, bStopCM, + bFirstStep, bInitStep, bLastStep = FALSE; + gmx_bool bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE; + gmx_bool do_ene, do_log, do_verbose; + gmx_bool bMasterState; + int force_flags, cglo_flags; + tensor force_vir, shake_vir, total_vir, tmp_vir, pres; + int i, m; + rvec mu_tot; + t_vcm *vcm; + matrix parrinellorahmanMu, M; + gmx_repl_ex_t repl_ex = nullptr; + gmx_localtop_t *top; + t_mdebin *mdebin = nullptr; + gmx_enerdata_t *enerd; + PaddedVector<gmx::RVec> f {}; + gmx_global_stat_t gstat; + gmx_update_t *upd = nullptr; + t_graph *graph = nullptr; + gmx_groups_t *groups; + gmx_ekindata_t *ekind; + gmx_shellfc_t *shellfc; + gmx_bool bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition; + gmx_bool bTemp, bPres, bTrotter; + real dvdl_constr; + rvec *cbuf = nullptr; + int cbuf_nalloc = 0; + matrix lastbox; + int lamnew = 0; + /* for FEP */ + int nstfep = 0; + double cycles; + real saved_conserved_quantity = 0; + real last_ekin = 0; + t_extmass MassQ; + int **trotter_seq; + char sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE]; + + /* PME load balancing data for GPU kernels */ + pme_load_balancing_t *pme_loadbal = nullptr; + gmx_bool bPMETune = FALSE; + gmx_bool bPMETunePrinting = FALSE; + + /* Interactive MD */ + gmx_bool bIMDstep = FALSE; + + /* PLUMED */ + int plumedNeedsEnergy=0; + int plumedWantsToStop=0; + matrix plumed_vir; + /* END PLUMED */ + + /* Domain decomposition could incorrectly miss a bonded + interaction, but checking for that requires a global + communication stage, which does not otherwise happen in DD + code. So we do that alongside the first global energy reduction + after a new DD is made. These variables handle whether the + check happens, and the result it returns. */ + bool shouldCheckNumberOfBondedInteractions = false; + int totalNumberOfBondedInteractions = -1; + + SimulationSignals signals; + // Most global communnication stages don't propagate mdrun + // signals, and will use this object to achieve that. 
+ SimulationSignaller nullSignaller(nullptr, nullptr, nullptr, false, false); + + if (!mdrunOptions.writeConfout) + { + // This is on by default, and the main known use case for + // turning it off is for convenience in benchmarking, which is + // something that should not show up in the general user + // interface. + GMX_LOG(mdlog.info).asParagraph(). + appendText("The -noconfout functionality is deprecated, and may be removed in a future version."); + } + + /* md-vv uses averaged full step velocities for T-control + md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control) + md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */ + bTrotter = (EI_VV(ir->eI) && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir))); + + const bool bRerunMD = false; + int nstglobalcomm = mdrunOptions.globalCommunicationInterval; + + nstglobalcomm = check_nstglobalcomm(mdlog, nstglobalcomm, ir, cr); + bGStatEveryStep = (nstglobalcomm == 1); + + groups = &top_global->groups; + + std::unique_ptr<EssentialDynamics> ed = nullptr; + if (opt2bSet("-ei", nfile, fnm) || observablesHistory->edsamHistory != nullptr) + { + /* Initialize essential dynamics sampling */ + ed = init_edsam(mdlog, + opt2fn_null("-ei", nfile, fnm), opt2fn("-eo", nfile, fnm), + top_global, + ir, cr, constr, + state_global, observablesHistory, + oenv, mdrunOptions.continuationOptions.appendFiles); + } + + /* Initial values */ + init_md(fplog, cr, outputProvider, ir, oenv, mdrunOptions, + &t, &t0, state_global, lam0, + nrnb, top_global, &upd, deform, + nfile, fnm, &outf, &mdebin, + force_vir, shake_vir, total_vir, pres, mu_tot, &bSimAnn, &vcm, wcycle); + + /* Energy terms and groups */ + snew(enerd, 1); + init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda, + enerd); + + /* Kinetic energy data */ + snew(ekind, 1); + init_ekindata(fplog, top_global, &(ir->opts), ekind); + /* Copy the cos acceleration to the groups struct */ + ekind->cosacc.cos_accel = ir->cos_accel; + + gstat = global_stat_init(ir); + + /* Check for polarizable models and flexible constraints */ + shellfc = init_shell_flexcon(fplog, + top_global, constr ? constr->numFlexibleConstraints() : 0, + ir->nstcalcenergy, DOMAINDECOMP(cr)); + + { + double io = compute_io(ir, top_global->natoms, groups, mdebin->ebin->nener, 1); + if ((io > 2000) && MASTER(cr)) + { + fprintf(stderr, + "\nWARNING: This run will generate roughly %.0f Mb of data\n\n", + io); + } + } + + /* Set up interactive MD (IMD) */ + init_IMD(ir, cr, ms, top_global, fplog, ir->nstcalcenergy, + MASTER(cr) ? state_global->x.rvec_array() : nullptr, + nfile, fnm, oenv, mdrunOptions); + + // Local state only becomes valid now. 
+ std::unique_ptr<t_state> stateInstance; + t_state * state; + + if (DOMAINDECOMP(cr)) + { + top = dd_init_local_top(top_global); + + stateInstance = compat::make_unique<t_state>(); + state = stateInstance.get(); + dd_init_local_state(cr->dd, state_global, state); + + /* Distribute the charge groups over the nodes from the master node */ + dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1, + state_global, top_global, ir, + state, &f, mdAtoms, top, fr, + vsite, constr, + nrnb, nullptr, FALSE); + shouldCheckNumberOfBondedInteractions = true; + update_realloc(upd, state->natoms); + } + else + { + state_change_natoms(state_global, state_global->natoms); + f.resizeWithPadding(state_global->natoms); + /* Copy the pointer to the global state */ + state = state_global; + + snew(top, 1); + mdAlgorithmsSetupAtomData(cr, ir, top_global, top, fr, + &graph, mdAtoms, constr, vsite, shellfc); + + update_realloc(upd, state->natoms); + } + + auto mdatoms = mdAtoms->mdatoms(); + + // NOTE: The global state is no longer used at this point. + // But state_global is still used as temporary storage space for writing + // the global state to file and potentially for replica exchange. + // (Global topology should persist.) + + update_mdatoms(mdatoms, state->lambda[efptMASS]); + + const ContinuationOptions &continuationOptions = mdrunOptions.continuationOptions; + bool startingFromCheckpoint = continuationOptions.startedFromCheckpoint; + + if (ir->bExpanded) + { + /* Check nstexpanded here, because the grompp check was broken */ + if (ir->expandedvals->nstexpanded % ir->nstcalcenergy != 0) + { + gmx_fatal(FARGS, "With expanded ensemble, nstexpanded should be a multiple of nstcalcenergy"); + } + init_expanded_ensemble(startingFromCheckpoint, ir, state->dfhist); + } + + if (MASTER(cr)) + { + if (startingFromCheckpoint) + { + /* Update mdebin with energy history if appending to output files */ + if (continuationOptions.appendFiles) + { + /* If no history is available (because a checkpoint is from before + * it was written) make a new one later, otherwise restore it. + */ + if (observablesHistory->energyHistory) + { + restore_energyhistory_from_state(mdebin, observablesHistory->energyHistory.get()); + } + } + else if (observablesHistory->energyHistory) + { + /* We might have read an energy history from checkpoint. + * As we are not appending, we want to restart the statistics. + * Free the allocated memory and reset the counts. + */ + observablesHistory->energyHistory = {}; + /* We might have read a pull history from checkpoint. + * We will still want to keep the statistics, so that the files + * can be joined and still be meaningful. + * This means that observablesHistory->pullHistory + * should not be reset. 
+ */ + } + } + if (!observablesHistory->energyHistory) + { + observablesHistory->energyHistory = compat::make_unique<energyhistory_t>(); + } + if (!observablesHistory->pullHistory) + { + observablesHistory->pullHistory = compat::make_unique<PullHistory>(); + } + /* Set the initial energy history in state by updating once */ + update_energyhistory(observablesHistory->energyHistory.get(), mdebin); + } + + preparePrevStepPullCom(ir, mdatoms, state, state_global, cr, startingFromCheckpoint); + + // TODO: Remove this by converting AWH into a ForceProvider + auto awh = prepareAwhModule(fplog, *ir, state_global, cr, ms, startingFromCheckpoint, + shellfc != nullptr, + opt2fn("-awh", nfile, fnm), ir->pull_work); + + const bool useReplicaExchange = (replExParams.exchangeInterval > 0); + if (useReplicaExchange && MASTER(cr)) + { + repl_ex = init_replica_exchange(fplog, ms, top_global->natoms, ir, + replExParams); + } + /* PME tuning is only supported in the Verlet scheme, with PME for + * Coulomb. It is not supported with only LJ PME. */ + bPMETune = (mdrunOptions.tunePme && EEL_PME(fr->ic->eeltype) && + !mdrunOptions.reproducible && ir->cutoff_scheme != ecutsGROUP); + if (bPMETune) + { + pme_loadbal_init(&pme_loadbal, cr, mdlog, *ir, state->box, + *fr->ic, *fr->nbv->listParams, fr->pmedata, use_GPU(fr->nbv), + &bPMETunePrinting); + } + + if (!ir->bContinuation) + { + if (state->flags & (1 << estV)) + { + auto v = makeArrayRef(state->v); + /* Set the velocities of vsites, shells and frozen atoms to zero */ + for (i = 0; i < mdatoms->homenr; i++) + { + if (mdatoms->ptype[i] == eptVSite || + mdatoms->ptype[i] == eptShell) + { + clear_rvec(v[i]); + } + else if (mdatoms->cFREEZE) + { + for (m = 0; m < DIM; m++) + { + if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m]) + { + v[i][m] = 0; + } + } + } + } + } + + if (constr) + { + /* Constrain the initial coordinates and velocities */ + do_constrain_first(fplog, constr, ir, mdatoms, state); + } + if (vsite) + { + /* Construct the virtual sites for the initial configuration */ + construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, nullptr, + top->idef.iparams, top->idef.il, + fr->ePBC, fr->bMolPBC, cr, state->box); + } + } + + if (ir->efep != efepNO) + { + /* Set free energy calculation frequency as the greatest common + * denominator of nstdhdl and repl_ex_nst. */ + nstfep = ir->fepvals->nstdhdl; + if (ir->bExpanded) + { + nstfep = gmx_greatest_common_divisor(ir->expandedvals->nstexpanded, nstfep); + } + if (useReplicaExchange) + { + nstfep = gmx_greatest_common_divisor(replExParams.exchangeInterval, nstfep); + } + } + + /* Be REALLY careful about what flags you set here. You CANNOT assume + * this is the first step, since we might be restarting from a checkpoint, + * and in that case we should not do any modifications to the state. + */ + bStopCM = (ir->comm_mode != ecmNO && !ir->bContinuation); + + if (continuationOptions.haveReadEkin) + { + restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate); + } + + cglo_flags = (CGLO_INITIALIZATION | CGLO_TEMPERATURE | CGLO_GSTAT + | (EI_VV(ir->eI) ? CGLO_PRESSURE : 0) + | (EI_VV(ir->eI) ? CGLO_CONSTRAINT : 0) + | (continuationOptions.haveReadEkin ? CGLO_READEKIN : 0)); + + bSumEkinhOld = FALSE; + /* To minimize communication, compute_globals computes the COM velocity + * and the kinetic energy for the velocities without COM motion removed. + * Thus to get the kinetic energy without the COM contribution, we need + * to call compute_globals twice. 
+ */ + for (int cgloIteration = 0; cgloIteration < (bStopCM ? 2 : 1); cgloIteration++) + { + int cglo_flags_iteration = cglo_flags; + if (bStopCM && cgloIteration == 0) + { + cglo_flags_iteration |= CGLO_STOPCM; + cglo_flags_iteration &= ~CGLO_TEMPERATURE; + } + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm, + nullptr, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, + constr, &nullSignaller, state->box, + &totalNumberOfBondedInteractions, &bSumEkinhOld, cglo_flags_iteration + | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0)); + } + checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, + top_global, top, state, + &shouldCheckNumberOfBondedInteractions); + if (ir->eI == eiVVAK) + { + /* a second call to get the half step temperature initialized as well */ + /* we do the same call as above, but turn the pressure off -- internally to + compute_globals, this is recognized as a velocity verlet half-step + kinetic energy calculation. This minimized excess variables, but + perhaps loses some logic?*/ + + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm, + nullptr, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, + constr, &nullSignaller, state->box, + nullptr, &bSumEkinhOld, + cglo_flags & ~CGLO_PRESSURE); + } + + /* Calculate the initial half step temperature, and save the ekinh_old */ + if (!continuationOptions.startedFromCheckpoint) + { + for (i = 0; (i < ir->opts.ngtc); i++) + { + copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old); + } + } + + /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter + temperature control */ + trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter); + + if (MASTER(cr)) + { + if (!ir->bContinuation) + { + if (constr && ir->eConstrAlg == econtLINCS) + { + fprintf(fplog, + "RMS relative constraint deviation after constraining: %.2e\n", + constr->rmsd()); + } + if (EI_STATE_VELOCITY(ir->eI)) + { + real temp = enerd->term[F_TEMP]; + if (ir->eI != eiVV) + { + /* Result of Ekin averaged over velocities of -half + * and +half step, while we only have -half step here. 
+ */ + temp *= 2; + } + fprintf(fplog, "Initial temperature: %g K\n", temp); + } + } + + char tbuf[20]; + fprintf(stderr, "starting mdrun '%s'\n", + *(top_global->name)); + if (ir->nsteps >= 0) + { + sprintf(tbuf, "%8.1f", (ir->init_step+ir->nsteps)*ir->delta_t); + } + else + { + sprintf(tbuf, "%s", "infinite"); + } + if (ir->init_step > 0) + { + fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n", + gmx_step_str(ir->init_step+ir->nsteps, sbuf), tbuf, + gmx_step_str(ir->init_step, sbuf2), + ir->init_step*ir->delta_t); + } + else + { + fprintf(stderr, "%s steps, %s ps.\n", + gmx_step_str(ir->nsteps, sbuf), tbuf); + } + fprintf(fplog, "\n"); + } + + /* PLUMED */ + if(plumedswitch){ + /* detect plumed API version */ + int pversion=0; + plumed_cmd(plumedmain,"getApiVersion",&pversion); + /* setting kbT is only implemented with api>1) */ + real kbT=ir->opts.ref_t[0]*BOLTZ; + if(pversion>1) plumed_cmd(plumedmain,"setKbT",&kbT); + if(pversion>2){ + int res=1; + if( (continuationOptions.startedFromCheckpoint) ) plumed_cmd(plumedmain,"setRestart",&res); + } + + if(ms && ms->nsim>1) { + if(MASTER(cr)) plumed_cmd(plumedmain,"GREX setMPIIntercomm",&ms->mpi_comm_masters); + if(PAR(cr)){ + if(DOMAINDECOMP(cr)) { + plumed_cmd(plumedmain,"GREX setMPIIntracomm",&cr->dd->mpi_comm_all); + }else{ + plumed_cmd(plumedmain,"GREX setMPIIntracomm",&cr->mpi_comm_mysim); + } + } + plumed_cmd(plumedmain,"GREX init",NULL); + } + if(PAR(cr)){ + if(DOMAINDECOMP(cr)) { + plumed_cmd(plumedmain,"setMPIComm",&cr->dd->mpi_comm_all); + } + } + plumed_cmd(plumedmain,"setNatoms",&top_global->natoms); + plumed_cmd(plumedmain,"setMDEngine","gromacs"); + plumed_cmd(plumedmain,"setLog",fplog); + real real_delta_t=ir->delta_t; + plumed_cmd(plumedmain,"setTimestep",&real_delta_t); + plumed_cmd(plumedmain,"init",NULL); + + if(PAR(cr)){ + if(DOMAINDECOMP(cr)) { + int nat_home = dd_numHomeAtoms(*cr->dd); + plumed_cmd(plumedmain,"setAtomsNlocal",&nat_home); + plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->globalAtomIndices.data()); + } + } + } + /* END PLUMED */ + + walltime_accounting_start_time(walltime_accounting); + wallcycle_start(wcycle, ewcRUN); + print_start(fplog, cr, walltime_accounting, "mdrun"); + +#if GMX_FAHCORE + /* safest point to do file checkpointing is here. More general point would be immediately before integrator call */ + int chkpt_ret = fcCheckPointParallel( cr->nodeid, + NULL, 0); + if (chkpt_ret == 0) + { + gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", 0 ); + } +#endif + + /*********************************************************** + * + * Loop over MD steps + * + ************************************************************/ + + bFirstStep = TRUE; + /* Skip the first Nose-Hoover integration when we get the state from tpx */ + bInitStep = !startingFromCheckpoint || EI_VV(ir->eI); + bSumEkinhOld = FALSE; + bExchanged = FALSE; + bNeedRepartition = FALSE; + + bool simulationsShareState = false; + int nstSignalComm = nstglobalcomm; + { + // TODO This implementation of ensemble orientation restraints is nasty because + // a user can't just do multi-sim with single-sim orientation restraints. 
+ bool usingEnsembleRestraints = (fcd->disres.nsystems > 1) || ((ms != nullptr) && (fcd->orires.nr != 0)); + bool awhUsesMultiSim = (ir->bDoAwh && ir->awhParams->shareBiasMultisim && (ms != nullptr)); + + // Replica exchange, ensemble restraints and AWH need all + // simulations to remain synchronized, so they need + // checkpoints and stop conditions to act on the same step, so + // the propagation of such signals must take place between + // simulations, not just within simulations. + // TODO: Make algorithm initializers set these flags. + simulationsShareState = useReplicaExchange || usingEnsembleRestraints || awhUsesMultiSim || (plumedswitch && ms); // PLUMED hack, if we have multiple sim and plumed we usually want them to be in sync + + if (simulationsShareState) + { + // Inter-simulation signal communication does not need to happen + // often, so we use a minimum of 200 steps to reduce overhead. + const int c_minimumInterSimulationSignallingInterval = 200; + nstSignalComm = ((c_minimumInterSimulationSignallingInterval + nstglobalcomm - 1)/nstglobalcomm)*nstglobalcomm; + } + } + + auto stopHandler = stopHandlerBuilder->getStopHandlerMD( + compat::not_null<SimulationSignal*>(&signals[eglsSTOPCOND]), simulationsShareState, + MASTER(cr), ir->nstlist, mdrunOptions.reproducible, nstSignalComm, + mdrunOptions.maximumHoursToRun, ir->nstlist == 0, fplog, step, bNS, walltime_accounting); + + auto checkpointHandler = compat::make_unique<CheckpointHandler>( + compat::make_not_null<SimulationSignal*>(&signals[eglsCHKPT]), + simulationsShareState, ir->nstlist == 0, MASTER(cr), + mdrunOptions.writeConfout, mdrunOptions.checkpointOptions.period); + + const bool resetCountersIsLocal = true; + auto resetHandler = compat::make_unique<ResetHandler>( + compat::make_not_null<SimulationSignal*>(&signals[eglsRESETCOUNTERS]), !resetCountersIsLocal, + ir->nsteps, MASTER(cr), mdrunOptions.timingOptions.resetHalfway, + mdrunOptions.maximumHoursToRun, mdlog, wcycle, walltime_accounting); + + DdOpenBalanceRegionBeforeForceComputation ddOpenBalanceRegion = (DOMAINDECOMP(cr) ? DdOpenBalanceRegionBeforeForceComputation::yes : DdOpenBalanceRegionBeforeForceComputation::no); + DdCloseBalanceRegionAfterForceComputation ddCloseBalanceRegion = (DOMAINDECOMP(cr) ? DdCloseBalanceRegionAfterForceComputation::yes : DdCloseBalanceRegionAfterForceComputation::no); + + step = ir->init_step; + step_rel = 0; + + // TODO extract this to new multi-simulation module + if (MASTER(cr) && isMultiSim(ms) && !useReplicaExchange) + { + if (!multisim_int_all_are_equal(ms, ir->nsteps)) + { + GMX_LOG(mdlog.warning).appendText( + "Note: The number of steps is not consistent across multi simulations,\n" + "but we are proceeding anyway!"); + } + if (!multisim_int_all_are_equal(ms, ir->init_step)) + { + GMX_LOG(mdlog.warning).appendText( + "Note: The initial step is not consistent across multi simulations,\n" + "but we are proceeding anyway!"); + } + } + + /* and stop now if we should */ + bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps)); + while (!bLastStep) + { + + /* Determine if this is a neighbor search step */ + bNStList = (ir->nstlist > 0 && step % ir->nstlist == 0); + + if (bPMETune && bNStList) + { + /* PME grid + cut-off optimization with GPUs or PME nodes */ + pme_loadbal_do(pme_loadbal, cr, + (mdrunOptions.verbose && MASTER(cr)) ? 
stderr : nullptr, + fplog, mdlog, + *ir, fr, *state, + wcycle, + step, step_rel, + &bPMETunePrinting); + } + + wallcycle_start(wcycle, ewcSTEP); + + bLastStep = (step_rel == ir->nsteps); + t = t0 + step*ir->delta_t; + + // TODO Refactor this, so that nstfep does not need a default value of zero + if (ir->efep != efepNO || ir->bSimTemp) + { + /* find and set the current lambdas */ + setCurrentLambdasLocal(step, ir->fepvals, lam0, state); + + bDoDHDL = do_per_step(step, ir->fepvals->nstdhdl); + bDoFEP = ((ir->efep != efepNO) && do_per_step(step, nstfep)); + bDoExpanded = (do_per_step(step, ir->expandedvals->nstexpanded) + && (ir->bExpanded) && (step > 0) && (!startingFromCheckpoint)); + } + + bDoReplEx = (useReplicaExchange && (step > 0) && !bLastStep && + do_per_step(step, replExParams.exchangeInterval)); + + if (bSimAnn) + { + update_annealing_target_temp(ir, t, upd); + } + + /* Stop Center of Mass motion */ + bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm)); + + /* Determine whether or not to do Neighbour Searching */ + bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition); + + bLastStep = bLastStep || stopHandler->stoppingAfterCurrentStep(bNS); + + /* do_log triggers energy and virial calculation. Because this leads + * to different code paths, forces can be different. Thus for exact + * continuation we should avoid extra log output. + * Note that the || bLastStep can result in non-exact continuation + * beyond the last step. But we don't consider that to be an issue. + */ + do_log = do_per_step(step, ir->nstlog) || (bFirstStep && !startingFromCheckpoint) || bLastStep; + do_verbose = mdrunOptions.verbose && + (step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep); + + if (bNS && !(bFirstStep && ir->bContinuation)) + { + bMasterState = FALSE; + /* Correct the new box if it is too skewed */ + if (inputrecDynamicBox(ir)) + { + if (correct_box(fplog, step, state->box, graph)) + { + bMasterState = TRUE; + } + } + if (DOMAINDECOMP(cr) && bMasterState) + { + dd_collect_state(cr->dd, state, state_global); + } + + if (DOMAINDECOMP(cr)) + { + /* Repartition the domain decomposition */ + dd_partition_system(fplog, mdlog, step, cr, + bMasterState, nstglobalcomm, + state_global, top_global, ir, + state, &f, mdAtoms, top, fr, + vsite, constr, + nrnb, wcycle, + do_verbose && !bPMETunePrinting); + shouldCheckNumberOfBondedInteractions = true; + update_realloc(upd, state->natoms); + + /* PLUMED */ + if(plumedswitch){ + int nat_home = dd_numHomeAtoms(*cr->dd); + plumed_cmd(plumedmain,"setAtomsNlocal",&nat_home); + plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->globalAtomIndices.data()); + } + /* END PLUMED */ + } + } + + if (MASTER(cr) && do_log) + { + print_ebin_header(fplog, step, t); /* can we improve the information printed here? */ + } + + if (ir->efep != efepNO) + { + update_mdatoms(mdatoms, state->lambda[efptMASS]); + } + + if (bExchanged) + { + + /* We need the kinetic energy at minus the half step for determining + * the full step kinetic energy and possibly for T-coupling.*/ + /* This may not be quite working correctly yet . . . . 
*/ + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm, + wcycle, enerd, nullptr, nullptr, nullptr, nullptr, mu_tot, + constr, &nullSignaller, state->box, + &totalNumberOfBondedInteractions, &bSumEkinhOld, + CGLO_GSTAT | CGLO_TEMPERATURE | CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS); + checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, + top_global, top, state, + &shouldCheckNumberOfBondedInteractions); + } + clear_mat(force_vir); + + /* PLUMED HREX */ + gmx_bool bHREX = bDoReplEx && plumed_hrex; + + if (plumedswitch && bHREX) { + gmx_enerdata_t *hrex_enerd; + snew(hrex_enerd, 1); + init_enerdata(top_global->groups.grps[egcENER].nr,ir->fepvals->n_lambda,hrex_enerd); + int repl = -1; + int nrepl = -1; + if (MASTER(cr)){ + repl = replica_exchange_get_repl(repl_ex); + nrepl = replica_exchange_get_nrepl(repl_ex); + } + + if (DOMAINDECOMP(cr)) { + dd_collect_state(cr->dd,state,state_global); + } else { + copy_state_serial(state, state_global); + } + + if(MASTER(cr)){ + if(repl%2==step/replExParams.exchangeInterval%2){ + if(repl-1>=0) exchange_state(ms,repl-1,state_global); + }else{ + if(repl+1<nrepl) exchange_state(ms,repl+1,state_global); + } + } + if (!DOMAINDECOMP(cr)) { + copy_state_serial(state_global, state); + } + if(PAR(cr)){ + if (DOMAINDECOMP(cr)) { + dd_partition_system(fplog,mdlog,step,cr,TRUE,1, + state_global,top_global,ir, + state,&f,mdAtoms,top,fr,vsite,constr, + nrnb,wcycle,FALSE); + } + } + do_force(fplog, cr, ms, ir, awh.get(), enforcedRotation, + step, nrnb, wcycle, top, groups, + state->box, state->x.arrayRefWithPadding(), &state->hist, + f.arrayRefWithPadding(), force_vir, mdatoms, hrex_enerd, fcd, + state->lambda, graph, + fr, ppForceWorkload, vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr, + GMX_FORCE_STATECHANGED | + GMX_FORCE_DYNAMICBOX | + GMX_FORCE_ALLFORCES | + GMX_FORCE_VIRIAL | + GMX_FORCE_ENERGY | + GMX_FORCE_DHDL | + GMX_FORCE_NS, + ddOpenBalanceRegion, ddCloseBalanceRegion); + + plumed_cmd(plumedmain,"GREX cacheLocalUSwap",&hrex_enerd->term[F_EPOT]); + sfree(hrex_enerd); + + /* exchange back */ + if (DOMAINDECOMP(cr)) { + dd_collect_state(cr->dd,state,state_global); + } else { + copy_state_serial(state, state_global); + } + + if(MASTER(cr)){ + if(repl%2==step/replExParams.exchangeInterval%2){ + if(repl-1>=0) exchange_state(ms,repl-1,state_global); + }else{ + if(repl+1<nrepl) exchange_state(ms,repl+1,state_global); + } + } + + if (!DOMAINDECOMP(cr)) { + copy_state_serial(state_global, state); + } + if(PAR(cr)){ + if (DOMAINDECOMP(cr)) { + dd_partition_system(fplog,mdlog,step,cr,TRUE,1, + state_global,top_global,ir, + state,&f,mdAtoms,top,fr,vsite,constr, + nrnb,wcycle,FALSE); + int nat_home = dd_numHomeAtoms(*cr->dd); + plumed_cmd(plumedmain,"setAtomsNlocal",&nat_home); + plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->globalAtomIndices.data()); + } + } + } + /* END PLUMED HREX */ + + checkpointHandler->decideIfCheckpointingThisStep(bNS, bFirstStep, bLastStep); + + /* Determine the energy and pressure: + * at nstcalcenergy steps and at energy output steps (set below). 
+ */ + if (EI_VV(ir->eI) && (!bInitStep)) + { + bCalcEnerStep = do_per_step(step, ir->nstcalcenergy); + bCalcVir = bCalcEnerStep || + (ir->epc != epcNO && (do_per_step(step, ir->nstpcouple) || do_per_step(step-1, ir->nstpcouple))); + } + else + { + bCalcEnerStep = do_per_step(step, ir->nstcalcenergy); + bCalcVir = bCalcEnerStep || + (ir->epc != epcNO && do_per_step(step, ir->nstpcouple)); + } + bCalcEner = bCalcEnerStep; + + do_ene = (do_per_step(step, ir->nstenergy) || bLastStep); + + if (do_ene || do_log || bDoReplEx) + { + bCalcVir = TRUE; + bCalcEner = TRUE; + } + + /* Do we need global communication ? */ + bGStat = (bCalcVir || bCalcEner || bStopCM || + do_per_step(step, nstglobalcomm) || + (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step-1, nstglobalcomm))); + + force_flags = (GMX_FORCE_STATECHANGED | + ((inputrecDynamicBox(ir)) ? GMX_FORCE_DYNAMICBOX : 0) | + GMX_FORCE_ALLFORCES | + (bCalcVir ? GMX_FORCE_VIRIAL : 0) | + (bCalcEner ? GMX_FORCE_ENERGY : 0) | + (bDoFEP ? GMX_FORCE_DHDL : 0) + ); + + if (shellfc) + { + /* Now is the time to relax the shells */ + relax_shell_flexcon(fplog, cr, ms, mdrunOptions.verbose, + enforcedRotation, step, + ir, bNS, force_flags, top, + constr, enerd, fcd, + state, f.arrayRefWithPadding(), force_vir, mdatoms, + nrnb, wcycle, graph, groups, + shellfc, fr, ppForceWorkload, t, mu_tot, + vsite, + ddOpenBalanceRegion, ddCloseBalanceRegion); + } + else + { + /* The AWH history need to be saved _before_ doing force calculations where the AWH bias is updated + (or the AWH update will be performed twice for one step when continuing). It would be best to + call this update function from do_md_trajectory_writing but that would occur after do_force. + One would have to divide the update_awh function into one function applying the AWH force + and one doing the AWH bias update. The update AWH bias function could then be called after + do_md_trajectory_writing (then containing update_awh_history). + The checkpointing will in the future probably moved to the start of the md loop which will + rid of this issue. */ + if (awh && checkpointHandler->isCheckpointingStep() && MASTER(cr)) + { + awh->updateHistory(state_global->awhHistory.get()); + } + + /* The coordinates (x) are shifted (to get whole molecules) + * in do_force. + * This is parallellized as well, and does communication too. 
+ * Check comments in sim_util.c + */ + + /* PLUMED */ + plumedNeedsEnergy=0; + if(plumedswitch){ + int pversion=0; + plumed_cmd(plumedmain,"getApiVersion",&pversion); + long int lstep=step; plumed_cmd(plumedmain,"setStepLong",&lstep); + plumed_cmd(plumedmain,"setPositions",&state->x[0][0]); + plumed_cmd(plumedmain,"setMasses",&mdatoms->massT[0]); + plumed_cmd(plumedmain,"setCharges",&mdatoms->chargeA[0]); + plumed_cmd(plumedmain,"setBox",&state->box[0][0]); + plumed_cmd(plumedmain,"prepareCalc",NULL); + plumed_cmd(plumedmain,"setStopFlag",&plumedWantsToStop); + int checkp=0; if(checkpointHandler->isCheckpointingStep()) checkp=1; + if(pversion>3) plumed_cmd(plumedmain,"doCheckPoint",&checkp); + plumed_cmd(plumedmain,"setForces",&f[0][0]); + plumed_cmd(plumedmain,"isEnergyNeeded",&plumedNeedsEnergy); + if(plumedNeedsEnergy) force_flags |= GMX_FORCE_ENERGY | GMX_FORCE_VIRIAL; + clear_mat(plumed_vir); + plumed_cmd(plumedmain,"setVirial",&plumed_vir[0][0]); + } + /* END PLUMED */ + do_force(fplog, cr, ms, ir, awh.get(), enforcedRotation, + step, nrnb, wcycle, top, groups, + state->box, state->x.arrayRefWithPadding(), &state->hist, + f.arrayRefWithPadding(), force_vir, mdatoms, enerd, fcd, + state->lambda, graph, + fr, ppForceWorkload, vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr, + (bNS ? GMX_FORCE_NS : 0) | force_flags, + ddOpenBalanceRegion, ddCloseBalanceRegion); + /* PLUMED */ + if(plumedswitch){ + if(plumedNeedsEnergy){ + msmul(force_vir,2.0,plumed_vir); + plumed_cmd(plumedmain,"setEnergy",&enerd->term[F_EPOT]); + plumed_cmd(plumedmain,"performCalc",NULL); + msmul(plumed_vir,0.5,force_vir); + } else { + msmul(plumed_vir,0.5,plumed_vir); + m_add(force_vir,plumed_vir,force_vir); + } + if(bDoReplEx) plumed_cmd(plumedmain,"GREX savePositions",NULL); + if(plumedWantsToStop) ir->nsteps=step_rel+1; + if(bHREX) plumed_cmd(plumedmain,"GREX cacheLocalUNow",&enerd->term[F_EPOT]); + } + /* END PLUMED */ + } + + if (EI_VV(ir->eI) && !startingFromCheckpoint) + /* ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */ + { + rvec *vbuf = nullptr; + + wallcycle_start(wcycle, ewcUPDATE); + if (ir->eI == eiVV && bInitStep) + { + /* if using velocity verlet with full time step Ekin, + * take the first half step only to compute the + * virial for the first step. From there, + * revert back to the initial coordinates + * so that the input is actually the initial step. + */ + snew(vbuf, state->natoms); + copy_rvecn(state->v.rvec_array(), vbuf, 0, state->natoms); /* should make this better for parallelizing? */ + } + else + { + /* this is for NHC in the Ekin(t+dt/2) version of vv */ + trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ1); + } + + update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, + ekind, M, upd, etrtVELOCITY1, + cr, constr); + + wallcycle_stop(wcycle, ewcUPDATE); + constrain_velocities(step, nullptr, + state, + shake_vir, + constr, + bCalcVir, do_log, do_ene); + wallcycle_start(wcycle, ewcUPDATE); + /* if VV, compute the pressure and constraints */ + /* For VV2, we strictly only need this if using pressure + * control, but we really would like to have accurate pressures + * printed out. + * Think about ways around this in the future? + * For now, keep this choice in comments. 
+ */ + /*bPres = (ir->eI==eiVV || inputrecNptTrotter(ir)); */ + /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && inputrecNptTrotter(ir)));*/ + bPres = TRUE; + bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK)); + if (bCalcEner && ir->eI == eiVVAK) + { + bSumEkinhOld = TRUE; + } + /* for vv, the first half of the integration actually corresponds to the previous step. + So we need information from the last step in the first half of the integration */ + if (bGStat || do_per_step(step-1, nstglobalcomm)) + { + wallcycle_stop(wcycle, ewcUPDATE); + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm, + wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, + constr, &nullSignaller, state->box, + &totalNumberOfBondedInteractions, &bSumEkinhOld, + (bGStat ? CGLO_GSTAT : 0) + | (bCalcEner ? CGLO_ENERGY : 0) + | (bTemp ? CGLO_TEMPERATURE : 0) + | (bPres ? CGLO_PRESSURE : 0) + | (bPres ? CGLO_CONSTRAINT : 0) + | (bStopCM ? CGLO_STOPCM : 0) + | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0) + | CGLO_SCALEEKIN + ); + /* explanation of above: + a) We compute Ekin at the full time step + if 1) we are using the AveVel Ekin, and it's not the + initial step, or 2) if we are using AveEkin, but need the full + time step kinetic energy for the pressure (always true now, since we want accurate statistics). + b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in + EkinAveVel because it's needed for the pressure */ + checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, + top_global, top, state, + &shouldCheckNumberOfBondedInteractions); + wallcycle_start(wcycle, ewcUPDATE); + } + /* temperature scaling and pressure scaling to produce the extended variables at t+dt */ + if (!bInitStep) + { + if (bTrotter) + { + m_add(force_vir, shake_vir, total_vir); /* we need the un-dispersion corrected total vir here */ + trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ2); + + /* TODO This is only needed when we're about to write + * a checkpoint, because we use it after the restart + * (in a kludge?). But what should we be doing if + * startingFromCheckpoint or bInitStep are true? */ + if (inputrecNptTrotter(ir) || inputrecNphTrotter(ir)) + { + copy_mat(shake_vir, state->svir_prev); + copy_mat(force_vir, state->fvir_prev); + } + if (inputrecNvtTrotter(ir) && ir->eI == eiVV) + { + /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */ + enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, nullptr, (ir->eI == eiVV), FALSE); + enerd->term[F_EKIN] = trace(ekind->ekin); + } + } + else if (bExchanged) + { + wallcycle_stop(wcycle, ewcUPDATE); + /* We need the kinetic energy at minus the half step for determining + * the full step kinetic energy and possibly for T-coupling.*/ + /* This may not be quite working correctly yet . . . . 
*/ + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm, + wcycle, enerd, nullptr, nullptr, nullptr, nullptr, mu_tot, + constr, &nullSignaller, state->box, + nullptr, &bSumEkinhOld, + CGLO_GSTAT | CGLO_TEMPERATURE); + wallcycle_start(wcycle, ewcUPDATE); + } + } + /* if it's the initial step, we performed this first step just to get the constraint virial */ + if (ir->eI == eiVV && bInitStep) + { + copy_rvecn(vbuf, state->v.rvec_array(), 0, state->natoms); + sfree(vbuf); + } + wallcycle_stop(wcycle, ewcUPDATE); + } + + /* compute the conserved quantity */ + if (EI_VV(ir->eI)) + { + saved_conserved_quantity = NPT_energy(ir, state, &MassQ); + if (ir->eI == eiVV) + { + last_ekin = enerd->term[F_EKIN]; + } + if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres)) + { + saved_conserved_quantity -= enerd->term[F_DISPCORR]; + } + /* sum up the foreign energy and dhdl terms for vv. currently done every step so that dhdl is correct in the .edr */ + if (ir->efep != efepNO) + { + sum_dhdl(enerd, state->lambda, ir->fepvals); + } + } + + /* ######## END FIRST UPDATE STEP ############## */ + /* ######## If doing VV, we now have v(dt) ###### */ + if (bDoExpanded) + { + /* perform extended ensemble sampling in lambda - we don't + actually move to the new state before outputting + statistics, but if performing simulated tempering, we + do update the velocities and the tau_t. */ + + lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state, state->dfhist, step, state->v.rvec_array(), mdatoms); + /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */ + if (MASTER(cr)) + { + copy_df_history(state_global->dfhist, state->dfhist); + } + } + + /* Now we have the energies and forces corresponding to the + * coordinates at time t. We must output all of this before + * the update. + */ + do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t, + ir, state, state_global, observablesHistory, + top_global, fr, + outf, mdebin, ekind, f, + checkpointHandler->isCheckpointingStep(), + bRerunMD, bLastStep, + mdrunOptions.writeConfout, + bSumEkinhOld); + /* Check if IMD step and do IMD communication, if bIMD is TRUE. */ + bIMDstep = do_IMD(ir->bIMD, step, cr, bNS, state->box, state->x.rvec_array(), ir, t, wcycle); + + /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */ + if (startingFromCheckpoint && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir))) + { + copy_mat(state->svir_prev, shake_vir); + copy_mat(state->fvir_prev, force_vir); + } + + stopHandler->setSignal(); + resetHandler->setSignal(walltime_accounting); + + if (bGStat || !PAR(cr)) + { + /* In parallel we only have to check for checkpointing in steps + * where we do global communication, + * otherwise the other nodes don't know. + */ + checkpointHandler->setSignal(walltime_accounting); + } + + /* ######### START SECOND UPDATE STEP ################# */ + + /* at the start of step, randomize or scale the velocities ((if vv. 
Restriction of Andersen controlled + in preprocessing */ + + if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */ + { + gmx_bool bIfRandomize; + bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state->v, upd, constr); + /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */ + if (constr && bIfRandomize) + { + constrain_velocities(step, nullptr, + state, + tmp_vir, + constr, + bCalcVir, do_log, do_ene); + } + } + /* Box is changed in update() when we do pressure coupling, + * but we should still use the old box for energy corrections and when + * writing it to the energy file, so it matches the trajectory files for + * the same timestep above. Make a copy in a separate array. + */ + copy_mat(state->box, lastbox); + + dvdl_constr = 0; + + wallcycle_start(wcycle, ewcUPDATE); + /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */ + if (bTrotter) + { + trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3); + /* We can only do Berendsen coupling after we have summed + * the kinetic energy or virial. Since the happens + * in global_state after update, we should only do it at + * step % nstlist = 1 with bGStatEveryStep=FALSE. + */ + } + else + { + update_tcouple(step, ir, state, ekind, &MassQ, mdatoms); + update_pcouple_before_coordinates(fplog, step, ir, state, + parrinellorahmanMu, M, + bInitStep); + } + + if (EI_VV(ir->eI)) + { + /* velocity half-step update */ + update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, + ekind, M, upd, etrtVELOCITY2, + cr, constr); + } + + /* Above, initialize just copies ekinh into ekin, + * it doesn't copy position (for VV), + * and entire integrator for MD. + */ + + if (ir->eI == eiVVAK) + { + /* We probably only need md->homenr, not state->natoms */ + if (state->natoms > cbuf_nalloc) + { + cbuf_nalloc = state->natoms; + srenew(cbuf, cbuf_nalloc); + } + copy_rvecn(as_rvec_array(state->x.data()), cbuf, 0, state->natoms); + } + + update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, + ekind, M, upd, etrtPOSITION, cr, constr); + wallcycle_stop(wcycle, ewcUPDATE); + + constrain_coordinates(step, &dvdl_constr, state, + shake_vir, + upd, constr, + bCalcVir, do_log, do_ene); + update_sd_second_half(step, &dvdl_constr, ir, mdatoms, state, + cr, nrnb, wcycle, upd, constr, do_log, do_ene); + finish_update(ir, mdatoms, + state, graph, + nrnb, wcycle, upd, constr); + + if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM) + { + updatePrevStepPullCom(ir->pull_work, state); + } + + if (ir->eI == eiVVAK) + { + /* erase F_EKIN and F_TEMP here? */ + /* just compute the kinetic energy at the half step to perform a trotter step */ + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm, + wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, + constr, &nullSignaller, lastbox, + nullptr, &bSumEkinhOld, + (bGStat ? CGLO_GSTAT : 0) | CGLO_TEMPERATURE + ); + wallcycle_start(wcycle, ewcUPDATE); + trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4); + /* now we know the scaling, we can compute the positions again again */ + copy_rvecn(cbuf, as_rvec_array(state->x.data()), 0, state->natoms); + + update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, + ekind, M, upd, etrtPOSITION, cr, constr); + wallcycle_stop(wcycle, ewcUPDATE); + + /* do we need an extra constraint here? 
just need to copy out of as_rvec_array(state->v.data()) to upd->xp? */ + /* are the small terms in the shake_vir here due + * to numerical errors, or are they important + * physically? I'm thinking they are just errors, but not completely sure. + * For now, will call without actually constraining, constr=NULL*/ + finish_update(ir, mdatoms, + state, graph, + nrnb, wcycle, upd, nullptr); + } + if (EI_VV(ir->eI)) + { + /* this factor or 2 correction is necessary + because half of the constraint force is removed + in the vv step, so we have to double it. See + the Redmine issue #1255. It is not yet clear + if the factor of 2 is exact, or just a very + good approximation, and this will be + investigated. The next step is to see if this + can be done adding a dhdl contribution from the + rattle step, but this is somewhat more + complicated with the current code. Will be + investigated, hopefully for 4.6.3. However, + this current solution is much better than + having it completely wrong. + */ + enerd->term[F_DVDL_CONSTR] += 2*dvdl_constr; + } + else + { + enerd->term[F_DVDL_CONSTR] += dvdl_constr; + } + + if (vsite != nullptr) + { + wallcycle_start(wcycle, ewcVSITECONSTR); + if (graph != nullptr) + { + shift_self(graph, state->box, state->x.rvec_array()); + } + construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, state->v.rvec_array(), + top->idef.iparams, top->idef.il, + fr->ePBC, fr->bMolPBC, cr, state->box); + + if (graph != nullptr) + { + unshift_self(graph, state->box, state->x.rvec_array()); + } + wallcycle_stop(wcycle, ewcVSITECONSTR); + } + + /* ############## IF NOT VV, Calculate globals HERE ############ */ + /* With Leap-Frog we can skip compute_globals at + * non-communication steps, but we need to calculate + * the kinetic energy one step before communication. + */ + { + // Organize to do inter-simulation signalling on steps if + // and when algorithms require it. + bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm)); + + if (bGStat || (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm)) || doInterSimSignal) + { + // Since we're already communicating at this step, we + // can propagate intra-simulation signals. Note that + // check_nstglobalcomm has the responsibility for + // choosing the value of nstglobalcomm that is one way + // bGStat becomes true, so we can't get into a + // situation where e.g. checkpointing can't be + // signalled. + bool doIntraSimSignal = true; + SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal); + + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm, + wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, + constr, &signaller, + lastbox, + &totalNumberOfBondedInteractions, &bSumEkinhOld, + (bGStat ? CGLO_GSTAT : 0) + | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0) + | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0) + | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0) + | (!EI_VV(ir->eI) ? CGLO_PRESSURE : 0) + | CGLO_CONSTRAINT + | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0) + ); + checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, + top_global, top, state, + &shouldCheckNumberOfBondedInteractions); + } + } + + /* ############# END CALC EKIN AND PRESSURE ################# */ + + /* Note: this is OK, but there are some numerical precision issues with using the convergence of + the virial that should probably be addressed eventually. 
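The compute_globals call just above assembles its behaviour from a bitmask of CGLO_* flags, OR-ing each flag in only when the corresponding quantity is actually needed this step. A minimal, self-contained sketch of that composition pattern follows; the enum values and the decideGlobalCalcFlags() helper are illustrative stand-ins, not the GROMACS definitions.

#include <cstdio>

// Illustrative flag values; the real CGLO_* constants live in GROMACS headers.
enum GlobalCalcFlags : unsigned
{
    kCalcEnergy      = 1u << 0,
    kCalcTemperature = 1u << 1,
    kCalcPressure    = 1u << 2,
    kApplyStopCM     = 1u << 3,
};

// Compose the work for this step from independent booleans, in the same way
// the conditional OR chain above builds CGLO_ENERGY | CGLO_TEMPERATURE | ...
unsigned decideGlobalCalcFlags(bool calcEner, bool calcTemp, bool calcPres, bool stopCM)
{
    return (calcEner ? kCalcEnergy      : 0u)
         | (calcTemp ? kCalcTemperature : 0u)
         | (calcPres ? kCalcPressure    : 0u)
         | (stopCM   ? kApplyStopCM     : 0u);
}

int main()
{
    unsigned flags = decideGlobalCalcFlags(true, true, false, false);
    std::printf("energy requested: %s\n", (flags & kCalcEnergy) ? "yes" : "no");
}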
state->veta has better properies, + but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could + generate the new shake_vir, but test the veta value for convergence. This will take some thought. */ + + if (ir->efep != efepNO && !EI_VV(ir->eI)) + { + /* Sum up the foreign energy and dhdl terms for md and sd. + Currently done every step so that dhdl is correct in the .edr */ + sum_dhdl(enerd, state->lambda, ir->fepvals); + } + + update_pcouple_after_coordinates(fplog, step, ir, mdatoms, + pres, force_vir, shake_vir, + parrinellorahmanMu, + state, nrnb, upd); + + /* ################# END UPDATE STEP 2 ################# */ + /* #### We now have r(t+dt) and v(t+dt/2) ############# */ + + /* The coordinates (x) were unshifted in update */ + if (!bGStat) + { + /* We will not sum ekinh_old, + * so signal that we still have to do it. + */ + bSumEkinhOld = TRUE; + } + + if (bCalcEner) + { + /* ######### BEGIN PREPARING EDR OUTPUT ########### */ + + /* use the directly determined last velocity, not actually the averaged half steps */ + if (bTrotter && ir->eI == eiVV) + { + enerd->term[F_EKIN] = last_ekin; + } + enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN]; + + if (integratorHasConservedEnergyQuantity(ir)) + { + if (EI_VV(ir->eI)) + { + enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity; + } + else + { + enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + NPT_energy(ir, state, &MassQ); + } + } + /* ######### END PREPARING EDR OUTPUT ########### */ + } + + /* Output stuff */ + if (MASTER(cr)) + { + if (fplog && do_log && bDoExpanded) + { + /* only needed if doing expanded ensemble */ + PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals, ir->bSimTemp ? ir->simtempvals : nullptr, + state_global->dfhist, state->fep_state, ir->nstlog, step); + } + if (bCalcEner) + { + upd_mdebin(mdebin, bDoDHDL, bCalcEnerStep, + t, mdatoms->tmass, enerd, state, + ir->fepvals, ir->expandedvals, lastbox, + shake_vir, force_vir, total_vir, pres, + ekind, mu_tot, constr); + } + else + { + upd_mdebin_step(mdebin); + } + + gmx_bool do_dr = do_per_step(step, ir->nstdisreout); + gmx_bool do_or = do_per_step(step, ir->nstorireout); + + print_ebin(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or, do_log ? fplog : nullptr, + step, t, + eprNORMAL, mdebin, fcd, groups, &(ir->opts), awh.get()); + + if (ir->bPull) + { + pull_print_output(ir->pull_work, step, t); + } + + if (do_per_step(step, ir->nstlog)) + { + if (fflush(fplog) != 0) + { + gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?"); + } + } + } + if (bDoExpanded) + { + /* Have to do this part _after_ outputting the logfile and the edr file */ + /* Gets written into the state at the beginning of next loop*/ + state->fep_state = lamnew; + } + /* Print the remaining wall clock time for the run */ + if (isMasterSimMasterRank(ms, cr) && + (do_verbose || gmx_got_usr_signal()) && + !bPMETunePrinting) + { + if (shellfc) + { + fprintf(stderr, "\n"); + } + print_time(stderr, walltime_accounting, step, ir, cr); + } + + /* Ion/water position swapping. + * Not done in last step since trajectory writing happens before this call + * in the MD loop and exchanges would be lost anyway. 
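In the energy-output block above, the conserved quantity is simply the instantaneous total energy plus whatever the thermostat/barostat reservoirs have absorbed (saved_conserved_quantity for velocity Verlet, an NPT_energy-style term otherwise). A small sketch of that bookkeeping, with invented field names standing in for the enerd terms:

#include <cstdio>

// Hypothetical stand-ins for enerd->term[F_EPOT], enerd->term[F_EKIN] and the
// coupling contribution returned by an NPT_energy-like function.
struct EnergyTerms
{
    double epot;        // potential energy
    double ekin;        // kinetic energy
    double reservoir;   // work stored in the thermostat/barostat variables
};

double totalEnergy(const EnergyTerms &e)
{
    return e.epot + e.ekin;
}

// Mirrors F_ECONSERVED = F_ETOT + saved_conserved_quantity in the code above.
double conservedEnergy(const EnergyTerms &e)
{
    return totalEnergy(e) + e.reservoir;
}

int main()
{
    EnergyTerms e{-1052.3, 298.7, 4.1};
    std::printf("Etot = %.1f, Econserved = %.1f\n", totalEnergy(e), conservedEnergy(e));
}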
*/ + bNeedRepartition = FALSE; + if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep && + do_per_step(step, ir->swap->nstswap)) + { + bNeedRepartition = do_swapcoords(cr, step, t, ir, wcycle, + as_rvec_array(state->x.data()), + state->box, + MASTER(cr) && mdrunOptions.verbose, + bRerunMD); + + if (bNeedRepartition && DOMAINDECOMP(cr)) + { + dd_collect_state(cr->dd, state, state_global); + } + } + + /* Replica exchange */ + bExchanged = FALSE; + if (bDoReplEx) + { + bExchanged = replica_exchange(fplog, cr, ms, repl_ex, + state_global, enerd, + state, step, t); + } + + if ( (bExchanged || bNeedRepartition) && DOMAINDECOMP(cr) ) + { + dd_partition_system(fplog, mdlog, step, cr, TRUE, 1, + state_global, top_global, ir, + state, &f, mdAtoms, top, fr, + vsite, constr, + nrnb, wcycle, FALSE); + shouldCheckNumberOfBondedInteractions = true; + update_realloc(upd, state->natoms); + } + + bFirstStep = FALSE; + bInitStep = FALSE; + startingFromCheckpoint = false; + + /* ####### SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */ + /* With all integrators, except VV, we need to retain the pressure + * at the current step for coupling at the next step. + */ + if ((state->flags & (1<<estPRES_PREV)) && + (bGStatEveryStep || + (ir->nstpcouple > 0 && step % ir->nstpcouple == 0))) + { + /* Store the pressure in t_state for pressure coupling + * at the next MD step. + */ + copy_mat(pres, state->pres_prev); + } + + /* ####### END SET VARIABLES FOR NEXT ITERATION ###### */ + + if ( (membed != nullptr) && (!bLastStep) ) + { + rescale_membed(step_rel, membed, as_rvec_array(state_global->x.data())); + } + + cycles = wallcycle_stop(wcycle, ewcSTEP); + if (DOMAINDECOMP(cr) && wcycle) + { + dd_cycles_add(cr->dd, cycles, ddCyclStep); + } + + /* increase the MD step number */ + step++; + step_rel++; + + resetHandler->resetCounters( + step, step_rel, mdlog, fplog, cr, (use_GPU(fr->nbv) ? fr->nbv : nullptr), + nrnb, fr->pmedata, pme_loadbal, wcycle, walltime_accounting); + + /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */ + IMD_prep_energies_send_positions(ir->bIMD && MASTER(cr), bIMDstep, ir->imd, enerd, step, bCalcEner, wcycle); + + } + /* End of main MD loop */ + + /* Closing TNG files can include compressing data. Therefore it is good to do that + * before stopping the time measurements. */ + mdoutf_tng_close(outf); + + /* Stop measuring walltime */ + walltime_accounting_end_time(walltime_accounting); + + if (!thisRankHasDuty(cr, DUTY_PME)) + { + /* Tell the PME only node to finish */ + gmx_pme_send_finish(cr); + } + + if (MASTER(cr)) + { + if (ir->nstcalcenergy > 0) + { + print_ebin(mdoutf_get_fp_ene(outf), FALSE, FALSE, FALSE, fplog, step, t, + eprAVER, mdebin, fcd, groups, &(ir->opts), awh.get()); + } + } + done_mdebin(mdebin); + done_mdoutf(outf); + + if (bPMETune) + { + pme_loadbal_done(pme_loadbal, fplog, mdlog, use_GPU(fr->nbv)); + } + + done_shellfc(fplog, shellfc, step_rel); + + if (useReplicaExchange && MASTER(cr)) + { + print_replica_exchange_statistics(fplog, repl_ex); + } + + // Clean up swapcoords + if (ir->eSwapCoords != eswapNO) + { + finish_swapcoords(ir->swap); + } + + /* IMD cleanup, if bIMD is TRUE. 
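Each pass through the loop above is bracketed by wallcycle_start/wallcycle_stop and the per-step cycle count is accumulated (and, with domain decomposition, fed to the load balancer via dd_cycles_add). The shape of that per-step timing accumulation, reduced to standard C++ as an illustration only:

#include <chrono>
#include <cstdio>

// A minimal stand-in for the wallcycle_start/wallcycle_stop pair wrapped
// around each MD step above: time one step, accumulate into a running total.
int main()
{
    using clock = std::chrono::steady_clock;
    double totalStepSeconds = 0.0;

    for (int step = 0; step < 3; ++step)
    {
        auto t0 = clock::now();
        // ... one MD step would run here ...
        auto t1 = clock::now();
        totalStepSeconds += std::chrono::duration<double>(t1 - t0).count();
    }
    std::printf("accumulated step time: %g s\n", totalStepSeconds);
}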
*/ + IMD_finalize(ir->bIMD, ir->imd); + + walltime_accounting_set_nsteps_done(walltime_accounting, step_rel); + + destroy_enerdata(enerd); + sfree(enerd); + sfree(top); +} diff --git a/patches/gromacs-2019.1.diff/src/gromacs/mdrun/md.cpp.preplumed b/patches/gromacs-2019.1.diff/src/gromacs/mdrun/md.cpp.preplumed new file mode 100644 index 0000000000000000000000000000000000000000..a49a15df53a4246970f4f4a00e542f1c9a7cedbe --- /dev/null +++ b/patches/gromacs-2019.1.diff/src/gromacs/mdrun/md.cpp.preplumed @@ -0,0 +1,1531 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! 
\internal \file + * + * \brief Implements the integrator for normal molecular dynamics simulations + * + * \author David van der Spoel <david.vanderspoel@icm.uu.se> + * \ingroup module_mdrun + */ +#include "gmxpre.h" + +#include <cinttypes> +#include <cmath> +#include <cstdio> +#include <cstdlib> + +#include <algorithm> +#include <memory> + +#include "gromacs/awh/awh.h" +#include "gromacs/commandline/filenm.h" +#include "gromacs/compat/make_unique.h" +#include "gromacs/domdec/collect.h" +#include "gromacs/domdec/domdec.h" +#include "gromacs/domdec/domdec_network.h" +#include "gromacs/domdec/domdec_struct.h" +#include "gromacs/domdec/partition.h" +#include "gromacs/essentialdynamics/edsam.h" +#include "gromacs/ewald/pme.h" +#include "gromacs/ewald/pme-load-balancing.h" +#include "gromacs/fileio/trxio.h" +#include "gromacs/gmxlib/network.h" +#include "gromacs/gmxlib/nrnb.h" +#include "gromacs/gpu_utils/gpu_utils.h" +#include "gromacs/imd/imd.h" +#include "gromacs/listed-forces/manage-threading.h" +#include "gromacs/math/functions.h" +#include "gromacs/math/utilities.h" +#include "gromacs/math/vec.h" +#include "gromacs/math/vectypes.h" +#include "gromacs/mdlib/checkpointhandler.h" +#include "gromacs/mdlib/compute_io.h" +#include "gromacs/mdlib/constr.h" +#include "gromacs/mdlib/ebin.h" +#include "gromacs/mdlib/expanded.h" +#include "gromacs/mdlib/force.h" +#include "gromacs/mdlib/force_flags.h" +#include "gromacs/mdlib/forcerec.h" +#include "gromacs/mdlib/md_support.h" +#include "gromacs/mdlib/mdatoms.h" +#include "gromacs/mdlib/mdebin.h" +#include "gromacs/mdlib/mdoutf.h" +#include "gromacs/mdlib/mdrun.h" +#include "gromacs/mdlib/mdsetup.h" +#include "gromacs/mdlib/membed.h" +#include "gromacs/mdlib/nb_verlet.h" +#include "gromacs/mdlib/nbnxn_gpu_data_mgmt.h" +#include "gromacs/mdlib/ns.h" +#include "gromacs/mdlib/resethandler.h" +#include "gromacs/mdlib/shellfc.h" +#include "gromacs/mdlib/sighandler.h" +#include "gromacs/mdlib/sim_util.h" +#include "gromacs/mdlib/simulationsignal.h" +#include "gromacs/mdlib/stophandler.h" +#include "gromacs/mdlib/tgroup.h" +#include "gromacs/mdlib/trajectory_writing.h" +#include "gromacs/mdlib/update.h" +#include "gromacs/mdlib/vcm.h" +#include "gromacs/mdlib/vsite.h" +#include "gromacs/mdtypes/awh-history.h" +#include "gromacs/mdtypes/awh-params.h" +#include "gromacs/mdtypes/commrec.h" +#include "gromacs/mdtypes/df_history.h" +#include "gromacs/mdtypes/energyhistory.h" +#include "gromacs/mdtypes/fcdata.h" +#include "gromacs/mdtypes/forcerec.h" +#include "gromacs/mdtypes/group.h" +#include "gromacs/mdtypes/inputrec.h" +#include "gromacs/mdtypes/interaction_const.h" +#include "gromacs/mdtypes/md_enums.h" +#include "gromacs/mdtypes/mdatom.h" +#include "gromacs/mdtypes/observableshistory.h" +#include "gromacs/mdtypes/pullhistory.h" +#include "gromacs/mdtypes/state.h" +#include "gromacs/pbcutil/mshift.h" +#include "gromacs/pbcutil/pbc.h" +#include "gromacs/pulling/output.h" +#include "gromacs/pulling/pull.h" +#include "gromacs/swap/swapcoords.h" +#include "gromacs/timing/wallcycle.h" +#include "gromacs/timing/walltime_accounting.h" +#include "gromacs/topology/atoms.h" +#include "gromacs/topology/idef.h" +#include "gromacs/topology/mtop_util.h" +#include "gromacs/topology/topology.h" +#include "gromacs/trajectory/trajectoryframe.h" +#include "gromacs/utility/basedefinitions.h" +#include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/fatalerror.h" +#include "gromacs/utility/logger.h" +#include "gromacs/utility/real.h" +#include 
"gromacs/utility/smalloc.h" + +#include "integrator.h" +#include "replicaexchange.h" + +#if GMX_FAHCORE +#include "corewrap.h" +#endif + +using gmx::SimulationSignaller; + +void gmx::Integrator::do_md() +{ + // TODO Historically, the EM and MD "integrators" used different + // names for the t_inputrec *parameter, but these must have the + // same name, now that it's a member of a struct. We use this ir + // alias to avoid a large ripple of nearly useless changes. + // t_inputrec is being replaced by IMdpOptionsProvider, so this + // will go away eventually. + t_inputrec *ir = inputrec; + gmx_mdoutf *outf = nullptr; + int64_t step, step_rel; + double t, t0, lam0[efptNR]; + gmx_bool bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner; + gmx_bool bNS, bNStList, bSimAnn, bStopCM, + bFirstStep, bInitStep, bLastStep = FALSE; + gmx_bool bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE; + gmx_bool do_ene, do_log, do_verbose; + gmx_bool bMasterState; + int force_flags, cglo_flags; + tensor force_vir, shake_vir, total_vir, tmp_vir, pres; + int i, m; + rvec mu_tot; + t_vcm *vcm; + matrix parrinellorahmanMu, M; + gmx_repl_ex_t repl_ex = nullptr; + gmx_localtop_t *top; + t_mdebin *mdebin = nullptr; + gmx_enerdata_t *enerd; + PaddedVector<gmx::RVec> f {}; + gmx_global_stat_t gstat; + gmx_update_t *upd = nullptr; + t_graph *graph = nullptr; + gmx_groups_t *groups; + gmx_ekindata_t *ekind; + gmx_shellfc_t *shellfc; + gmx_bool bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition; + gmx_bool bTemp, bPres, bTrotter; + real dvdl_constr; + rvec *cbuf = nullptr; + int cbuf_nalloc = 0; + matrix lastbox; + int lamnew = 0; + /* for FEP */ + int nstfep = 0; + double cycles; + real saved_conserved_quantity = 0; + real last_ekin = 0; + t_extmass MassQ; + int **trotter_seq; + char sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE]; + + /* PME load balancing data for GPU kernels */ + pme_load_balancing_t *pme_loadbal = nullptr; + gmx_bool bPMETune = FALSE; + gmx_bool bPMETunePrinting = FALSE; + + /* Interactive MD */ + gmx_bool bIMDstep = FALSE; + + /* Domain decomposition could incorrectly miss a bonded + interaction, but checking for that requires a global + communication stage, which does not otherwise happen in DD + code. So we do that alongside the first global energy reduction + after a new DD is made. These variables handle whether the + check happens, and the result it returns. */ + bool shouldCheckNumberOfBondedInteractions = false; + int totalNumberOfBondedInteractions = -1; + + SimulationSignals signals; + // Most global communnication stages don't propagate mdrun + // signals, and will use this object to achieve that. + SimulationSignaller nullSignaller(nullptr, nullptr, nullptr, false, false); + + if (!mdrunOptions.writeConfout) + { + // This is on by default, and the main known use case for + // turning it off is for convenience in benchmarking, which is + // something that should not show up in the general user + // interface. + GMX_LOG(mdlog.info).asParagraph(). 
+ appendText("The -noconfout functionality is deprecated, and may be removed in a future version."); + } + + /* md-vv uses averaged full step velocities for T-control + md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control) + md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */ + bTrotter = (EI_VV(ir->eI) && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir))); + + const bool bRerunMD = false; + int nstglobalcomm = mdrunOptions.globalCommunicationInterval; + + nstglobalcomm = check_nstglobalcomm(mdlog, nstglobalcomm, ir, cr); + bGStatEveryStep = (nstglobalcomm == 1); + + groups = &top_global->groups; + + std::unique_ptr<EssentialDynamics> ed = nullptr; + if (opt2bSet("-ei", nfile, fnm) || observablesHistory->edsamHistory != nullptr) + { + /* Initialize essential dynamics sampling */ + ed = init_edsam(mdlog, + opt2fn_null("-ei", nfile, fnm), opt2fn("-eo", nfile, fnm), + top_global, + ir, cr, constr, + state_global, observablesHistory, + oenv, mdrunOptions.continuationOptions.appendFiles); + } + + /* Initial values */ + init_md(fplog, cr, outputProvider, ir, oenv, mdrunOptions, + &t, &t0, state_global, lam0, + nrnb, top_global, &upd, deform, + nfile, fnm, &outf, &mdebin, + force_vir, shake_vir, total_vir, pres, mu_tot, &bSimAnn, &vcm, wcycle); + + /* Energy terms and groups */ + snew(enerd, 1); + init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda, + enerd); + + /* Kinetic energy data */ + snew(ekind, 1); + init_ekindata(fplog, top_global, &(ir->opts), ekind); + /* Copy the cos acceleration to the groups struct */ + ekind->cosacc.cos_accel = ir->cos_accel; + + gstat = global_stat_init(ir); + + /* Check for polarizable models and flexible constraints */ + shellfc = init_shell_flexcon(fplog, + top_global, constr ? constr->numFlexibleConstraints() : 0, + ir->nstcalcenergy, DOMAINDECOMP(cr)); + + { + double io = compute_io(ir, top_global->natoms, groups, mdebin->ebin->nener, 1); + if ((io > 2000) && MASTER(cr)) + { + fprintf(stderr, + "\nWARNING: This run will generate roughly %.0f Mb of data\n\n", + io); + } + } + + /* Set up interactive MD (IMD) */ + init_IMD(ir, cr, ms, top_global, fplog, ir->nstcalcenergy, + MASTER(cr) ? state_global->x.rvec_array() : nullptr, + nfile, fnm, oenv, mdrunOptions); + + // Local state only becomes valid now. + std::unique_ptr<t_state> stateInstance; + t_state * state; + + if (DOMAINDECOMP(cr)) + { + top = dd_init_local_top(top_global); + + stateInstance = compat::make_unique<t_state>(); + state = stateInstance.get(); + dd_init_local_state(cr->dd, state_global, state); + + /* Distribute the charge groups over the nodes from the master node */ + dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1, + state_global, top_global, ir, + state, &f, mdAtoms, top, fr, + vsite, constr, + nrnb, nullptr, FALSE); + shouldCheckNumberOfBondedInteractions = true; + update_realloc(upd, state->natoms); + } + else + { + state_change_natoms(state_global, state_global->natoms); + f.resizeWithPadding(state_global->natoms); + /* Copy the pointer to the global state */ + state = state_global; + + snew(top, 1); + mdAlgorithmsSetupAtomData(cr, ir, top_global, top, fr, + &graph, mdAtoms, constr, vsite, shellfc); + + update_realloc(upd, state->natoms); + } + + auto mdatoms = mdAtoms->mdatoms(); + + // NOTE: The global state is no longer used at this point. 
+ // But state_global is still used as temporary storage space for writing + // the global state to file and potentially for replica exchange. + // (Global topology should persist.) + + update_mdatoms(mdatoms, state->lambda[efptMASS]); + + const ContinuationOptions &continuationOptions = mdrunOptions.continuationOptions; + bool startingFromCheckpoint = continuationOptions.startedFromCheckpoint; + + if (ir->bExpanded) + { + /* Check nstexpanded here, because the grompp check was broken */ + if (ir->expandedvals->nstexpanded % ir->nstcalcenergy != 0) + { + gmx_fatal(FARGS, "With expanded ensemble, nstexpanded should be a multiple of nstcalcenergy"); + } + init_expanded_ensemble(startingFromCheckpoint, ir, state->dfhist); + } + + if (MASTER(cr)) + { + if (startingFromCheckpoint) + { + /* Update mdebin with energy history if appending to output files */ + if (continuationOptions.appendFiles) + { + /* If no history is available (because a checkpoint is from before + * it was written) make a new one later, otherwise restore it. + */ + if (observablesHistory->energyHistory) + { + restore_energyhistory_from_state(mdebin, observablesHistory->energyHistory.get()); + } + } + else if (observablesHistory->energyHistory) + { + /* We might have read an energy history from checkpoint. + * As we are not appending, we want to restart the statistics. + * Free the allocated memory and reset the counts. + */ + observablesHistory->energyHistory = {}; + /* We might have read a pull history from checkpoint. + * We will still want to keep the statistics, so that the files + * can be joined and still be meaningful. + * This means that observablesHistory->pullHistory + * should not be reset. + */ + } + } + if (!observablesHistory->energyHistory) + { + observablesHistory->energyHistory = compat::make_unique<energyhistory_t>(); + } + if (!observablesHistory->pullHistory) + { + observablesHistory->pullHistory = compat::make_unique<PullHistory>(); + } + /* Set the initial energy history in state by updating once */ + update_energyhistory(observablesHistory->energyHistory.get(), mdebin); + } + + preparePrevStepPullCom(ir, mdatoms, state, state_global, cr, startingFromCheckpoint); + + // TODO: Remove this by converting AWH into a ForceProvider + auto awh = prepareAwhModule(fplog, *ir, state_global, cr, ms, startingFromCheckpoint, + shellfc != nullptr, + opt2fn("-awh", nfile, fnm), ir->pull_work); + + const bool useReplicaExchange = (replExParams.exchangeInterval > 0); + if (useReplicaExchange && MASTER(cr)) + { + repl_ex = init_replica_exchange(fplog, ms, top_global->natoms, ir, + replExParams); + } + /* PME tuning is only supported in the Verlet scheme, with PME for + * Coulomb. It is not supported with only LJ PME. 
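The restart handling above keeps the stored energy statistics only when output files are being appended; a non-appending restart clears them so the averages begin afresh, and an empty history is created if none survives. A compressed sketch of that decision, using std::optional as a stand-in for observablesHistory->energyHistory (the restore-from-checkpoint call is omitted here):

#include <cstdio>
#include <optional>

struct EnergyHistory { long nframes; };   // stand-in for energyhistory_t

// Mirrors the checkpoint-restart branch above: appending keeps the saved
// statistics (if any); a fresh run discards them and starts a new history.
void prepareEnergyHistory(bool startedFromCheckpoint, bool appendFiles,
                          std::optional<EnergyHistory> &history)
{
    if (startedFromCheckpoint && !appendFiles)
    {
        history.reset();                 // restart the statistics
    }
    if (!history)
    {
        history.emplace(EnergyHistory{0});   // make a new, empty history
    }
}

int main()
{
    std::optional<EnergyHistory> history{EnergyHistory{1200}};
    prepareEnergyHistory(true, false, history);
    std::printf("frames after restart: %ld\n", history->nframes);
}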
*/ + bPMETune = (mdrunOptions.tunePme && EEL_PME(fr->ic->eeltype) && + !mdrunOptions.reproducible && ir->cutoff_scheme != ecutsGROUP); + if (bPMETune) + { + pme_loadbal_init(&pme_loadbal, cr, mdlog, *ir, state->box, + *fr->ic, *fr->nbv->listParams, fr->pmedata, use_GPU(fr->nbv), + &bPMETunePrinting); + } + + if (!ir->bContinuation) + { + if (state->flags & (1 << estV)) + { + auto v = makeArrayRef(state->v); + /* Set the velocities of vsites, shells and frozen atoms to zero */ + for (i = 0; i < mdatoms->homenr; i++) + { + if (mdatoms->ptype[i] == eptVSite || + mdatoms->ptype[i] == eptShell) + { + clear_rvec(v[i]); + } + else if (mdatoms->cFREEZE) + { + for (m = 0; m < DIM; m++) + { + if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m]) + { + v[i][m] = 0; + } + } + } + } + } + + if (constr) + { + /* Constrain the initial coordinates and velocities */ + do_constrain_first(fplog, constr, ir, mdatoms, state); + } + if (vsite) + { + /* Construct the virtual sites for the initial configuration */ + construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, nullptr, + top->idef.iparams, top->idef.il, + fr->ePBC, fr->bMolPBC, cr, state->box); + } + } + + if (ir->efep != efepNO) + { + /* Set free energy calculation frequency as the greatest common + * denominator of nstdhdl and repl_ex_nst. */ + nstfep = ir->fepvals->nstdhdl; + if (ir->bExpanded) + { + nstfep = gmx_greatest_common_divisor(ir->expandedvals->nstexpanded, nstfep); + } + if (useReplicaExchange) + { + nstfep = gmx_greatest_common_divisor(replExParams.exchangeInterval, nstfep); + } + } + + /* Be REALLY careful about what flags you set here. You CANNOT assume + * this is the first step, since we might be restarting from a checkpoint, + * and in that case we should not do any modifications to the state. + */ + bStopCM = (ir->comm_mode != ecmNO && !ir->bContinuation); + + if (continuationOptions.haveReadEkin) + { + restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate); + } + + cglo_flags = (CGLO_INITIALIZATION | CGLO_TEMPERATURE | CGLO_GSTAT + | (EI_VV(ir->eI) ? CGLO_PRESSURE : 0) + | (EI_VV(ir->eI) ? CGLO_CONSTRAINT : 0) + | (continuationOptions.haveReadEkin ? CGLO_READEKIN : 0)); + + bSumEkinhOld = FALSE; + /* To minimize communication, compute_globals computes the COM velocity + * and the kinetic energy for the velocities without COM motion removed. + * Thus to get the kinetic energy without the COM contribution, we need + * to call compute_globals twice. + */ + for (int cgloIteration = 0; cgloIteration < (bStopCM ? 2 : 1); cgloIteration++) + { + int cglo_flags_iteration = cglo_flags; + if (bStopCM && cgloIteration == 0) + { + cglo_flags_iteration |= CGLO_STOPCM; + cglo_flags_iteration &= ~CGLO_TEMPERATURE; + } + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm, + nullptr, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, + constr, &nullSignaller, state->box, + &totalNumberOfBondedInteractions, &bSumEkinhOld, cglo_flags_iteration + | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0)); + } + checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, + top_global, top, state, + &shouldCheckNumberOfBondedInteractions); + if (ir->eI == eiVVAK) + { + /* a second call to get the half step temperature initialized as well */ + /* we do the same call as above, but turn the pressure off -- internally to + compute_globals, this is recognized as a velocity verlet half-step + kinetic energy calculation. 
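nstfep above is reduced to the greatest common divisor of nstdhdl, nstexpanded and the replica-exchange interval, so that one frequency lands on every step at which any of those features needs foreign-lambda energies. A small sketch of that reduction with std::gcd; the parameter names echo the inputrec fields it mimics and are not real API calls:

#include <cstdio>
#include <numeric>

// Reduce the free-energy calculation interval so it divides every interval
// that needs dH/dlambda data, as the gmx_greatest_common_divisor calls above do.
int commonFepInterval(int nstdhdl, int nstexpanded, int exchangeInterval)
{
    int nstfep = nstdhdl;
    if (nstexpanded > 0)
    {
        nstfep = std::gcd(nstexpanded, nstfep);
    }
    if (exchangeInterval > 0)
    {
        nstfep = std::gcd(exchangeInterval, nstfep);
    }
    return nstfep;
}

int main()
{
    std::printf("nstfep = %d\n", commonFepInterval(100, 250, 1000));   // -> 50
}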
This minimized excess variables, but + perhaps loses some logic?*/ + + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm, + nullptr, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, + constr, &nullSignaller, state->box, + nullptr, &bSumEkinhOld, + cglo_flags & ~CGLO_PRESSURE); + } + + /* Calculate the initial half step temperature, and save the ekinh_old */ + if (!continuationOptions.startedFromCheckpoint) + { + for (i = 0; (i < ir->opts.ngtc); i++) + { + copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old); + } + } + + /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter + temperature control */ + trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter); + + if (MASTER(cr)) + { + if (!ir->bContinuation) + { + if (constr && ir->eConstrAlg == econtLINCS) + { + fprintf(fplog, + "RMS relative constraint deviation after constraining: %.2e\n", + constr->rmsd()); + } + if (EI_STATE_VELOCITY(ir->eI)) + { + real temp = enerd->term[F_TEMP]; + if (ir->eI != eiVV) + { + /* Result of Ekin averaged over velocities of -half + * and +half step, while we only have -half step here. + */ + temp *= 2; + } + fprintf(fplog, "Initial temperature: %g K\n", temp); + } + } + + char tbuf[20]; + fprintf(stderr, "starting mdrun '%s'\n", + *(top_global->name)); + if (ir->nsteps >= 0) + { + sprintf(tbuf, "%8.1f", (ir->init_step+ir->nsteps)*ir->delta_t); + } + else + { + sprintf(tbuf, "%s", "infinite"); + } + if (ir->init_step > 0) + { + fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n", + gmx_step_str(ir->init_step+ir->nsteps, sbuf), tbuf, + gmx_step_str(ir->init_step, sbuf2), + ir->init_step*ir->delta_t); + } + else + { + fprintf(stderr, "%s steps, %s ps.\n", + gmx_step_str(ir->nsteps, sbuf), tbuf); + } + fprintf(fplog, "\n"); + } + + walltime_accounting_start_time(walltime_accounting); + wallcycle_start(wcycle, ewcRUN); + print_start(fplog, cr, walltime_accounting, "mdrun"); + +#if GMX_FAHCORE + /* safest point to do file checkpointing is here. More general point would be immediately before integrator call */ + int chkpt_ret = fcCheckPointParallel( cr->nodeid, + NULL, 0); + if (chkpt_ret == 0) + { + gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", 0 ); + } +#endif + + /*********************************************************** + * + * Loop over MD steps + * + ************************************************************/ + + bFirstStep = TRUE; + /* Skip the first Nose-Hoover integration when we get the state from tpx */ + bInitStep = !startingFromCheckpoint || EI_VV(ir->eI); + bSumEkinhOld = FALSE; + bExchanged = FALSE; + bNeedRepartition = FALSE; + + bool simulationsShareState = false; + int nstSignalComm = nstglobalcomm; + { + // TODO This implementation of ensemble orientation restraints is nasty because + // a user can't just do multi-sim with single-sim orientation restraints. + bool usingEnsembleRestraints = (fcd->disres.nsystems > 1) || ((ms != nullptr) && (fcd->orires.nr != 0)); + bool awhUsesMultiSim = (ir->bDoAwh && ir->awhParams->shareBiasMultisim && (ms != nullptr)); + + // Replica exchange, ensemble restraints and AWH need all + // simulations to remain synchronized, so they need + // checkpoints and stop conditions to act on the same step, so + // the propagation of such signals must take place between + // simulations, not just within simulations. + // TODO: Make algorithm initializers set these flags. 
+ simulationsShareState = useReplicaExchange || usingEnsembleRestraints || awhUsesMultiSim; + + if (simulationsShareState) + { + // Inter-simulation signal communication does not need to happen + // often, so we use a minimum of 200 steps to reduce overhead. + const int c_minimumInterSimulationSignallingInterval = 200; + nstSignalComm = ((c_minimumInterSimulationSignallingInterval + nstglobalcomm - 1)/nstglobalcomm)*nstglobalcomm; + } + } + + auto stopHandler = stopHandlerBuilder->getStopHandlerMD( + compat::not_null<SimulationSignal*>(&signals[eglsSTOPCOND]), simulationsShareState, + MASTER(cr), ir->nstlist, mdrunOptions.reproducible, nstSignalComm, + mdrunOptions.maximumHoursToRun, ir->nstlist == 0, fplog, step, bNS, walltime_accounting); + + auto checkpointHandler = compat::make_unique<CheckpointHandler>( + compat::make_not_null<SimulationSignal*>(&signals[eglsCHKPT]), + simulationsShareState, ir->nstlist == 0, MASTER(cr), + mdrunOptions.writeConfout, mdrunOptions.checkpointOptions.period); + + const bool resetCountersIsLocal = true; + auto resetHandler = compat::make_unique<ResetHandler>( + compat::make_not_null<SimulationSignal*>(&signals[eglsRESETCOUNTERS]), !resetCountersIsLocal, + ir->nsteps, MASTER(cr), mdrunOptions.timingOptions.resetHalfway, + mdrunOptions.maximumHoursToRun, mdlog, wcycle, walltime_accounting); + + DdOpenBalanceRegionBeforeForceComputation ddOpenBalanceRegion = (DOMAINDECOMP(cr) ? DdOpenBalanceRegionBeforeForceComputation::yes : DdOpenBalanceRegionBeforeForceComputation::no); + DdCloseBalanceRegionAfterForceComputation ddCloseBalanceRegion = (DOMAINDECOMP(cr) ? DdCloseBalanceRegionAfterForceComputation::yes : DdCloseBalanceRegionAfterForceComputation::no); + + step = ir->init_step; + step_rel = 0; + + // TODO extract this to new multi-simulation module + if (MASTER(cr) && isMultiSim(ms) && !useReplicaExchange) + { + if (!multisim_int_all_are_equal(ms, ir->nsteps)) + { + GMX_LOG(mdlog.warning).appendText( + "Note: The number of steps is not consistent across multi simulations,\n" + "but we are proceeding anyway!"); + } + if (!multisim_int_all_are_equal(ms, ir->init_step)) + { + GMX_LOG(mdlog.warning).appendText( + "Note: The initial step is not consistent across multi simulations,\n" + "but we are proceeding anyway!"); + } + } + + /* and stop now if we should */ + bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps)); + while (!bLastStep) + { + + /* Determine if this is a neighbor search step */ + bNStList = (ir->nstlist > 0 && step % ir->nstlist == 0); + + if (bPMETune && bNStList) + { + /* PME grid + cut-off optimization with GPUs or PME nodes */ + pme_loadbal_do(pme_loadbal, cr, + (mdrunOptions.verbose && MASTER(cr)) ? 
stderr : nullptr, + fplog, mdlog, + *ir, fr, *state, + wcycle, + step, step_rel, + &bPMETunePrinting); + } + + wallcycle_start(wcycle, ewcSTEP); + + bLastStep = (step_rel == ir->nsteps); + t = t0 + step*ir->delta_t; + + // TODO Refactor this, so that nstfep does not need a default value of zero + if (ir->efep != efepNO || ir->bSimTemp) + { + /* find and set the current lambdas */ + setCurrentLambdasLocal(step, ir->fepvals, lam0, state); + + bDoDHDL = do_per_step(step, ir->fepvals->nstdhdl); + bDoFEP = ((ir->efep != efepNO) && do_per_step(step, nstfep)); + bDoExpanded = (do_per_step(step, ir->expandedvals->nstexpanded) + && (ir->bExpanded) && (step > 0) && (!startingFromCheckpoint)); + } + + bDoReplEx = (useReplicaExchange && (step > 0) && !bLastStep && + do_per_step(step, replExParams.exchangeInterval)); + + if (bSimAnn) + { + update_annealing_target_temp(ir, t, upd); + } + + /* Stop Center of Mass motion */ + bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm)); + + /* Determine whether or not to do Neighbour Searching */ + bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition); + + bLastStep = bLastStep || stopHandler->stoppingAfterCurrentStep(bNS); + + /* do_log triggers energy and virial calculation. Because this leads + * to different code paths, forces can be different. Thus for exact + * continuation we should avoid extra log output. + * Note that the || bLastStep can result in non-exact continuation + * beyond the last step. But we don't consider that to be an issue. + */ + do_log = do_per_step(step, ir->nstlog) || (bFirstStep && !startingFromCheckpoint) || bLastStep; + do_verbose = mdrunOptions.verbose && + (step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep); + + if (bNS && !(bFirstStep && ir->bContinuation)) + { + bMasterState = FALSE; + /* Correct the new box if it is too skewed */ + if (inputrecDynamicBox(ir)) + { + if (correct_box(fplog, step, state->box, graph)) + { + bMasterState = TRUE; + } + } + if (DOMAINDECOMP(cr) && bMasterState) + { + dd_collect_state(cr->dd, state, state_global); + } + + if (DOMAINDECOMP(cr)) + { + /* Repartition the domain decomposition */ + dd_partition_system(fplog, mdlog, step, cr, + bMasterState, nstglobalcomm, + state_global, top_global, ir, + state, &f, mdAtoms, top, fr, + vsite, constr, + nrnb, wcycle, + do_verbose && !bPMETunePrinting); + shouldCheckNumberOfBondedInteractions = true; + update_realloc(upd, state->natoms); + } + } + + if (MASTER(cr) && do_log) + { + print_ebin_header(fplog, step, t); /* can we improve the information printed here? */ + } + + if (ir->efep != efepNO) + { + update_mdatoms(mdatoms, state->lambda[efptMASS]); + } + + if (bExchanged) + { + + /* We need the kinetic energy at minus the half step for determining + * the full step kinetic energy and possibly for T-coupling.*/ + /* This may not be quite working correctly yet . . . . 
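Two tiny helpers capture the step-scheduling arithmetic this loop leans on: the do_per_step-style periodic trigger (used for nstlog, nstlist, nstcomm, nstdhdl, ...) and the rounding of the inter-simulation signalling interval up to a whole multiple of nstglobalcomm. Illustrative sketches of both, assuming that an interval of zero or less means "never":

#include <cstdint>
#include <cstdio>

// Periodic trigger in the spirit of do_per_step(step, n): fire every n steps.
bool firesThisStep(std::int64_t step, int interval)
{
    return interval > 0 && step % interval == 0;
}

// Round an interval up to the next multiple of the global-communication
// interval, like the nstSignalComm computation above.
int roundUpToMultiple(int minimumInterval, int nstglobalcomm)
{
    return ((minimumInterval + nstglobalcomm - 1) / nstglobalcomm) * nstglobalcomm;
}

int main()
{
    std::printf("log at step 5000: %s\n", firesThisStep(5000, 1000) ? "yes" : "no");
    std::printf("signal interval:  %d\n", roundUpToMultiple(200, 25));   // -> 200
}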
*/ + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm, + wcycle, enerd, nullptr, nullptr, nullptr, nullptr, mu_tot, + constr, &nullSignaller, state->box, + &totalNumberOfBondedInteractions, &bSumEkinhOld, + CGLO_GSTAT | CGLO_TEMPERATURE | CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS); + checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, + top_global, top, state, + &shouldCheckNumberOfBondedInteractions); + } + clear_mat(force_vir); + + checkpointHandler->decideIfCheckpointingThisStep(bNS, bFirstStep, bLastStep); + + /* Determine the energy and pressure: + * at nstcalcenergy steps and at energy output steps (set below). + */ + if (EI_VV(ir->eI) && (!bInitStep)) + { + bCalcEnerStep = do_per_step(step, ir->nstcalcenergy); + bCalcVir = bCalcEnerStep || + (ir->epc != epcNO && (do_per_step(step, ir->nstpcouple) || do_per_step(step-1, ir->nstpcouple))); + } + else + { + bCalcEnerStep = do_per_step(step, ir->nstcalcenergy); + bCalcVir = bCalcEnerStep || + (ir->epc != epcNO && do_per_step(step, ir->nstpcouple)); + } + bCalcEner = bCalcEnerStep; + + do_ene = (do_per_step(step, ir->nstenergy) || bLastStep); + + if (do_ene || do_log || bDoReplEx) + { + bCalcVir = TRUE; + bCalcEner = TRUE; + } + + /* Do we need global communication ? */ + bGStat = (bCalcVir || bCalcEner || bStopCM || + do_per_step(step, nstglobalcomm) || + (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step-1, nstglobalcomm))); + + force_flags = (GMX_FORCE_STATECHANGED | + ((inputrecDynamicBox(ir)) ? GMX_FORCE_DYNAMICBOX : 0) | + GMX_FORCE_ALLFORCES | + (bCalcVir ? GMX_FORCE_VIRIAL : 0) | + (bCalcEner ? GMX_FORCE_ENERGY : 0) | + (bDoFEP ? GMX_FORCE_DHDL : 0) + ); + + if (shellfc) + { + /* Now is the time to relax the shells */ + relax_shell_flexcon(fplog, cr, ms, mdrunOptions.verbose, + enforcedRotation, step, + ir, bNS, force_flags, top, + constr, enerd, fcd, + state, f.arrayRefWithPadding(), force_vir, mdatoms, + nrnb, wcycle, graph, groups, + shellfc, fr, ppForceWorkload, t, mu_tot, + vsite, + ddOpenBalanceRegion, ddCloseBalanceRegion); + } + else + { + /* The AWH history need to be saved _before_ doing force calculations where the AWH bias is updated + (or the AWH update will be performed twice for one step when continuing). It would be best to + call this update function from do_md_trajectory_writing but that would occur after do_force. + One would have to divide the update_awh function into one function applying the AWH force + and one doing the AWH bias update. The update AWH bias function could then be called after + do_md_trajectory_writing (then containing update_awh_history). + The checkpointing will in the future probably moved to the start of the md loop which will + rid of this issue. */ + if (awh && checkpointHandler->isCheckpointingStep() && MASTER(cr)) + { + awh->updateHistory(state_global->awhHistory.get()); + } + + /* The coordinates (x) are shifted (to get whole molecules) + * in do_force. + * This is parallellized as well, and does communication too. + * Check comments in sim_util.c + */ + do_force(fplog, cr, ms, ir, awh.get(), enforcedRotation, + step, nrnb, wcycle, top, groups, + state->box, state->x.arrayRefWithPadding(), &state->hist, + f.arrayRefWithPadding(), force_vir, mdatoms, enerd, fcd, + state->lambda, graph, + fr, ppForceWorkload, vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr, + (bNS ? 
GMX_FORCE_NS : 0) | force_flags, + ddOpenBalanceRegion, ddCloseBalanceRegion); + } + + if (EI_VV(ir->eI) && !startingFromCheckpoint) + /* ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */ + { + rvec *vbuf = nullptr; + + wallcycle_start(wcycle, ewcUPDATE); + if (ir->eI == eiVV && bInitStep) + { + /* if using velocity verlet with full time step Ekin, + * take the first half step only to compute the + * virial for the first step. From there, + * revert back to the initial coordinates + * so that the input is actually the initial step. + */ + snew(vbuf, state->natoms); + copy_rvecn(state->v.rvec_array(), vbuf, 0, state->natoms); /* should make this better for parallelizing? */ + } + else + { + /* this is for NHC in the Ekin(t+dt/2) version of vv */ + trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ1); + } + + update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, + ekind, M, upd, etrtVELOCITY1, + cr, constr); + + wallcycle_stop(wcycle, ewcUPDATE); + constrain_velocities(step, nullptr, + state, + shake_vir, + constr, + bCalcVir, do_log, do_ene); + wallcycle_start(wcycle, ewcUPDATE); + /* if VV, compute the pressure and constraints */ + /* For VV2, we strictly only need this if using pressure + * control, but we really would like to have accurate pressures + * printed out. + * Think about ways around this in the future? + * For now, keep this choice in comments. + */ + /*bPres = (ir->eI==eiVV || inputrecNptTrotter(ir)); */ + /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && inputrecNptTrotter(ir)));*/ + bPres = TRUE; + bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK)); + if (bCalcEner && ir->eI == eiVVAK) + { + bSumEkinhOld = TRUE; + } + /* for vv, the first half of the integration actually corresponds to the previous step. + So we need information from the last step in the first half of the integration */ + if (bGStat || do_per_step(step-1, nstglobalcomm)) + { + wallcycle_stop(wcycle, ewcUPDATE); + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm, + wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, + constr, &nullSignaller, state->box, + &totalNumberOfBondedInteractions, &bSumEkinhOld, + (bGStat ? CGLO_GSTAT : 0) + | (bCalcEner ? CGLO_ENERGY : 0) + | (bTemp ? CGLO_TEMPERATURE : 0) + | (bPres ? CGLO_PRESSURE : 0) + | (bPres ? CGLO_CONSTRAINT : 0) + | (bStopCM ? CGLO_STOPCM : 0) + | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0) + | CGLO_SCALEEKIN + ); + /* explanation of above: + a) We compute Ekin at the full time step + if 1) we are using the AveVel Ekin, and it's not the + initial step, or 2) if we are using AveEkin, but need the full + time step kinetic energy for the pressure (always true now, since we want accurate statistics). 
+ b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in + EkinAveVel because it's needed for the pressure */ + checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, + top_global, top, state, + &shouldCheckNumberOfBondedInteractions); + wallcycle_start(wcycle, ewcUPDATE); + } + /* temperature scaling and pressure scaling to produce the extended variables at t+dt */ + if (!bInitStep) + { + if (bTrotter) + { + m_add(force_vir, shake_vir, total_vir); /* we need the un-dispersion corrected total vir here */ + trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ2); + + /* TODO This is only needed when we're about to write + * a checkpoint, because we use it after the restart + * (in a kludge?). But what should we be doing if + * startingFromCheckpoint or bInitStep are true? */ + if (inputrecNptTrotter(ir) || inputrecNphTrotter(ir)) + { + copy_mat(shake_vir, state->svir_prev); + copy_mat(force_vir, state->fvir_prev); + } + if (inputrecNvtTrotter(ir) && ir->eI == eiVV) + { + /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */ + enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, nullptr, (ir->eI == eiVV), FALSE); + enerd->term[F_EKIN] = trace(ekind->ekin); + } + } + else if (bExchanged) + { + wallcycle_stop(wcycle, ewcUPDATE); + /* We need the kinetic energy at minus the half step for determining + * the full step kinetic energy and possibly for T-coupling.*/ + /* This may not be quite working correctly yet . . . . */ + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm, + wcycle, enerd, nullptr, nullptr, nullptr, nullptr, mu_tot, + constr, &nullSignaller, state->box, + nullptr, &bSumEkinhOld, + CGLO_GSTAT | CGLO_TEMPERATURE); + wallcycle_start(wcycle, ewcUPDATE); + } + } + /* if it's the initial step, we performed this first step just to get the constraint virial */ + if (ir->eI == eiVV && bInitStep) + { + copy_rvecn(vbuf, state->v.rvec_array(), 0, state->natoms); + sfree(vbuf); + } + wallcycle_stop(wcycle, ewcUPDATE); + } + + /* compute the conserved quantity */ + if (EI_VV(ir->eI)) + { + saved_conserved_quantity = NPT_energy(ir, state, &MassQ); + if (ir->eI == eiVV) + { + last_ekin = enerd->term[F_EKIN]; + } + if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres)) + { + saved_conserved_quantity -= enerd->term[F_DISPCORR]; + } + /* sum up the foreign energy and dhdl terms for vv. currently done every step so that dhdl is correct in the .edr */ + if (ir->efep != efepNO) + { + sum_dhdl(enerd, state->lambda, ir->fepvals); + } + } + + /* ######## END FIRST UPDATE STEP ############## */ + /* ######## If doing VV, we now have v(dt) ###### */ + if (bDoExpanded) + { + /* perform extended ensemble sampling in lambda - we don't + actually move to the new state before outputting + statistics, but if performing simulated tempering, we + do update the velocities and the tau_t. */ + + lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state, state->dfhist, step, state->v.rvec_array(), mdatoms); + /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */ + if (MASTER(cr)) + { + copy_df_history(state_global->dfhist, state->dfhist); + } + } + + /* Now we have the energies and forces corresponding to the + * coordinates at time t. We must output all of this before + * the update. 
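For the very first velocity-Verlet step, the code above takes a throwaway half-step purely to obtain the constraint virial and then copies the saved velocities back, so the run truly starts from the input state. The save/integrate/restore shape of that trick, with a plain vector standing in for state->v and a dummy kick in place of update_coords:

#include <cstdio>
#include <vector>

struct Vec3 { double x, y, z; };

// Dummy half-step kick; the real code calls update_coords(..., etrtVELOCITY1, ...).
void trialHalfStep(std::vector<Vec3> &v)
{
    for (auto &vi : v) { vi.x += 0.1; vi.y += 0.1; vi.z += 0.1; }
}

int main()
{
    std::vector<Vec3> velocities(4, Vec3{1.0, 0.0, 0.0});

    std::vector<Vec3> saved = velocities;   // like copy_rvecn(state->v, vbuf, ...)
    trialHalfStep(velocities);              // done only to obtain the constraint virial
    velocities = saved;                     // like copy_rvecn(vbuf, state->v, ...)

    std::printf("vx after restore: %g\n", velocities[0].x);   // unchanged: 1
}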
+ */ + do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t, + ir, state, state_global, observablesHistory, + top_global, fr, + outf, mdebin, ekind, f, + checkpointHandler->isCheckpointingStep(), + bRerunMD, bLastStep, + mdrunOptions.writeConfout, + bSumEkinhOld); + /* Check if IMD step and do IMD communication, if bIMD is TRUE. */ + bIMDstep = do_IMD(ir->bIMD, step, cr, bNS, state->box, state->x.rvec_array(), ir, t, wcycle); + + /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */ + if (startingFromCheckpoint && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir))) + { + copy_mat(state->svir_prev, shake_vir); + copy_mat(state->fvir_prev, force_vir); + } + + stopHandler->setSignal(); + resetHandler->setSignal(walltime_accounting); + + if (bGStat || !PAR(cr)) + { + /* In parallel we only have to check for checkpointing in steps + * where we do global communication, + * otherwise the other nodes don't know. + */ + checkpointHandler->setSignal(walltime_accounting); + } + + /* ######### START SECOND UPDATE STEP ################# */ + + /* at the start of step, randomize or scale the velocities ((if vv. Restriction of Andersen controlled + in preprocessing */ + + if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */ + { + gmx_bool bIfRandomize; + bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state->v, upd, constr); + /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */ + if (constr && bIfRandomize) + { + constrain_velocities(step, nullptr, + state, + tmp_vir, + constr, + bCalcVir, do_log, do_ene); + } + } + /* Box is changed in update() when we do pressure coupling, + * but we should still use the old box for energy corrections and when + * writing it to the energy file, so it matches the trajectory files for + * the same timestep above. Make a copy in a separate array. + */ + copy_mat(state->box, lastbox); + + dvdl_constr = 0; + + wallcycle_start(wcycle, ewcUPDATE); + /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */ + if (bTrotter) + { + trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3); + /* We can only do Berendsen coupling after we have summed + * the kinetic energy or virial. Since the happens + * in global_state after update, we should only do it at + * step % nstlist = 1 with bGStatEveryStep=FALSE. + */ + } + else + { + update_tcouple(step, ir, state, ekind, &MassQ, mdatoms); + update_pcouple_before_coordinates(fplog, step, ir, state, + parrinellorahmanMu, M, + bInitStep); + } + + if (EI_VV(ir->eI)) + { + /* velocity half-step update */ + update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, + ekind, M, upd, etrtVELOCITY2, + cr, constr); + } + + /* Above, initialize just copies ekinh into ekin, + * it doesn't copy position (for VV), + * and entire integrator for MD. 
+ */ + + if (ir->eI == eiVVAK) + { + /* We probably only need md->homenr, not state->natoms */ + if (state->natoms > cbuf_nalloc) + { + cbuf_nalloc = state->natoms; + srenew(cbuf, cbuf_nalloc); + } + copy_rvecn(as_rvec_array(state->x.data()), cbuf, 0, state->natoms); + } + + update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, + ekind, M, upd, etrtPOSITION, cr, constr); + wallcycle_stop(wcycle, ewcUPDATE); + + constrain_coordinates(step, &dvdl_constr, state, + shake_vir, + upd, constr, + bCalcVir, do_log, do_ene); + update_sd_second_half(step, &dvdl_constr, ir, mdatoms, state, + cr, nrnb, wcycle, upd, constr, do_log, do_ene); + finish_update(ir, mdatoms, + state, graph, + nrnb, wcycle, upd, constr); + + if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM) + { + updatePrevStepPullCom(ir->pull_work, state); + } + + if (ir->eI == eiVVAK) + { + /* erase F_EKIN and F_TEMP here? */ + /* just compute the kinetic energy at the half step to perform a trotter step */ + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm, + wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, + constr, &nullSignaller, lastbox, + nullptr, &bSumEkinhOld, + (bGStat ? CGLO_GSTAT : 0) | CGLO_TEMPERATURE + ); + wallcycle_start(wcycle, ewcUPDATE); + trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4); + /* now we know the scaling, we can compute the positions again again */ + copy_rvecn(cbuf, as_rvec_array(state->x.data()), 0, state->natoms); + + update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, + ekind, M, upd, etrtPOSITION, cr, constr); + wallcycle_stop(wcycle, ewcUPDATE); + + /* do we need an extra constraint here? just need to copy out of as_rvec_array(state->v.data()) to upd->xp? */ + /* are the small terms in the shake_vir here due + * to numerical errors, or are they important + * physically? I'm thinking they are just errors, but not completely sure. + * For now, will call without actually constraining, constr=NULL*/ + finish_update(ir, mdatoms, + state, graph, + nrnb, wcycle, upd, nullptr); + } + if (EI_VV(ir->eI)) + { + /* this factor or 2 correction is necessary + because half of the constraint force is removed + in the vv step, so we have to double it. See + the Redmine issue #1255. It is not yet clear + if the factor of 2 is exact, or just a very + good approximation, and this will be + investigated. The next step is to see if this + can be done adding a dhdl contribution from the + rattle step, but this is somewhat more + complicated with the current code. Will be + investigated, hopefully for 4.6.3. However, + this current solution is much better than + having it completely wrong. + */ + enerd->term[F_DVDL_CONSTR] += 2*dvdl_constr; + } + else + { + enerd->term[F_DVDL_CONSTR] += dvdl_constr; + } + + if (vsite != nullptr) + { + wallcycle_start(wcycle, ewcVSITECONSTR); + if (graph != nullptr) + { + shift_self(graph, state->box, state->x.rvec_array()); + } + construct_vsites(vsite, state->x.rvec_array(), ir->delta_t, state->v.rvec_array(), + top->idef.iparams, top->idef.il, + fr->ePBC, fr->bMolPBC, cr, state->box); + + if (graph != nullptr) + { + unshift_self(graph, state->box, state->x.rvec_array()); + } + wallcycle_stop(wcycle, ewcVSITECONSTR); + } + + /* ############## IF NOT VV, Calculate globals HERE ############ */ + /* With Leap-Frog we can skip compute_globals at + * non-communication steps, but we need to calculate + * the kinetic energy one step before communication. 
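As the comment just above notes, with leap-frog the kinetic energy must be available one step before each communication step, so the global reduction is triggered when either the current step or the next one communicates. A one-function sketch of that look-ahead test:

#include <cstdint>
#include <cstdio>

bool isCommStep(std::int64_t step, int nstglobalcomm)
{
    return nstglobalcomm > 0 && step % nstglobalcomm == 0;
}

// Mirrors the bGStat || do_per_step(step + 1, nstglobalcomm) test above:
// reduce now if we communicate now, or will at the very next step.
bool needKineticEnergyNow(std::int64_t step, int nstglobalcomm)
{
    return isCommStep(step, nstglobalcomm) || isCommStep(step + 1, nstglobalcomm);
}

int main()
{
    std::printf("step 99: %s\n", needKineticEnergyNow(99, 100) ? "reduce" : "skip");
}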
+ */ + { + // Organize to do inter-simulation signalling on steps if + // and when algorithms require it. + bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm)); + + if (bGStat || (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm)) || doInterSimSignal) + { + // Since we're already communicating at this step, we + // can propagate intra-simulation signals. Note that + // check_nstglobalcomm has the responsibility for + // choosing the value of nstglobalcomm that is one way + // bGStat becomes true, so we can't get into a + // situation where e.g. checkpointing can't be + // signalled. + bool doIntraSimSignal = true; + SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal); + + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm, + wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, + constr, &signaller, + lastbox, + &totalNumberOfBondedInteractions, &bSumEkinhOld, + (bGStat ? CGLO_GSTAT : 0) + | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0) + | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0) + | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0) + | (!EI_VV(ir->eI) ? CGLO_PRESSURE : 0) + | CGLO_CONSTRAINT + | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0) + ); + checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, + top_global, top, state, + &shouldCheckNumberOfBondedInteractions); + } + } + + /* ############# END CALC EKIN AND PRESSURE ################# */ + + /* Note: this is OK, but there are some numerical precision issues with using the convergence of + the virial that should probably be addressed eventually. state->veta has better properies, + but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could + generate the new shake_vir, but test the veta value for convergence. This will take some thought. */ + + if (ir->efep != efepNO && !EI_VV(ir->eI)) + { + /* Sum up the foreign energy and dhdl terms for md and sd. + Currently done every step so that dhdl is correct in the .edr */ + sum_dhdl(enerd, state->lambda, ir->fepvals); + } + + update_pcouple_after_coordinates(fplog, step, ir, mdatoms, + pres, force_vir, shake_vir, + parrinellorahmanMu, + state, nrnb, upd); + + /* ################# END UPDATE STEP 2 ################# */ + /* #### We now have r(t+dt) and v(t+dt/2) ############# */ + + /* The coordinates (x) were unshifted in update */ + if (!bGStat) + { + /* We will not sum ekinh_old, + * so signal that we still have to do it. + */ + bSumEkinhOld = TRUE; + } + + if (bCalcEner) + { + /* ######### BEGIN PREPARING EDR OUTPUT ########### */ + + /* use the directly determined last velocity, not actually the averaged half steps */ + if (bTrotter && ir->eI == eiVV) + { + enerd->term[F_EKIN] = last_ekin; + } + enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN]; + + if (integratorHasConservedEnergyQuantity(ir)) + { + if (EI_VV(ir->eI)) + { + enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity; + } + else + { + enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + NPT_energy(ir, state, &MassQ); + } + } + /* ######### END PREPARING EDR OUTPUT ########### */ + } + + /* Output stuff */ + if (MASTER(cr)) + { + if (fplog && do_log && bDoExpanded) + { + /* only needed if doing expanded ensemble */ + PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals, ir->bSimTemp ? 
ir->simtempvals : nullptr, + state_global->dfhist, state->fep_state, ir->nstlog, step); + } + if (bCalcEner) + { + upd_mdebin(mdebin, bDoDHDL, bCalcEnerStep, + t, mdatoms->tmass, enerd, state, + ir->fepvals, ir->expandedvals, lastbox, + shake_vir, force_vir, total_vir, pres, + ekind, mu_tot, constr); + } + else + { + upd_mdebin_step(mdebin); + } + + gmx_bool do_dr = do_per_step(step, ir->nstdisreout); + gmx_bool do_or = do_per_step(step, ir->nstorireout); + + print_ebin(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or, do_log ? fplog : nullptr, + step, t, + eprNORMAL, mdebin, fcd, groups, &(ir->opts), awh.get()); + + if (ir->bPull) + { + pull_print_output(ir->pull_work, step, t); + } + + if (do_per_step(step, ir->nstlog)) + { + if (fflush(fplog) != 0) + { + gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?"); + } + } + } + if (bDoExpanded) + { + /* Have to do this part _after_ outputting the logfile and the edr file */ + /* Gets written into the state at the beginning of next loop*/ + state->fep_state = lamnew; + } + /* Print the remaining wall clock time for the run */ + if (isMasterSimMasterRank(ms, cr) && + (do_verbose || gmx_got_usr_signal()) && + !bPMETunePrinting) + { + if (shellfc) + { + fprintf(stderr, "\n"); + } + print_time(stderr, walltime_accounting, step, ir, cr); + } + + /* Ion/water position swapping. + * Not done in last step since trajectory writing happens before this call + * in the MD loop and exchanges would be lost anyway. */ + bNeedRepartition = FALSE; + if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep && + do_per_step(step, ir->swap->nstswap)) + { + bNeedRepartition = do_swapcoords(cr, step, t, ir, wcycle, + as_rvec_array(state->x.data()), + state->box, + MASTER(cr) && mdrunOptions.verbose, + bRerunMD); + + if (bNeedRepartition && DOMAINDECOMP(cr)) + { + dd_collect_state(cr->dd, state, state_global); + } + } + + /* Replica exchange */ + bExchanged = FALSE; + if (bDoReplEx) + { + bExchanged = replica_exchange(fplog, cr, ms, repl_ex, + state_global, enerd, + state, step, t); + } + + if ( (bExchanged || bNeedRepartition) && DOMAINDECOMP(cr) ) + { + dd_partition_system(fplog, mdlog, step, cr, TRUE, 1, + state_global, top_global, ir, + state, &f, mdAtoms, top, fr, + vsite, constr, + nrnb, wcycle, FALSE); + shouldCheckNumberOfBondedInteractions = true; + update_realloc(upd, state->natoms); + } + + bFirstStep = FALSE; + bInitStep = FALSE; + startingFromCheckpoint = false; + + /* ####### SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */ + /* With all integrators, except VV, we need to retain the pressure + * at the current step for coupling at the next step. + */ + if ((state->flags & (1<<estPRES_PREV)) && + (bGStatEveryStep || + (ir->nstpcouple > 0 && step % ir->nstpcouple == 0))) + { + /* Store the pressure in t_state for pressure coupling + * at the next MD step. + */ + copy_mat(pres, state->pres_prev); + } + + /* ####### END SET VARIABLES FOR NEXT ITERATION ###### */ + + if ( (membed != nullptr) && (!bLastStep) ) + { + rescale_membed(step_rel, membed, as_rvec_array(state_global->x.data())); + } + + cycles = wallcycle_stop(wcycle, ewcSTEP); + if (DOMAINDECOMP(cr) && wcycle) + { + dd_cycles_add(cr->dd, cycles, ddCyclStep); + } + + /* increase the MD step number */ + step++; + step_rel++; + + resetHandler->resetCounters( + step, step_rel, mdlog, fplog, cr, (use_GPU(fr->nbv) ? 
fr->nbv : nullptr), + nrnb, fr->pmedata, pme_loadbal, wcycle, walltime_accounting); + + /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */ + IMD_prep_energies_send_positions(ir->bIMD && MASTER(cr), bIMDstep, ir->imd, enerd, step, bCalcEner, wcycle); + + } + /* End of main MD loop */ + + /* Closing TNG files can include compressing data. Therefore it is good to do that + * before stopping the time measurements. */ + mdoutf_tng_close(outf); + + /* Stop measuring walltime */ + walltime_accounting_end_time(walltime_accounting); + + if (!thisRankHasDuty(cr, DUTY_PME)) + { + /* Tell the PME only node to finish */ + gmx_pme_send_finish(cr); + } + + if (MASTER(cr)) + { + if (ir->nstcalcenergy > 0) + { + print_ebin(mdoutf_get_fp_ene(outf), FALSE, FALSE, FALSE, fplog, step, t, + eprAVER, mdebin, fcd, groups, &(ir->opts), awh.get()); + } + } + done_mdebin(mdebin); + done_mdoutf(outf); + + if (bPMETune) + { + pme_loadbal_done(pme_loadbal, fplog, mdlog, use_GPU(fr->nbv)); + } + + done_shellfc(fplog, shellfc, step_rel); + + if (useReplicaExchange && MASTER(cr)) + { + print_replica_exchange_statistics(fplog, repl_ex); + } + + // Clean up swapcoords + if (ir->eSwapCoords != eswapNO) + { + finish_swapcoords(ir->swap); + } + + /* IMD cleanup, if bIMD is TRUE. */ + IMD_finalize(ir->bIMD, ir->imd); + + walltime_accounting_set_nsteps_done(walltime_accounting, step_rel); + + destroy_enerdata(enerd); + sfree(enerd); + sfree(top); +} diff --git a/patches/gromacs-2019.1.diff/src/gromacs/mdrun/minimize.cpp b/patches/gromacs-2019.1.diff/src/gromacs/mdrun/minimize.cpp new file mode 100644 index 0000000000000000000000000000000000000000..62a5a07a08daec87adf60e3947b0c3ecb006b4ad --- /dev/null +++ b/patches/gromacs-2019.1.diff/src/gromacs/mdrun/minimize.cpp @@ -0,0 +1,3064 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. 
+ * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \internal \file + * + * \brief This file defines integrators for energy minimization + * + * \author Berk Hess <hess@kth.se> + * \author Erik Lindahl <erik@kth.se> + * \ingroup module_mdrun + */ +#include "gmxpre.h" + +#include "config.h" + +#include <cmath> +#include <cstring> +#include <ctime> + +#include <algorithm> +#include <vector> + +#include "gromacs/commandline/filenm.h" +#include "gromacs/domdec/collect.h" +#include "gromacs/domdec/domdec.h" +#include "gromacs/domdec/domdec_struct.h" +#include "gromacs/domdec/partition.h" +#include "gromacs/ewald/pme.h" +#include "gromacs/fileio/confio.h" +#include "gromacs/fileio/mtxio.h" +#include "gromacs/gmxlib/network.h" +#include "gromacs/gmxlib/nrnb.h" +#include "gromacs/imd/imd.h" +#include "gromacs/linearalgebra/sparsematrix.h" +#include "gromacs/listed-forces/manage-threading.h" +#include "gromacs/math/functions.h" +#include "gromacs/math/vec.h" +#include "gromacs/mdlib/constr.h" +#include "gromacs/mdlib/force.h" +#include "gromacs/mdlib/forcerec.h" +#include "gromacs/mdlib/gmx_omp_nthreads.h" +#include "gromacs/mdlib/md_support.h" +#include "gromacs/mdlib/mdatoms.h" +#include "gromacs/mdlib/mdebin.h" +#include "gromacs/mdlib/mdrun.h" +#include "gromacs/mdlib/mdsetup.h" +#include "gromacs/mdlib/ns.h" +#include "gromacs/mdlib/shellfc.h" +#include "gromacs/mdlib/sim_util.h" +#include "gromacs/mdlib/tgroup.h" +#include "gromacs/mdlib/trajectory_writing.h" +#include "gromacs/mdlib/update.h" +#include "gromacs/mdlib/vsite.h" +#include "gromacs/mdtypes/commrec.h" +#include "gromacs/mdtypes/inputrec.h" +#include "gromacs/mdtypes/md_enums.h" +#include "gromacs/mdtypes/state.h" +#include "gromacs/pbcutil/mshift.h" +#include "gromacs/pbcutil/pbc.h" +#include "gromacs/timing/wallcycle.h" +#include "gromacs/timing/walltime_accounting.h" +#include "gromacs/topology/mtop_util.h" +#include "gromacs/topology/topology.h" +#include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/exceptions.h" +#include "gromacs/utility/fatalerror.h" +#include "gromacs/utility/logger.h" +#include "gromacs/utility/smalloc.h" + +#include "integrator.h" + +/* PLUMED */ +#include "../../../Plumed.h" +extern int plumedswitch; +extern plumed plumedmain; +extern void(*plumedcmd)(plumed,const char*,const void*); +/* END PLUMED */ + +//! Utility structure for manipulating states during EM +typedef struct { + //! Copy of the global state + t_state s; + //! Force array + PaddedVector<gmx::RVec> f; + //! Potential energy + real epot; + //! Norm of the force + real fnorm; + //! Maximum force + real fmax; + //! Direction + int a_fmax; +} em_state_t; + +//! Print the EM starting conditions +static void print_em_start(FILE *fplog, + const t_commrec *cr, + gmx_walltime_accounting_t walltime_accounting, + gmx_wallcycle_t wcycle, + const char *name) +{ + walltime_accounting_start_time(walltime_accounting); + wallcycle_start(wcycle, ewcRUN); + print_start(fplog, cr, walltime_accounting, name); +} + +//! Stop counting time for EM +static void em_time_end(gmx_walltime_accounting_t walltime_accounting, + gmx_wallcycle_t wcycle) +{ + wallcycle_stop(wcycle, ewcRUN); + + walltime_accounting_end_time(walltime_accounting); +} + +//! 
Printing a log file and console header +static void sp_header(FILE *out, const char *minimizer, real ftol, int nsteps) +{ + fprintf(out, "\n"); + fprintf(out, "%s:\n", minimizer); + fprintf(out, " Tolerance (Fmax) = %12.5e\n", ftol); + fprintf(out, " Number of steps = %12d\n", nsteps); +} + +//! Print warning message +static void warn_step(FILE *fp, + real ftol, + real fmax, + gmx_bool bLastStep, + gmx_bool bConstrain) +{ + constexpr bool realIsDouble = GMX_DOUBLE; + char buffer[2048]; + + if (!std::isfinite(fmax)) + { + sprintf(buffer, + "\nEnergy minimization has stopped because the force " + "on at least one atom is not finite. This usually means " + "atoms are overlapping. Modify the input coordinates to " + "remove atom overlap or use soft-core potentials with " + "the free energy code to avoid infinite forces.\n%s", + !realIsDouble ? + "You could also be lucky that switching to double precision " + "is sufficient to obtain finite forces.\n" : + ""); + } + else if (bLastStep) + { + sprintf(buffer, + "\nEnergy minimization reached the maximum number " + "of steps before the forces reached the requested " + "precision Fmax < %g.\n", ftol); + } + else + { + sprintf(buffer, + "\nEnergy minimization has stopped, but the forces have " + "not converged to the requested precision Fmax < %g (which " + "may not be possible for your system). It stopped " + "because the algorithm tried to make a new step whose size " + "was too small, or there was no change in the energy since " + "last step. Either way, we regard the minimization as " + "converged to within the available machine precision, " + "given your starting configuration and EM parameters.\n%s%s", + ftol, + !realIsDouble ? + "\nDouble precision normally gives you higher accuracy, but " + "this is often not needed for preparing to run molecular " + "dynamics.\n" : + "", + bConstrain ? + "You might need to increase your constraint accuracy, or turn\n" + "off constraints altogether (set constraints = none in mdp file)\n" : + ""); + } + + fputs(wrap_lines(buffer, 78, 0, FALSE), stderr); + fputs(wrap_lines(buffer, 78, 0, FALSE), fp); +} + +//! Print message about convergence of the EM +static void print_converged(FILE *fp, const char *alg, real ftol, + int64_t count, gmx_bool bDone, int64_t nsteps, + const em_state_t *ems, double sqrtNumAtoms) +{ + char buf[STEPSTRSIZE]; + + if (bDone) + { + fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n", + alg, ftol, gmx_step_str(count, buf)); + } + else if (count < nsteps) + { + fprintf(fp, "\n%s converged to machine precision in %s steps,\n" + "but did not reach the requested Fmax < %g.\n", + alg, gmx_step_str(count, buf), ftol); + } + else + { + fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n", + alg, ftol, gmx_step_str(count, buf)); + } + +#if GMX_DOUBLE + fprintf(fp, "Potential Energy = %21.14e\n", ems->epot); + fprintf(fp, "Maximum force = %21.14e on atom %d\n", ems->fmax, ems->a_fmax + 1); + fprintf(fp, "Norm of force = %21.14e\n", ems->fnorm/sqrtNumAtoms); +#else + fprintf(fp, "Potential Energy = %14.7e\n", ems->epot); + fprintf(fp, "Maximum force = %14.7e on atom %d\n", ems->fmax, ems->a_fmax + 1); + fprintf(fp, "Norm of force = %14.7e\n", ems->fnorm/sqrtNumAtoms); +#endif +} + +//! 
Compute the norm and max of the force array in parallel +static void get_f_norm_max(const t_commrec *cr, + t_grpopts *opts, t_mdatoms *mdatoms, const rvec *f, + real *fnorm, real *fmax, int *a_fmax) +{ + double fnorm2, *sum; + real fmax2, fam; + int la_max, a_max, start, end, i, m, gf; + + /* This routine finds the largest force and returns it. + * On parallel machines the global max is taken. + */ + fnorm2 = 0; + fmax2 = 0; + la_max = -1; + start = 0; + end = mdatoms->homenr; + if (mdatoms->cFREEZE) + { + for (i = start; i < end; i++) + { + gf = mdatoms->cFREEZE[i]; + fam = 0; + for (m = 0; m < DIM; m++) + { + if (!opts->nFreeze[gf][m]) + { + fam += gmx::square(f[i][m]); + } + } + fnorm2 += fam; + if (fam > fmax2) + { + fmax2 = fam; + la_max = i; + } + } + } + else + { + for (i = start; i < end; i++) + { + fam = norm2(f[i]); + fnorm2 += fam; + if (fam > fmax2) + { + fmax2 = fam; + la_max = i; + } + } + } + + if (la_max >= 0 && DOMAINDECOMP(cr)) + { + a_max = cr->dd->globalAtomIndices[la_max]; + } + else + { + a_max = la_max; + } + if (PAR(cr)) + { + snew(sum, 2*cr->nnodes+1); + sum[2*cr->nodeid] = fmax2; + sum[2*cr->nodeid+1] = a_max; + sum[2*cr->nnodes] = fnorm2; + gmx_sumd(2*cr->nnodes+1, sum, cr); + fnorm2 = sum[2*cr->nnodes]; + /* Determine the global maximum */ + for (i = 0; i < cr->nnodes; i++) + { + if (sum[2*i] > fmax2) + { + fmax2 = sum[2*i]; + a_max = gmx::roundToInt(sum[2*i+1]); + } + } + sfree(sum); + } + + if (fnorm) + { + *fnorm = sqrt(fnorm2); + } + if (fmax) + { + *fmax = sqrt(fmax2); + } + if (a_fmax) + { + *a_fmax = a_max; + } +} + +//! Compute the norm of the force +static void get_state_f_norm_max(const t_commrec *cr, + t_grpopts *opts, t_mdatoms *mdatoms, + em_state_t *ems) +{ + get_f_norm_max(cr, opts, mdatoms, ems->f.rvec_array(), + &ems->fnorm, &ems->fmax, &ems->a_fmax); +} + +//! Initialize the energy minimization +static void init_em(FILE *fplog, + const gmx::MDLogger &mdlog, + const char *title, + const t_commrec *cr, + const gmx_multisim_t *ms, + gmx::IMDOutputProvider *outputProvider, + t_inputrec *ir, + const MdrunOptions &mdrunOptions, + t_state *state_global, gmx_mtop_t *top_global, + em_state_t *ems, gmx_localtop_t **top, + t_nrnb *nrnb, rvec mu_tot, + t_forcerec *fr, gmx_enerdata_t **enerd, + t_graph **graph, gmx::MDAtoms *mdAtoms, gmx_global_stat_t *gstat, + gmx_vsite_t *vsite, gmx::Constraints *constr, gmx_shellfc_t **shellfc, + int nfile, const t_filenm fnm[], + gmx_mdoutf_t *outf, t_mdebin **mdebin, + gmx_wallcycle_t wcycle) +{ + real dvdl_constr; + + if (fplog) + { + fprintf(fplog, "Initiating %s\n", title); + } + + if (MASTER(cr)) + { + state_global->ngtc = 0; + + /* Initialize lambda variables */ + initialize_lambdas(fplog, ir, &(state_global->fep_state), state_global->lambda, nullptr); + } + + init_nrnb(nrnb); + + /* Interactive molecular dynamics */ + init_IMD(ir, cr, ms, top_global, fplog, 1, + MASTER(cr) ? state_global->x.rvec_array() : nullptr, + nfile, fnm, nullptr, mdrunOptions); + + if (ir->eI == eiNM) + { + GMX_ASSERT(shellfc != nullptr, "With NM we always support shells"); + + *shellfc = init_shell_flexcon(stdout, + top_global, + constr ? constr->numFlexibleConstraints() : 0, + ir->nstcalcenergy, + DOMAINDECOMP(cr)); + } + else + { + GMX_ASSERT(EI_ENERGY_MINIMIZATION(ir->eI), "This else currently only handles energy minimizers, consider if your algorithm needs shell/flexible-constraint support"); + + /* With energy minimization, shells and flexible constraints are + * automatically minimized when treated like normal DOFS. 
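+         * Consequently no dedicated shell/flexible-constraint handler is
+         * needed for the minimizers, and *shellfc is reset to nullptr below.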
+ */ + if (shellfc != nullptr) + { + *shellfc = nullptr; + } + } + + auto mdatoms = mdAtoms->mdatoms(); + if (DOMAINDECOMP(cr)) + { + *top = dd_init_local_top(top_global); + + dd_init_local_state(cr->dd, state_global, &ems->s); + + /* Distribute the charge groups over the nodes from the master node */ + dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1, + state_global, top_global, ir, + &ems->s, &ems->f, mdAtoms, *top, + fr, vsite, constr, + nrnb, nullptr, FALSE); + dd_store_state(cr->dd, &ems->s); + + *graph = nullptr; + } + else + { + state_change_natoms(state_global, state_global->natoms); + /* Just copy the state */ + ems->s = *state_global; + state_change_natoms(&ems->s, ems->s.natoms); + ems->f.resizeWithPadding(ems->s.natoms); + + snew(*top, 1); + mdAlgorithmsSetupAtomData(cr, ir, top_global, *top, fr, + graph, mdAtoms, + constr, vsite, shellfc ? *shellfc : nullptr); + + if (vsite) + { + set_vsite_top(vsite, *top, mdatoms); + } + } + + update_mdatoms(mdAtoms->mdatoms(), ems->s.lambda[efptMASS]); + + if (constr) + { + // TODO how should this cross-module support dependency be managed? + if (ir->eConstrAlg == econtSHAKE && + gmx_mtop_ftype_count(top_global, F_CONSTR) > 0) + { + gmx_fatal(FARGS, "Can not do energy minimization with %s, use %s\n", + econstr_names[econtSHAKE], econstr_names[econtLINCS]); + } + + if (!ir->bContinuation) + { + /* Constrain the starting coordinates */ + dvdl_constr = 0; + constr->apply(TRUE, TRUE, + -1, 0, 1.0, + ems->s.x.rvec_array(), + ems->s.x.rvec_array(), + nullptr, + ems->s.box, + ems->s.lambda[efptFEP], &dvdl_constr, + nullptr, nullptr, gmx::ConstraintVariable::Positions); + } + } + + if (PAR(cr)) + { + *gstat = global_stat_init(ir); + } + else + { + *gstat = nullptr; + } + + *outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, ir, top_global, nullptr, wcycle); + + snew(*enerd, 1); + init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda, + *enerd); + + if (mdebin != nullptr) + { + /* Init bin for energy stuff */ + *mdebin = init_mdebin(mdoutf_get_fp_ene(*outf), top_global, ir, nullptr); + } + + clear_rvec(mu_tot); + calc_shifts(ems->s.box, fr->shift_vec); + + /* PLUMED */ + if(plumedswitch){ + if(ms && ms->nsim>1) { + if(MASTER(cr)) (*plumedcmd) (plumedmain,"GREX setMPIIntercomm",&ms->mpi_comm_masters); + if(PAR(cr)){ + if(DOMAINDECOMP(cr)) { + (*plumedcmd) (plumedmain,"GREX setMPIIntracomm",&cr->dd->mpi_comm_all); + }else{ + (*plumedcmd) (plumedmain,"GREX setMPIIntracomm",&cr->mpi_comm_mysim); + } + } + (*plumedcmd) (plumedmain,"GREX init",NULL); + } + if(PAR(cr)){ + if(DOMAINDECOMP(cr)) { + (*plumedcmd) (plumedmain,"setMPIComm",&cr->dd->mpi_comm_all); + }else{ + (*plumedcmd) (plumedmain,"setMPIComm",&cr->mpi_comm_mysim); + } + } + (*plumedcmd) (plumedmain,"setNatoms",&top_global->natoms); + (*plumedcmd) (plumedmain,"setMDEngine","gromacs"); + (*plumedcmd) (plumedmain,"setLog",fplog); + real real_delta_t; + real_delta_t=ir->delta_t; + (*plumedcmd) (plumedmain,"setTimestep",&real_delta_t); + (*plumedcmd) (plumedmain,"init",NULL); + + if(PAR(cr)){ + if(DOMAINDECOMP(cr)) { + int nat_home = dd_numHomeAtoms(*cr->dd); + (*plumedcmd) (plumedmain,"setAtomsNlocal",&nat_home); + (*plumedcmd) (plumedmain,"setAtomsGatindex",cr->dd->globalAtomIndices.data()); + + } + } + } + /* END PLUMED */ +} + +//! 
Finalize the minimization +static void finish_em(const t_commrec *cr, gmx_mdoutf_t outf, + gmx_walltime_accounting_t walltime_accounting, + gmx_wallcycle_t wcycle) +{ + if (!thisRankHasDuty(cr, DUTY_PME)) + { + /* Tell the PME only node to finish */ + gmx_pme_send_finish(cr); + } + + done_mdoutf(outf); + + em_time_end(walltime_accounting, wcycle); +} + +//! Swap two different EM states during minimization +static void swap_em_state(em_state_t **ems1, em_state_t **ems2) +{ + em_state_t *tmp; + + tmp = *ems1; + *ems1 = *ems2; + *ems2 = tmp; +} + +//! Save the EM trajectory +static void write_em_traj(FILE *fplog, const t_commrec *cr, + gmx_mdoutf_t outf, + gmx_bool bX, gmx_bool bF, const char *confout, + gmx_mtop_t *top_global, + t_inputrec *ir, int64_t step, + em_state_t *state, + t_state *state_global, + ObservablesHistory *observablesHistory) +{ + int mdof_flags = 0; + + if (bX) + { + mdof_flags |= MDOF_X; + } + if (bF) + { + mdof_flags |= MDOF_F; + } + + /* If we want IMD output, set appropriate MDOF flag */ + if (ir->bIMD) + { + mdof_flags |= MDOF_IMD; + } + + mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, + top_global, step, static_cast<double>(step), + &state->s, state_global, observablesHistory, + state->f); + + if (confout != nullptr) + { + if (DOMAINDECOMP(cr)) + { + /* If bX=true, x was collected to state_global in the call above */ + if (!bX) + { + gmx::ArrayRef<gmx::RVec> globalXRef = MASTER(cr) ? makeArrayRef(state_global->x) : gmx::EmptyArrayRef(); + dd_collect_vec(cr->dd, &state->s, makeArrayRef(state->s.x), globalXRef); + } + } + else + { + /* Copy the local state pointer */ + state_global = &state->s; + } + + if (MASTER(cr)) + { + if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr)) + { + /* Make molecules whole only for confout writing */ + do_pbc_mtop(fplog, ir->ePBC, state->s.box, top_global, + state_global->x.rvec_array()); + } + + write_sto_conf_mtop(confout, + *top_global->name, top_global, + state_global->x.rvec_array(), nullptr, ir->ePBC, state->s.box); + } + } +} + +//! 
\brief Do one minimization step +// +// \returns true when the step succeeded, false when a constraint error occurred +static bool do_em_step(const t_commrec *cr, + t_inputrec *ir, t_mdatoms *md, + em_state_t *ems1, real a, const PaddedVector<gmx::RVec> *force, + em_state_t *ems2, + gmx::Constraints *constr, + int64_t count) + +{ + t_state *s1, *s2; + int start, end; + real dvdl_constr; + int nthreads gmx_unused; + + bool validStep = true; + + s1 = &ems1->s; + s2 = &ems2->s; + + if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count) + { + gmx_incons("state mismatch in do_em_step"); + } + + s2->flags = s1->flags; + + if (s2->natoms != s1->natoms) + { + state_change_natoms(s2, s1->natoms); + ems2->f.resizeWithPadding(s2->natoms); + } + if (DOMAINDECOMP(cr) && s2->cg_gl.size() != s1->cg_gl.size()) + { + s2->cg_gl.resize(s1->cg_gl.size()); + } + + copy_mat(s1->box, s2->box); + /* Copy free energy state */ + s2->lambda = s1->lambda; + copy_mat(s1->box, s2->box); + + start = 0; + end = md->homenr; + + nthreads = gmx_omp_nthreads_get(emntUpdate); +#pragma omp parallel num_threads(nthreads) + { + const rvec *x1 = s1->x.rvec_array(); + rvec *x2 = s2->x.rvec_array(); + const rvec *f = force->rvec_array(); + + int gf = 0; +#pragma omp for schedule(static) nowait + for (int i = start; i < end; i++) + { + try + { + if (md->cFREEZE) + { + gf = md->cFREEZE[i]; + } + for (int m = 0; m < DIM; m++) + { + if (ir->opts.nFreeze[gf][m]) + { + x2[i][m] = x1[i][m]; + } + else + { + x2[i][m] = x1[i][m] + a*f[i][m]; + } + } + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; + } + + if (s2->flags & (1<<estCGP)) + { + /* Copy the CG p vector */ + const rvec *p1 = s1->cg_p.rvec_array(); + rvec *p2 = s2->cg_p.rvec_array(); +#pragma omp for schedule(static) nowait + for (int i = start; i < end; i++) + { + // Trivial OpenMP block that does not throw + copy_rvec(p1[i], p2[i]); + } + } + + if (DOMAINDECOMP(cr)) + { + s2->ddp_count = s1->ddp_count; + + /* OpenMP does not supported unsigned loop variables */ +#pragma omp for schedule(static) nowait + for (int i = 0; i < static_cast<int>(s2->cg_gl.size()); i++) + { + s2->cg_gl[i] = s1->cg_gl[i]; + } + s2->ddp_count_cg_gl = s1->ddp_count_cg_gl; + } + } + + if (constr) + { + dvdl_constr = 0; + validStep = + constr->apply(TRUE, TRUE, + count, 0, 1.0, + s1->x.rvec_array(), s2->x.rvec_array(), + nullptr, s2->box, + s2->lambda[efptBONDED], &dvdl_constr, + nullptr, nullptr, gmx::ConstraintVariable::Positions); + + if (cr->nnodes > 1) + { + /* This global reduction will affect performance at high + * parallelization, but we can not really avoid it. + * But usually EM is not run at high parallelization. + */ + int reductionBuffer = static_cast<int>(!validStep); + gmx_sumi(1, &reductionBuffer, cr); + validStep = (reductionBuffer == 0); + } + + // We should move this check to the different minimizers + if (!validStep && ir->eI != eiSteep) + { + gmx_fatal(FARGS, "The coordinates could not be constrained. Minimizer '%s' can not handle constraint failures, use minimizer '%s' before using '%s'.", + EI(ir->eI), EI(eiSteep), EI(ir->eI)); + } + } + + return validStep; +} + +//! 
Prepare EM for using domain decomposition parallellization +static void em_dd_partition_system(FILE *fplog, + const gmx::MDLogger &mdlog, + int step, const t_commrec *cr, + gmx_mtop_t *top_global, t_inputrec *ir, + em_state_t *ems, gmx_localtop_t *top, + gmx::MDAtoms *mdAtoms, t_forcerec *fr, + gmx_vsite_t *vsite, gmx::Constraints *constr, + t_nrnb *nrnb, gmx_wallcycle_t wcycle) +{ + /* Repartition the domain decomposition */ + dd_partition_system(fplog, mdlog, step, cr, FALSE, 1, + nullptr, top_global, ir, + &ems->s, &ems->f, + mdAtoms, top, fr, vsite, constr, + nrnb, wcycle, FALSE); + dd_store_state(cr->dd, &ems->s); +} + +namespace +{ + +/*! \brief Class to handle the work of setting and doing an energy evaluation. + * + * This class is a mere aggregate of parameters to pass to evaluate an + * energy, so that future changes to names and types of them consume + * less time when refactoring other code. + * + * Aggregate initialization is used, for which the chief risk is that + * if a member is added at the end and not all initializer lists are + * updated, then the member will be value initialized, which will + * typically mean initialization to zero. + * + * We only want to construct one of these with an initializer list, so + * we explicitly delete the default constructor. */ +class EnergyEvaluator +{ + public: + //! We only intend to construct such objects with an initializer list. +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9) + // Aspects of the C++11 spec changed after GCC 4.8.5, and + // compilation of the initializer list construction in + // runner.cpp fails in GCC 4.8.5. + EnergyEvaluator() = delete; +#endif + /*! \brief Evaluates an energy on the state in \c ems. + * + * \todo In practice, the same objects mu_tot, vir, and pres + * are always passed to this function, so we would rather have + * them as data members. However, their C-array types are + * unsuited for aggregate initialization. When the types + * improve, the call signature of this method can be reduced. + */ + void run(em_state_t *ems, rvec mu_tot, + tensor vir, tensor pres, + int64_t count, gmx_bool bFirst); + //! Handles logging (deprecated). + FILE *fplog; + //! Handles logging. + const gmx::MDLogger &mdlog; + //! Handles communication. + const t_commrec *cr; + //! Coordinates multi-simulations. + const gmx_multisim_t *ms; + //! Holds the simulation topology. + gmx_mtop_t *top_global; + //! Holds the domain topology. + gmx_localtop_t *top; + //! User input options. + t_inputrec *inputrec; + //! Manages flop accounting. + t_nrnb *nrnb; + //! Manages wall cycle accounting. + gmx_wallcycle_t wcycle; + //! Coordinates global reduction. + gmx_global_stat_t gstat; + //! Handles virtual sites. + gmx_vsite_t *vsite; + //! Handles constraints. + gmx::Constraints *constr; + //! Handles strange things. + t_fcdata *fcd; + //! Molecular graph for SHAKE. + t_graph *graph; + //! Per-atom data for this domain. + gmx::MDAtoms *mdAtoms; + //! Handles how to calculate the forces. + t_forcerec *fr; + //! Schedule of force-calculation work each step for this task. + gmx::PpForceWorkload *ppForceWorkload; + //! Stores the computed energies. 
+ gmx_enerdata_t *enerd; +}; + +void +EnergyEvaluator::run(em_state_t *ems, rvec mu_tot, + tensor vir, tensor pres, + int64_t count, gmx_bool bFirst) +{ + real t; + gmx_bool bNS; + tensor force_vir, shake_vir, ekin; + real dvdl_constr, prescorr, enercorr, dvdlcorr; + real terminate = 0; + + /* Set the time to the initial time, the time does not change during EM */ + t = inputrec->init_t; + + if (bFirst || + (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) + { + /* This is the first state or an old state used before the last ns */ + bNS = TRUE; + } + else + { + bNS = FALSE; + if (inputrec->nstlist > 0) + { + bNS = TRUE; + } + } + + if (vsite) + { + construct_vsites(vsite, ems->s.x.rvec_array(), 1, nullptr, + top->idef.iparams, top->idef.il, + fr->ePBC, fr->bMolPBC, cr, ems->s.box); + } + + if (DOMAINDECOMP(cr) && bNS) + { + /* Repartition the domain decomposition */ + em_dd_partition_system(fplog, mdlog, count, cr, top_global, inputrec, + ems, top, mdAtoms, fr, vsite, constr, + nrnb, wcycle); + } + + /* Calc force & energy on new trial position */ + /* do_force always puts the charge groups in the box and shifts again + * We do not unshift, so molecules are always whole in congrad.c + */ + /* PLUMED */ + int plumedNeedsEnergy=0; + matrix plumed_vir; + if(plumedswitch){ + long int lstep=count; (*plumedcmd)(plumedmain,"setStepLong",&lstep); + (*plumedcmd) (plumedmain,"setPositions",&ems->s.x[0][0]); + (*plumedcmd) (plumedmain,"setMasses",&mdAtoms->mdatoms()->massT[0]); + (*plumedcmd) (plumedmain,"setCharges",&mdAtoms->mdatoms()->chargeA[0]); + (*plumedcmd) (plumedmain,"setBox",&ems->s.box[0][0]); + (*plumedcmd) (plumedmain,"prepareCalc",NULL); + (*plumedcmd) (plumedmain,"setForces",&ems->f[0][0]); + (*plumedcmd) (plumedmain,"isEnergyNeeded",&plumedNeedsEnergy); + clear_mat(plumed_vir); + (*plumedcmd) (plumedmain,"setVirial",&plumed_vir[0][0]); + } + /* END PLUMED */ + + do_force(fplog, cr, ms, inputrec, nullptr, nullptr, + count, nrnb, wcycle, top, &top_global->groups, + ems->s.box, ems->s.x.arrayRefWithPadding(), &ems->s.hist, + ems->f.arrayRefWithPadding(), force_vir, mdAtoms->mdatoms(), enerd, fcd, + ems->s.lambda, graph, fr, ppForceWorkload, vsite, mu_tot, t, nullptr, + GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | + GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY | + (bNS ? GMX_FORCE_NS : 0), + DOMAINDECOMP(cr) ? + DdOpenBalanceRegionBeforeForceComputation::yes : + DdOpenBalanceRegionBeforeForceComputation::no, + DOMAINDECOMP(cr) ? 
+ DdCloseBalanceRegionAfterForceComputation::yes : + DdCloseBalanceRegionAfterForceComputation::no); + /* PLUMED */ + if(plumedswitch){ + if(plumedNeedsEnergy) { + msmul(force_vir,2.0,plumed_vir); + (*plumedcmd) (plumedmain,"setEnergy",&enerd->term[F_EPOT]); + (*plumedcmd) (plumedmain,"performCalc",NULL); + msmul(plumed_vir,0.5,force_vir); + } else { + msmul(plumed_vir,0.5,plumed_vir); + m_add(force_vir,plumed_vir,force_vir); + } + } + /* END PLUMED */ + + /* Clear the unused shake virial and pressure */ + clear_mat(shake_vir); + clear_mat(pres); + + /* Communicate stuff when parallel */ + if (PAR(cr) && inputrec->eI != eiNM) + { + wallcycle_start(wcycle, ewcMoveE); + + global_stat(gstat, cr, enerd, force_vir, shake_vir, mu_tot, + inputrec, nullptr, nullptr, nullptr, 1, &terminate, + nullptr, FALSE, + CGLO_ENERGY | + CGLO_PRESSURE | + CGLO_CONSTRAINT); + + wallcycle_stop(wcycle, ewcMoveE); + } + + /* Calculate long range corrections to pressure and energy */ + calc_dispcorr(inputrec, fr, ems->s.box, ems->s.lambda[efptVDW], + pres, force_vir, &prescorr, &enercorr, &dvdlcorr); + enerd->term[F_DISPCORR] = enercorr; + enerd->term[F_EPOT] += enercorr; + enerd->term[F_PRES] += prescorr; + enerd->term[F_DVDL] += dvdlcorr; + + ems->epot = enerd->term[F_EPOT]; + + if (constr) + { + /* Project out the constraint components of the force */ + dvdl_constr = 0; + rvec *f_rvec = ems->f.rvec_array(); + constr->apply(FALSE, FALSE, + count, 0, 1.0, + ems->s.x.rvec_array(), f_rvec, f_rvec, + ems->s.box, + ems->s.lambda[efptBONDED], &dvdl_constr, + nullptr, &shake_vir, gmx::ConstraintVariable::ForceDispl); + enerd->term[F_DVDL_CONSTR] += dvdl_constr; + m_add(force_vir, shake_vir, vir); + } + else + { + copy_mat(force_vir, vir); + } + + clear_mat(ekin); + enerd->term[F_PRES] = + calc_pres(fr->ePBC, inputrec->nwall, ems->s.box, ekin, vir, pres); + + sum_dhdl(enerd, ems->s.lambda, inputrec->fepvals); + + if (EI_ENERGY_MINIMIZATION(inputrec->eI)) + { + get_state_f_norm_max(cr, &(inputrec->opts), mdAtoms->mdatoms(), ems); + } +} + +} // namespace + +//! Parallel utility summing energies and forces +static double reorder_partsum(const t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms, + gmx_mtop_t *top_global, + em_state_t *s_min, em_state_t *s_b) +{ + t_block *cgs_gl; + int ncg, *cg_gl, *index, c, cg, i, a0, a1, a, gf, m; + double partsum; + unsigned char *grpnrFREEZE; + + if (debug) + { + fprintf(debug, "Doing reorder_partsum\n"); + } + + const rvec *fm = s_min->f.rvec_array(); + const rvec *fb = s_b->f.rvec_array(); + + cgs_gl = dd_charge_groups_global(cr->dd); + index = cgs_gl->index; + + /* Collect fm in a global vector fmg. + * This conflicts with the spirit of domain decomposition, + * but to fully optimize this a much more complicated algorithm is required. 
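+     * The code below first scatters the local forces of s_min into a global
+     * array fmg (indexed via the global charge-group index) and sums it over
+     * all ranks, and then accumulates the Polak-Ribiere partial sum
+     *   partsum = sum_i sum_m (fb[i][m] - fmg[a][m]) * fb[i][m]
+     * over the home atoms of s_b, skipping frozen dimensions.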
+ */ + rvec *fmg; + snew(fmg, top_global->natoms); + + ncg = s_min->s.cg_gl.size(); + cg_gl = s_min->s.cg_gl.data(); + i = 0; + for (c = 0; c < ncg; c++) + { + cg = cg_gl[c]; + a0 = index[cg]; + a1 = index[cg+1]; + for (a = a0; a < a1; a++) + { + copy_rvec(fm[i], fmg[a]); + i++; + } + } + gmx_sum(top_global->natoms*3, fmg[0], cr); + + /* Now we will determine the part of the sum for the cgs in state s_b */ + ncg = s_b->s.cg_gl.size(); + cg_gl = s_b->s.cg_gl.data(); + partsum = 0; + i = 0; + gf = 0; + grpnrFREEZE = top_global->groups.grpnr[egcFREEZE]; + for (c = 0; c < ncg; c++) + { + cg = cg_gl[c]; + a0 = index[cg]; + a1 = index[cg+1]; + for (a = a0; a < a1; a++) + { + if (mdatoms->cFREEZE && grpnrFREEZE) + { + gf = grpnrFREEZE[i]; + } + for (m = 0; m < DIM; m++) + { + if (!opts->nFreeze[gf][m]) + { + partsum += (fb[i][m] - fmg[a][m])*fb[i][m]; + } + } + i++; + } + } + + sfree(fmg); + + return partsum; +} + +//! Print some stuff, like beta, whatever that means. +static real pr_beta(const t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms, + gmx_mtop_t *top_global, + em_state_t *s_min, em_state_t *s_b) +{ + double sum; + + /* This is just the classical Polak-Ribiere calculation of beta; + * it looks a bit complicated since we take freeze groups into account, + * and might have to sum it in parallel runs. + */ + + if (!DOMAINDECOMP(cr) || + (s_min->s.ddp_count == cr->dd->ddp_count && + s_b->s.ddp_count == cr->dd->ddp_count)) + { + const rvec *fm = s_min->f.rvec_array(); + const rvec *fb = s_b->f.rvec_array(); + sum = 0; + int gf = 0; + /* This part of code can be incorrect with DD, + * since the atom ordering in s_b and s_min might differ. + */ + for (int i = 0; i < mdatoms->homenr; i++) + { + if (mdatoms->cFREEZE) + { + gf = mdatoms->cFREEZE[i]; + } + for (int m = 0; m < DIM; m++) + { + if (!opts->nFreeze[gf][m]) + { + sum += (fb[i][m] - fm[i][m])*fb[i][m]; + } + } + } + } + else + { + /* We need to reorder cgs while summing */ + sum = reorder_partsum(cr, opts, mdatoms, top_global, s_min, s_b); + } + if (PAR(cr)) + { + gmx_sumd(1, &sum, cr); + } + + return sum/gmx::square(s_min->fnorm); +} + +namespace gmx +{ + +void +Integrator::do_cg() +{ + const char *CG = "Polak-Ribiere Conjugate Gradients"; + + gmx_localtop_t *top; + gmx_enerdata_t *enerd; + gmx_global_stat_t gstat; + t_graph *graph; + double tmp, minstep; + real stepsize; + real a, b, c, beta = 0.0; + real epot_repl = 0; + real pnorm; + t_mdebin *mdebin; + gmx_bool converged, foundlower; + rvec mu_tot; + gmx_bool do_log = FALSE, do_ene = FALSE, do_x, do_f; + tensor vir, pres; + int number_steps, neval = 0, nstcg = inputrec->nstcgsteep; + gmx_mdoutf_t outf; + int m, step, nminstep; + auto mdatoms = mdAtoms->mdatoms(); + + GMX_LOG(mdlog.info).asParagraph(). + appendText("Note that activating conjugate gradient energy minimization via the " + "integrator .mdp option and the command gmx mdrun may " + "be available in a different form in a future version of GROMACS, " + "e.g. 
gmx minimize and an .mdp option."); + + step = 0; + + if (MASTER(cr)) + { + // In CG, the state is extended with a search direction + state_global->flags |= (1<<estCGP); + + // Ensure the extra per-atom state array gets allocated + state_change_natoms(state_global, state_global->natoms); + + // Initialize the search direction to zero + for (RVec &cg_p : state_global->cg_p) + { + cg_p = { 0, 0, 0 }; + } + } + + /* Create 4 states on the stack and extract pointers that we will swap */ + em_state_t s0 {}, s1 {}, s2 {}, s3 {}; + em_state_t *s_min = &s0; + em_state_t *s_a = &s1; + em_state_t *s_b = &s2; + em_state_t *s_c = &s3; + + /* Init em and store the local state in s_min */ + init_em(fplog, mdlog, CG, cr, ms, outputProvider, inputrec, mdrunOptions, + state_global, top_global, s_min, &top, + nrnb, mu_tot, fr, &enerd, &graph, mdAtoms, &gstat, + vsite, constr, nullptr, + nfile, fnm, &outf, &mdebin, wcycle); + + /* Print to log file */ + print_em_start(fplog, cr, walltime_accounting, wcycle, CG); + + /* Max number of steps */ + number_steps = inputrec->nsteps; + + if (MASTER(cr)) + { + sp_header(stderr, CG, inputrec->em_tol, number_steps); + } + if (fplog) + { + sp_header(fplog, CG, inputrec->em_tol, number_steps); + } + + EnergyEvaluator energyEvaluator { + fplog, mdlog, cr, ms, + top_global, top, + inputrec, nrnb, wcycle, gstat, + vsite, constr, fcd, graph, + mdAtoms, fr, ppForceWorkload, enerd + }; + /* Call the force routine and some auxiliary (neighboursearching etc.) */ + /* do_force always puts the charge groups in the box and shifts again + * We do not unshift, so molecules are always whole in congrad.c + */ + energyEvaluator.run(s_min, mu_tot, vir, pres, -1, TRUE); + + if (MASTER(cr)) + { + /* Copy stuff to the energy bin for easy printing etc. */ + matrix nullBox = {}; + upd_mdebin(mdebin, FALSE, FALSE, static_cast<double>(step), + mdatoms->tmass, enerd, nullptr, nullptr, nullptr, nullBox, + nullptr, nullptr, vir, pres, nullptr, mu_tot, constr); + + print_ebin_header(fplog, step, step); + print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL, + mdebin, fcd, &(top_global->groups), &(inputrec->opts), nullptr); + } + + /* Estimate/guess the initial stepsize */ + stepsize = inputrec->em_stepsize/s_min->fnorm; + + if (MASTER(cr)) + { + double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms)); + fprintf(stderr, " F-max = %12.5e on atom %d\n", + s_min->fmax, s_min->a_fmax+1); + fprintf(stderr, " F-Norm = %12.5e\n", + s_min->fnorm/sqrtNumAtoms); + fprintf(stderr, "\n"); + /* and copy to the log file too... */ + fprintf(fplog, " F-max = %12.5e on atom %d\n", + s_min->fmax, s_min->a_fmax+1); + fprintf(fplog, " F-Norm = %12.5e\n", + s_min->fnorm/sqrtNumAtoms); + fprintf(fplog, "\n"); + } + /* Start the loop over CG steps. + * Each successful step is counted, and we continue until + * we either converge or reach the max number of steps. + */ + converged = FALSE; + for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++) + { + + /* start taking steps in a new direction + * First time we enter the routine, beta=0, and the direction is + * simply the negative gradient. 
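+         * On later iterations the update below is the conjugate-gradient
+         * recurrence
+         *   p_new = f + beta * p_old
+         * (f being the force, i.e. the negative gradient), while gpa
+         * accumulates the directional derivative -p.f, which is summed
+         * across ranks when running in parallel.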
+ */ + + /* Calculate the new direction in p, and the gradient in this direction, gpa */ + rvec *pm = s_min->s.cg_p.rvec_array(); + const rvec *sfm = s_min->f.rvec_array(); + double gpa = 0; + int gf = 0; + for (int i = 0; i < mdatoms->homenr; i++) + { + if (mdatoms->cFREEZE) + { + gf = mdatoms->cFREEZE[i]; + } + for (m = 0; m < DIM; m++) + { + if (!inputrec->opts.nFreeze[gf][m]) + { + pm[i][m] = sfm[i][m] + beta*pm[i][m]; + gpa -= pm[i][m]*sfm[i][m]; + /* f is negative gradient, thus the sign */ + } + else + { + pm[i][m] = 0; + } + } + } + + /* Sum the gradient along the line across CPUs */ + if (PAR(cr)) + { + gmx_sumd(1, &gpa, cr); + } + + /* Calculate the norm of the search vector */ + get_f_norm_max(cr, &(inputrec->opts), mdatoms, pm, &pnorm, nullptr, nullptr); + + /* Just in case stepsize reaches zero due to numerical precision... */ + if (stepsize <= 0) + { + stepsize = inputrec->em_stepsize/pnorm; + } + + /* + * Double check the value of the derivative in the search direction. + * If it is positive it must be due to the old information in the + * CG formula, so just remove that and start over with beta=0. + * This corresponds to a steepest descent step. + */ + if (gpa > 0) + { + beta = 0; + step--; /* Don't count this step since we are restarting */ + continue; /* Go back to the beginning of the big for-loop */ + } + + /* Calculate minimum allowed stepsize, before the average (norm) + * relative change in coordinate is smaller than precision + */ + minstep = 0; + auto s_min_x = makeArrayRef(s_min->s.x); + for (int i = 0; i < mdatoms->homenr; i++) + { + for (m = 0; m < DIM; m++) + { + tmp = fabs(s_min_x[i][m]); + if (tmp < 1.0) + { + tmp = 1.0; + } + tmp = pm[i][m]/tmp; + minstep += tmp*tmp; + } + } + /* Add up from all CPUs */ + if (PAR(cr)) + { + gmx_sumd(1, &minstep, cr); + } + + minstep = GMX_REAL_EPS/sqrt(minstep/(3*top_global->natoms)); + + if (stepsize < minstep) + { + converged = TRUE; + break; + } + + /* Write coordinates if necessary */ + do_x = do_per_step(step, inputrec->nstxout); + do_f = do_per_step(step, inputrec->nstfout); + + write_em_traj(fplog, cr, outf, do_x, do_f, nullptr, + top_global, inputrec, step, + s_min, state_global, observablesHistory); + + /* Take a step downhill. + * In theory, we should minimize the function along this direction. + * That is quite possible, but it turns out to take 5-10 function evaluations + * for each line. However, we dont really need to find the exact minimum - + * it is much better to start a new CG step in a modified direction as soon + * as we are close to it. This will save a lot of energy evaluations. + * + * In practice, we just try to take a single step. + * If it worked (i.e. lowered the energy), we increase the stepsize but + * the continue straight to the next CG step without trying to find any minimum. + * If it didn't work (higher energy), there must be a minimum somewhere between + * the old position and the new one. + * + * Due to the finite numerical accuracy, it turns out that it is a good idea + * to even accept a SMALL increase in energy, if the derivative is still downhill. + * This leads to lower final energies in the tests I've done. 
/ Erik + */ + s_a->epot = s_min->epot; + a = 0.0; + c = a + stepsize; /* reference position along line is zero */ + + if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) + { + em_dd_partition_system(fplog, mdlog, step, cr, top_global, inputrec, + s_min, top, mdAtoms, fr, vsite, constr, + nrnb, wcycle); + } + + /* Take a trial step (new coords in s_c) */ + do_em_step(cr, inputrec, mdatoms, s_min, c, &s_min->s.cg_p, s_c, + constr, -1); + + neval++; + /* Calculate energy for the trial step */ + energyEvaluator.run(s_c, mu_tot, vir, pres, -1, FALSE); + + /* Calc derivative along line */ + const rvec *pc = s_c->s.cg_p.rvec_array(); + const rvec *sfc = s_c->f.rvec_array(); + double gpc = 0; + for (int i = 0; i < mdatoms->homenr; i++) + { + for (m = 0; m < DIM; m++) + { + gpc -= pc[i][m]*sfc[i][m]; /* f is negative gradient, thus the sign */ + } + } + /* Sum the gradient along the line across CPUs */ + if (PAR(cr)) + { + gmx_sumd(1, &gpc, cr); + } + + /* This is the max amount of increase in energy we tolerate */ + tmp = std::sqrt(GMX_REAL_EPS)*fabs(s_a->epot); + + /* Accept the step if the energy is lower, or if it is not significantly higher + * and the line derivative is still negative. + */ + if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) + { + foundlower = TRUE; + /* Great, we found a better energy. Increase step for next iteration + * if we are still going down, decrease it otherwise + */ + if (gpc < 0) + { + stepsize *= 1.618034; /* The golden section */ + } + else + { + stepsize *= 0.618034; /* 1/golden section */ + } + } + else + { + /* New energy is the same or higher. We will have to do some work + * to find a smaller value in the interval. Take smaller step next time! + */ + foundlower = FALSE; + stepsize *= 0.618034; + } + + + + + /* OK, if we didn't find a lower value we will have to locate one now - there must + * be one in the interval [a=0,c]. + * The same thing is valid here, though: Don't spend dozens of iterations to find + * the line minimum. We try to interpolate based on the derivative at the endpoints, + * and only continue until we find a lower value. In most cases this means 1-2 iterations. + * + * I also have a safeguard for potentially really pathological functions so we never + * take more than 20 steps before we give up ... + * + * If we already found a lower value we just skip this step and continue to the update. + */ + double gpb; + if (!foundlower) + { + nminstep = 0; + + do + { + /* Select a new trial point. + * If the derivatives at points a & c have different sign we interpolate to zero, + * otherwise just do a bisection. + */ + if (gpa < 0 && gpc > 0) + { + b = a + gpa*(a-c)/(gpc-gpa); + } + else + { + b = 0.5*(a+c); + } + + /* safeguard if interpolation close to machine accuracy causes errors: + * never go outside the interval + */ + if (b <= a || b >= c) + { + b = 0.5*(a+c); + } + + if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) + { + /* Reload the old state */ + em_dd_partition_system(fplog, mdlog, -1, cr, top_global, inputrec, + s_min, top, mdAtoms, fr, vsite, constr, + nrnb, wcycle); + } + + /* Take a trial step to this new point - new coords in s_b */ + do_em_step(cr, inputrec, mdatoms, s_min, b, &s_min->s.cg_p, s_b, + constr, -1); + + neval++; + /* Calculate energy for the trial step */ + energyEvaluator.run(s_b, mu_tot, vir, pres, -1, FALSE); + + /* p does not change within a step, but since the domain decomposition + * might change, we have to use cg_p of s_b here. 
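+                 * gpb computed below is the line derivative at the trial
+                 * point B, again -p.f but with the force stored in s_b; its
+                 * sign decides further down whether B replaces endpoint A or
+                 * endpoint C of the search interval.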
+ */ + const rvec *pb = s_b->s.cg_p.rvec_array(); + const rvec *sfb = s_b->f.rvec_array(); + gpb = 0; + for (int i = 0; i < mdatoms->homenr; i++) + { + for (m = 0; m < DIM; m++) + { + gpb -= pb[i][m]*sfb[i][m]; /* f is negative gradient, thus the sign */ + } + } + /* Sum the gradient along the line across CPUs */ + if (PAR(cr)) + { + gmx_sumd(1, &gpb, cr); + } + + if (debug) + { + fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n", + s_a->epot, s_b->epot, s_c->epot, gpb); + } + + epot_repl = s_b->epot; + + /* Keep one of the intervals based on the value of the derivative at the new point */ + if (gpb > 0) + { + /* Replace c endpoint with b */ + swap_em_state(&s_b, &s_c); + c = b; + gpc = gpb; + } + else + { + /* Replace a endpoint with b */ + swap_em_state(&s_b, &s_a); + a = b; + gpa = gpb; + } + + /* + * Stop search as soon as we find a value smaller than the endpoints. + * Never run more than 20 steps, no matter what. + */ + nminstep++; + } + while ((epot_repl > s_a->epot || epot_repl > s_c->epot) && + (nminstep < 20)); + + if (std::fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS || + nminstep >= 20) + { + /* OK. We couldn't find a significantly lower energy. + * If beta==0 this was steepest descent, and then we give up. + * If not, set beta=0 and restart with steepest descent before quitting. + */ + if (beta == 0.0) + { + /* Converged */ + converged = TRUE; + break; + } + else + { + /* Reset memory before giving up */ + beta = 0.0; + continue; + } + } + + /* Select min energy state of A & C, put the best in B. + */ + if (s_c->epot < s_a->epot) + { + if (debug) + { + fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n", + s_c->epot, s_a->epot); + } + swap_em_state(&s_b, &s_c); + gpb = gpc; + } + else + { + if (debug) + { + fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n", + s_a->epot, s_c->epot); + } + swap_em_state(&s_b, &s_a); + gpb = gpa; + } + + } + else + { + if (debug) + { + fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n", + s_c->epot); + } + swap_em_state(&s_b, &s_c); + gpb = gpc; + } + + /* new search direction */ + /* beta = 0 means forget all memory and restart with steepest descents. */ + if (nstcg && ((step % nstcg) == 0)) + { + beta = 0.0; + } + else + { + /* s_min->fnorm cannot be zero, because then we would have converged + * and broken out. + */ + + /* Polak-Ribiere update. + * Change to fnorm2/fnorm2_old for Fletcher-Reeves + */ + beta = pr_beta(cr, &inputrec->opts, mdatoms, top_global, s_min, s_b); + } + /* Limit beta to prevent oscillations */ + if (fabs(beta) > 5.0) + { + beta = 0.0; + } + + + /* update positions */ + swap_em_state(&s_min, &s_b); + gpa = gpb; + + /* Print it if necessary */ + if (MASTER(cr)) + { + if (mdrunOptions.verbose) + { + double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms)); + fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n", + step, s_min->epot, s_min->fnorm/sqrtNumAtoms, + s_min->fmax, s_min->a_fmax+1); + fflush(stderr); + } + /* Store the new (lower) energies */ + matrix nullBox = {}; + upd_mdebin(mdebin, FALSE, FALSE, static_cast<double>(step), + mdatoms->tmass, enerd, nullptr, nullptr, nullptr, nullBox, + nullptr, nullptr, vir, pres, nullptr, mu_tot, constr); + + do_log = do_per_step(step, inputrec->nstlog); + do_ene = do_per_step(step, inputrec->nstenergy); + + /* Prepare IMD energy record, if bIMD is TRUE. 
*/ + IMD_fill_energy_record(inputrec->bIMD, inputrec->imd, enerd, step, TRUE); + + if (do_log) + { + print_ebin_header(fplog, step, step); + } + print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE, + do_log ? fplog : nullptr, step, step, eprNORMAL, + mdebin, fcd, &(top_global->groups), &(inputrec->opts), nullptr); + } + + /* Send energies and positions to the IMD client if bIMD is TRUE. */ + if (MASTER(cr) && do_IMD(inputrec->bIMD, step, cr, TRUE, state_global->box, state_global->x.rvec_array(), inputrec, 0, wcycle)) + { + IMD_send_positions(inputrec->imd); + } + + /* Stop when the maximum force lies below tolerance. + * If we have reached machine precision, converged is already set to true. + */ + converged = converged || (s_min->fmax < inputrec->em_tol); + + } /* End of the loop */ + + /* IMD cleanup, if bIMD is TRUE. */ + IMD_finalize(inputrec->bIMD, inputrec->imd); + + if (converged) + { + step--; /* we never took that last step in this case */ + + } + if (s_min->fmax > inputrec->em_tol) + { + if (MASTER(cr)) + { + warn_step(fplog, inputrec->em_tol, s_min->fmax, + step-1 == number_steps, FALSE); + } + converged = FALSE; + } + + if (MASTER(cr)) + { + /* If we printed energy and/or logfile last step (which was the last step) + * we don't have to do it again, but otherwise print the final values. + */ + if (!do_log) + { + /* Write final value to log since we didn't do anything the last step */ + print_ebin_header(fplog, step, step); + } + if (!do_ene || !do_log) + { + /* Write final energy file entries */ + print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE, + !do_log ? fplog : nullptr, step, step, eprNORMAL, + mdebin, fcd, &(top_global->groups), &(inputrec->opts), nullptr); + } + } + + /* Print some stuff... */ + if (MASTER(cr)) + { + fprintf(stderr, "\nwriting lowest energy coordinates.\n"); + } + + /* IMPORTANT! + * For accurate normal mode calculation it is imperative that we + * store the last conformation into the full precision binary trajectory. + * + * However, we should only do it if we did NOT already write this step + * above (which we did if do_x or do_f was true). + */ + /* Note that with 0 < nstfout != nstxout we can end up with two frames + * in the trajectory with the same step number. 
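+     * The flags set below request writing only what was not already written
+     * at this step: coordinates are forced out unless this was an nstxout
+     * step, and forces unless nstfout is active and this was an nstfout step.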
+ */ + do_x = !do_per_step(step, inputrec->nstxout); + do_f = (inputrec->nstfout > 0 && !do_per_step(step, inputrec->nstfout)); + + write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), + top_global, inputrec, step, + s_min, state_global, observablesHistory); + + + if (MASTER(cr)) + { + double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms)); + print_converged(stderr, CG, inputrec->em_tol, step, converged, number_steps, + s_min, sqrtNumAtoms); + print_converged(fplog, CG, inputrec->em_tol, step, converged, number_steps, + s_min, sqrtNumAtoms); + + fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval); + } + + finish_em(cr, outf, walltime_accounting, wcycle); + + /* To print the actual number of steps we needed somewhere */ + walltime_accounting_set_nsteps_done(walltime_accounting, step); +} + + +void +Integrator::do_lbfgs() +{ + static const char *LBFGS = "Low-Memory BFGS Minimizer"; + em_state_t ems; + gmx_localtop_t *top; + gmx_enerdata_t *enerd; + gmx_global_stat_t gstat; + t_graph *graph; + int ncorr, nmaxcorr, point, cp, neval, nminstep; + double stepsize, step_taken, gpa, gpb, gpc, tmp, minstep; + real *rho, *alpha, *p, *s, **dx, **dg; + real a, b, c, maxdelta, delta; + real diag, Epot0; + real dgdx, dgdg, sq, yr, beta; + t_mdebin *mdebin; + gmx_bool converged; + rvec mu_tot; + gmx_bool do_log, do_ene, do_x, do_f, foundlower, *frozen; + tensor vir, pres; + int start, end, number_steps; + gmx_mdoutf_t outf; + int i, k, m, n, gf, step; + int mdof_flags; + auto mdatoms = mdAtoms->mdatoms(); + + GMX_LOG(mdlog.info).asParagraph(). + appendText("Note that activating L-BFGS energy minimization via the " + "integrator .mdp option and the command gmx mdrun may " + "be available in a different form in a future version of GROMACS, " + "e.g. gmx minimize and an .mdp option."); + + if (PAR(cr)) + { + gmx_fatal(FARGS, "L-BFGS minimization only supports a single rank"); + } + + if (nullptr != constr) + { + gmx_fatal(FARGS, "The combination of constraints and L-BFGS minimization is not implemented. Either do not use constraints, or use another minimizer (e.g. 
steepest descent)."); + } + + n = 3*state_global->natoms; + nmaxcorr = inputrec->nbfgscorr; + + snew(frozen, n); + + snew(p, n); + snew(rho, nmaxcorr); + snew(alpha, nmaxcorr); + + snew(dx, nmaxcorr); + for (i = 0; i < nmaxcorr; i++) + { + snew(dx[i], n); + } + + snew(dg, nmaxcorr); + for (i = 0; i < nmaxcorr; i++) + { + snew(dg[i], n); + } + + step = 0; + neval = 0; + + /* Init em */ + init_em(fplog, mdlog, LBFGS, cr, ms, outputProvider, inputrec, mdrunOptions, + state_global, top_global, &ems, &top, + nrnb, mu_tot, fr, &enerd, &graph, mdAtoms, &gstat, + vsite, constr, nullptr, + nfile, fnm, &outf, &mdebin, wcycle); + + start = 0; + end = mdatoms->homenr; + + /* We need 4 working states */ + em_state_t s0 {}, s1 {}, s2 {}, s3 {}; + em_state_t *sa = &s0; + em_state_t *sb = &s1; + em_state_t *sc = &s2; + em_state_t *last = &s3; + /* Initialize by copying the state from ems (we could skip x and f here) */ + *sa = ems; + *sb = ems; + *sc = ems; + + /* Print to log file */ + print_em_start(fplog, cr, walltime_accounting, wcycle, LBFGS); + + do_log = do_ene = do_x = do_f = TRUE; + + /* Max number of steps */ + number_steps = inputrec->nsteps; + + /* Create a 3*natoms index to tell whether each degree of freedom is frozen */ + gf = 0; + for (i = start; i < end; i++) + { + if (mdatoms->cFREEZE) + { + gf = mdatoms->cFREEZE[i]; + } + for (m = 0; m < DIM; m++) + { + frozen[3*i+m] = (inputrec->opts.nFreeze[gf][m] != 0); + } + } + if (MASTER(cr)) + { + sp_header(stderr, LBFGS, inputrec->em_tol, number_steps); + } + if (fplog) + { + sp_header(fplog, LBFGS, inputrec->em_tol, number_steps); + } + + if (vsite) + { + construct_vsites(vsite, state_global->x.rvec_array(), 1, nullptr, + top->idef.iparams, top->idef.il, + fr->ePBC, fr->bMolPBC, cr, state_global->box); + } + + /* Call the force routine and some auxiliary (neighboursearching etc.) */ + /* do_force always puts the charge groups in the box and shifts again + * We do not unshift, so molecules are always whole + */ + neval++; + EnergyEvaluator energyEvaluator { + fplog, mdlog, cr, ms, + top_global, top, + inputrec, nrnb, wcycle, gstat, + vsite, constr, fcd, graph, + mdAtoms, fr, ppForceWorkload, enerd + }; + energyEvaluator.run(&ems, mu_tot, vir, pres, -1, TRUE); + + if (MASTER(cr)) + { + /* Copy stuff to the energy bin for easy printing etc. */ + matrix nullBox = {}; + upd_mdebin(mdebin, FALSE, FALSE, static_cast<double>(step), + mdatoms->tmass, enerd, nullptr, nullptr, nullptr, nullBox, + nullptr, nullptr, vir, pres, nullptr, mu_tot, constr); + + print_ebin_header(fplog, step, step); + print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL, + mdebin, fcd, &(top_global->groups), &(inputrec->opts), nullptr); + } + + /* Set the initial step. + * since it will be multiplied by the non-normalized search direction + * vector (force vector the first time), we scale it by the + * norm of the force. + */ + + if (MASTER(cr)) + { + double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms)); + fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr); + fprintf(stderr, " F-max = %12.5e on atom %d\n", ems.fmax, ems.a_fmax + 1); + fprintf(stderr, " F-Norm = %12.5e\n", ems.fnorm/sqrtNumAtoms); + fprintf(stderr, "\n"); + /* and copy to the log file too... 
*/ + fprintf(fplog, "Using %d BFGS correction steps.\n\n", nmaxcorr); + fprintf(fplog, " F-max = %12.5e on atom %d\n", ems.fmax, ems.a_fmax + 1); + fprintf(fplog, " F-Norm = %12.5e\n", ems.fnorm/sqrtNumAtoms); + fprintf(fplog, "\n"); + } + + // Point is an index to the memory of search directions, where 0 is the first one. + point = 0; + + // Set initial search direction to the force (-gradient), or 0 for frozen particles. + real *fInit = static_cast<real *>(ems.f.rvec_array()[0]); + for (i = 0; i < n; i++) + { + if (!frozen[i]) + { + dx[point][i] = fInit[i]; /* Initial search direction */ + } + else + { + dx[point][i] = 0; + } + } + + // Stepsize will be modified during the search, and actually it is not critical + // (the main efficiency in the algorithm comes from changing directions), but + // we still need an initial value, so estimate it as the inverse of the norm + // so we take small steps where the potential fluctuates a lot. + stepsize = 1.0/ems.fnorm; + + /* Start the loop over BFGS steps. + * Each successful step is counted, and we continue until + * we either converge or reach the max number of steps. + */ + + ncorr = 0; + + /* Set the gradient from the force */ + converged = FALSE; + for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++) + { + + /* Write coordinates if necessary */ + do_x = do_per_step(step, inputrec->nstxout); + do_f = do_per_step(step, inputrec->nstfout); + + mdof_flags = 0; + if (do_x) + { + mdof_flags |= MDOF_X; + } + + if (do_f) + { + mdof_flags |= MDOF_F; + } + + if (inputrec->bIMD) + { + mdof_flags |= MDOF_IMD; + } + + mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, + top_global, step, static_cast<real>(step), &ems.s, state_global, observablesHistory, ems.f); + + /* Do the linesearching in the direction dx[point][0..(n-1)] */ + + /* make s a pointer to current search direction - point=0 first time we get here */ + s = dx[point]; + + real *xx = static_cast<real *>(ems.s.x.rvec_array()[0]); + real *ff = static_cast<real *>(ems.f.rvec_array()[0]); + + // calculate line gradient in position A + for (gpa = 0, i = 0; i < n; i++) + { + gpa -= s[i]*ff[i]; + } + + /* Calculate minimum allowed stepsize along the line, before the average (norm) + * relative change in coordinate is smaller than precision + */ + for (minstep = 0, i = 0; i < n; i++) + { + tmp = fabs(xx[i]); + if (tmp < 1.0) + { + tmp = 1.0; + } + tmp = s[i]/tmp; + minstep += tmp*tmp; + } + minstep = GMX_REAL_EPS/sqrt(minstep/n); + + if (stepsize < minstep) + { + converged = TRUE; + break; + } + + // Before taking any steps along the line, store the old position + *last = ems; + real *lastx = static_cast<real *>(last->s.x.data()[0]); + real *lastf = static_cast<real *>(last->f.data()[0]); + Epot0 = ems.epot; + + *sa = ems; + + /* Take a step downhill. + * In theory, we should find the actual minimum of the function in this + * direction, somewhere along the line. + * That is quite possible, but it turns out to take 5-10 function evaluations + * for each line. However, we dont really need to find the exact minimum - + * it is much better to start a new BFGS step in a modified direction as soon + * as we are close to it. This will save a lot of energy evaluations. + * + * In practice, we just try to take a single step. + * If it worked (i.e. lowered the energy), we increase the stepsize but + * continue straight to the next BFGS step without trying to find any minimum, + * i.e. we change the search direction too. 
If the line was smooth, it is + * likely we are in a smooth region, and then it makes sense to take longer + * steps in the modified search direction too. + * + * If it didn't work (higher energy), there must be a minimum somewhere between + * the old position and the new one. Then we need to start by finding a lower + * value before we change search direction. Since the energy was apparently + * quite rough, we need to decrease the step size. + * + * Due to the finite numerical accuracy, it turns out that it is a good idea + * to accept a SMALL increase in energy, if the derivative is still downhill. + * This leads to lower final energies in the tests I've done. / Erik + */ + + // State "A" is the first position along the line. + // reference position along line is initially zero + a = 0.0; + + // Check stepsize first. We do not allow displacements + // larger than emstep. + // + do + { + // Pick a new position C by adding stepsize to A. + c = a + stepsize; + + // Calculate what the largest change in any individual coordinate + // would be (translation along line * gradient along line) + maxdelta = 0; + for (i = 0; i < n; i++) + { + delta = c*s[i]; + if (delta > maxdelta) + { + maxdelta = delta; + } + } + // If any displacement is larger than the stepsize limit, reduce the step + if (maxdelta > inputrec->em_stepsize) + { + stepsize *= 0.1; + } + } + while (maxdelta > inputrec->em_stepsize); + + // Take a trial step and move the coordinate array xc[] to position C + real *xc = static_cast<real *>(sc->s.x.rvec_array()[0]); + for (i = 0; i < n; i++) + { + xc[i] = lastx[i] + c*s[i]; + } + + neval++; + // Calculate energy for the trial step in position C + energyEvaluator.run(sc, mu_tot, vir, pres, step, FALSE); + + // Calc line gradient in position C + real *fc = static_cast<real *>(sc->f.rvec_array()[0]); + for (gpc = 0, i = 0; i < n; i++) + { + gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */ + } + /* Sum the gradient along the line across CPUs */ + if (PAR(cr)) + { + gmx_sumd(1, &gpc, cr); + } + + // This is the max amount of increase in energy we tolerate. + // By allowing VERY small changes (close to numerical precision) we + // frequently find even better (lower) final energies. + tmp = std::sqrt(GMX_REAL_EPS)*fabs(sa->epot); + + // Accept the step if the energy is lower in the new position C (compared to A), + // or if it is not significantly higher and the line derivative is still negative. + foundlower = sc->epot < sa->epot || (gpc < 0 && sc->epot < (sa->epot + tmp)); + // If true, great, we found a better energy. We no longer try to alter the + // stepsize, but simply accept this new better position. The we select a new + // search direction instead, which will be much more efficient than continuing + // to take smaller steps along a line. Set fnorm based on the new C position, + // which will be used to update the stepsize to 1/fnorm further down. + + // If false, the energy is NOT lower in point C, i.e. it will be the same + // or higher than in point A. In this case it is pointless to move to point C, + // so we will have to do more iterations along the same line to find a smaller + // value in the interval [A=0.0,C]. + // Here, A is still 0.0, but that will change when we do a search in the interval + // [0.0,C] below. That search we will do by interpolation or bisection rather + // than with the stepsize, so no need to modify it. For the next search direction + // it will be reset to 1/fnorm anyway. 
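+        // In compact form, the acceptance test above is:
+        //   accept C  if  E(C) < E(A),  or  g(C) < 0  and  E(C) < E(A) + sqrt(GMX_REAL_EPS)*|E(A)|,
+        // where g() is the directional derivative along s (gpa at A, gpc at C).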
+ + if (!foundlower) + { + // OK, if we didn't find a lower value we will have to locate one now - there must + // be one in the interval [a,c]. + // The same thing is valid here, though: Don't spend dozens of iterations to find + // the line minimum. We try to interpolate based on the derivative at the endpoints, + // and only continue until we find a lower value. In most cases this means 1-2 iterations. + // I also have a safeguard for potentially really pathological functions so we never + // take more than 20 steps before we give up. + // If we already found a lower value we just skip this step and continue to the update. + real fnorm = 0; + nminstep = 0; + do + { + // Select a new trial point B in the interval [A,C]. + // If the derivatives at points a & c have different sign we interpolate to zero, + // otherwise just do a bisection since there might be multiple minima/maxima + // inside the interval. + if (gpa < 0 && gpc > 0) + { + b = a + gpa*(a-c)/(gpc-gpa); + } + else + { + b = 0.5*(a+c); + } + + /* safeguard if interpolation close to machine accuracy causes errors: + * never go outside the interval + */ + if (b <= a || b >= c) + { + b = 0.5*(a+c); + } + + // Take a trial step to point B + real *xb = static_cast<real *>(sb->s.x.rvec_array()[0]); + for (i = 0; i < n; i++) + { + xb[i] = lastx[i] + b*s[i]; + } + + neval++; + // Calculate energy for the trial step in point B + energyEvaluator.run(sb, mu_tot, vir, pres, step, FALSE); + fnorm = sb->fnorm; + + // Calculate gradient in point B + real *fb = static_cast<real *>(sb->f.rvec_array()[0]); + for (gpb = 0, i = 0; i < n; i++) + { + gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */ + + } + /* Sum the gradient along the line across CPUs */ + if (PAR(cr)) + { + gmx_sumd(1, &gpb, cr); + } + + // Keep one of the intervals [A,B] or [B,C] based on the value of the derivative + // at the new point B, and rename the endpoints of this new interval A and C. + if (gpb > 0) + { + /* Replace c endpoint with b */ + c = b; + /* swap states b and c */ + swap_em_state(&sb, &sc); + } + else + { + /* Replace a endpoint with b */ + a = b; + /* swap states a and b */ + swap_em_state(&sa, &sb); + } + + /* + * Stop search as soon as we find a value smaller than the endpoints, + * or if the tolerance is below machine precision. + * Never run more than 20 steps, no matter what. + */ + nminstep++; + } + while ((sb->epot > sa->epot || sb->epot > sc->epot) && (nminstep < 20)); + + if (std::fabs(sb->epot - Epot0) < GMX_REAL_EPS || nminstep >= 20) + { + /* OK. We couldn't find a significantly lower energy. + * If ncorr==0 this was steepest descent, and then we give up. + * If not, reset memory to restart as steepest descent before quitting. 
+ */ + if (ncorr == 0) + { + /* Converged */ + converged = TRUE; + break; + } + else + { + /* Reset memory */ + ncorr = 0; + /* Search in gradient direction */ + for (i = 0; i < n; i++) + { + dx[point][i] = ff[i]; + } + /* Reset stepsize */ + stepsize = 1.0/fnorm; + continue; + } + } + + /* Select min energy state of A & C, put the best in xx/ff/Epot + */ + if (sc->epot < sa->epot) + { + /* Use state C */ + ems = *sc; + step_taken = c; + } + else + { + /* Use state A */ + ems = *sa; + step_taken = a; + } + + } + else + { + /* found lower */ + /* Use state C */ + ems = *sc; + step_taken = c; + } + + /* Update the memory information, and calculate a new + * approximation of the inverse hessian + */ + + /* Have new data in Epot, xx, ff */ + if (ncorr < nmaxcorr) + { + ncorr++; + } + + for (i = 0; i < n; i++) + { + dg[point][i] = lastf[i]-ff[i]; + dx[point][i] *= step_taken; + } + + dgdg = 0; + dgdx = 0; + for (i = 0; i < n; i++) + { + dgdg += dg[point][i]*dg[point][i]; + dgdx += dg[point][i]*dx[point][i]; + } + + diag = dgdx/dgdg; + + rho[point] = 1.0/dgdx; + point++; + + if (point >= nmaxcorr) + { + point = 0; + } + + /* Update */ + for (i = 0; i < n; i++) + { + p[i] = ff[i]; + } + + cp = point; + + /* Recursive update. First go back over the memory points */ + for (k = 0; k < ncorr; k++) + { + cp--; + if (cp < 0) + { + cp = ncorr-1; + } + + sq = 0; + for (i = 0; i < n; i++) + { + sq += dx[cp][i]*p[i]; + } + + alpha[cp] = rho[cp]*sq; + + for (i = 0; i < n; i++) + { + p[i] -= alpha[cp]*dg[cp][i]; + } + } + + for (i = 0; i < n; i++) + { + p[i] *= diag; + } + + /* And then go forward again */ + for (k = 0; k < ncorr; k++) + { + yr = 0; + for (i = 0; i < n; i++) + { + yr += p[i]*dg[cp][i]; + } + + beta = rho[cp]*yr; + beta = alpha[cp]-beta; + + for (i = 0; i < n; i++) + { + p[i] += beta*dx[cp][i]; + } + + cp++; + if (cp >= ncorr) + { + cp = 0; + } + } + + for (i = 0; i < n; i++) + { + if (!frozen[i]) + { + dx[point][i] = p[i]; + } + else + { + dx[point][i] = 0; + } + } + + /* Print it if necessary */ + if (MASTER(cr)) + { + if (mdrunOptions.verbose) + { + double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms)); + fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n", + step, ems.epot, ems.fnorm/sqrtNumAtoms, ems.fmax, ems.a_fmax + 1); + fflush(stderr); + } + /* Store the new (lower) energies */ + matrix nullBox = {}; + upd_mdebin(mdebin, FALSE, FALSE, static_cast<double>(step), + mdatoms->tmass, enerd, nullptr, nullptr, nullptr, nullBox, + nullptr, nullptr, vir, pres, nullptr, mu_tot, constr); + do_log = do_per_step(step, inputrec->nstlog); + do_ene = do_per_step(step, inputrec->nstenergy); + if (do_log) + { + print_ebin_header(fplog, step, step); + } + print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE, + do_log ? fplog : nullptr, step, step, eprNORMAL, + mdebin, fcd, &(top_global->groups), &(inputrec->opts), nullptr); + } + + /* Send x and E to IMD client, if bIMD is TRUE. */ + if (do_IMD(inputrec->bIMD, step, cr, TRUE, state_global->box, state_global->x.rvec_array(), inputrec, 0, wcycle) && MASTER(cr)) + { + IMD_send_positions(inputrec->imd); + } + + // Reset stepsize in we are doing more iterations + stepsize = 1.0/ems.fnorm; + + /* Stop when the maximum force lies below tolerance. + * If we have reached machine precision, converged is already set to true. + */ + converged = converged || (ems.fmax < inputrec->em_tol); + + } /* End of the loop */ + + /* IMD cleanup, if bIMD is TRUE. 
*/ + IMD_finalize(inputrec->bIMD, inputrec->imd); + + if (converged) + { + step--; /* we never took that last step in this case */ + + } + if (ems.fmax > inputrec->em_tol) + { + if (MASTER(cr)) + { + warn_step(fplog, inputrec->em_tol, ems.fmax, + step-1 == number_steps, FALSE); + } + converged = FALSE; + } + + /* If we printed energy and/or logfile last step (which was the last step) + * we don't have to do it again, but otherwise print the final values. + */ + if (!do_log) /* Write final value to log since we didn't do anythin last step */ + { + print_ebin_header(fplog, step, step); + } + if (!do_ene || !do_log) /* Write final energy file entries */ + { + print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE, + !do_log ? fplog : nullptr, step, step, eprNORMAL, + mdebin, fcd, &(top_global->groups), &(inputrec->opts), nullptr); + } + + /* Print some stuff... */ + if (MASTER(cr)) + { + fprintf(stderr, "\nwriting lowest energy coordinates.\n"); + } + + /* IMPORTANT! + * For accurate normal mode calculation it is imperative that we + * store the last conformation into the full precision binary trajectory. + * + * However, we should only do it if we did NOT already write this step + * above (which we did if do_x or do_f was true). + */ + do_x = !do_per_step(step, inputrec->nstxout); + do_f = !do_per_step(step, inputrec->nstfout); + write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), + top_global, inputrec, step, + &ems, state_global, observablesHistory); + + if (MASTER(cr)) + { + double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms)); + print_converged(stderr, LBFGS, inputrec->em_tol, step, converged, + number_steps, &ems, sqrtNumAtoms); + print_converged(fplog, LBFGS, inputrec->em_tol, step, converged, + number_steps, &ems, sqrtNumAtoms); + + fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval); + } + + finish_em(cr, outf, walltime_accounting, wcycle); + + /* To print the actual number of steps we needed somewhere */ + walltime_accounting_set_nsteps_done(walltime_accounting, step); +} + +void +Integrator::do_steep() +{ + const char *SD = "Steepest Descents"; + gmx_localtop_t *top; + gmx_enerdata_t *enerd; + gmx_global_stat_t gstat; + t_graph *graph; + real stepsize; + real ustep; + gmx_mdoutf_t outf; + t_mdebin *mdebin; + gmx_bool bDone, bAbort, do_x, do_f; + tensor vir, pres; + rvec mu_tot; + int nsteps; + int count = 0; + int steps_accepted = 0; + auto mdatoms = mdAtoms->mdatoms(); + + GMX_LOG(mdlog.info).asParagraph(). + appendText("Note that activating steepest-descent energy minimization via the " + "integrator .mdp option and the command gmx mdrun may " + "be available in a different form in a future version of GROMACS, " + "e.g. gmx minimize and an .mdp option."); + + /* Create 2 states on the stack and extract pointers that we will swap */ + em_state_t s0 {}, s1 {}; + em_state_t *s_min = &s0; + em_state_t *s_try = &s1; + + /* Init em and store the local state in s_try */ + init_em(fplog, mdlog, SD, cr, ms, outputProvider, inputrec, mdrunOptions, + state_global, top_global, s_try, &top, + nrnb, mu_tot, fr, &enerd, &graph, mdAtoms, &gstat, + vsite, constr, nullptr, + nfile, fnm, &outf, &mdebin, wcycle); + + /* Print to log file */ + print_em_start(fplog, cr, walltime_accounting, wcycle, SD); + + /* Set variables for stepsize (in nm). This is the largest + * step that we are going to make in any direction. 
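+     * The displacement of atom i in a single step is f[i]*ustep/fmax, so no
+     * atom moves further than ustep. Below, ustep is scaled up by 1.2 after
+     * accepted steps and halved after rejected ones.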
+ */ + ustep = inputrec->em_stepsize; + stepsize = 0; + + /* Max number of steps */ + nsteps = inputrec->nsteps; + + if (MASTER(cr)) + { + /* Print to the screen */ + sp_header(stderr, SD, inputrec->em_tol, nsteps); + } + if (fplog) + { + sp_header(fplog, SD, inputrec->em_tol, nsteps); + } + EnergyEvaluator energyEvaluator { + fplog, mdlog, cr, ms, + top_global, top, + inputrec, nrnb, wcycle, gstat, + vsite, constr, fcd, graph, + mdAtoms, fr, ppForceWorkload, enerd + }; + + /**** HERE STARTS THE LOOP **** + * count is the counter for the number of steps + * bDone will be TRUE when the minimization has converged + * bAbort will be TRUE when nsteps steps have been performed or when + * the stepsize becomes smaller than is reasonable for machine precision + */ + count = 0; + bDone = FALSE; + bAbort = FALSE; + while (!bDone && !bAbort) + { + bAbort = (nsteps >= 0) && (count == nsteps); + + /* set new coordinates, except for first step */ + bool validStep = true; + if (count > 0) + { + validStep = + do_em_step(cr, inputrec, mdatoms, + s_min, stepsize, &s_min->f, s_try, + constr, count); + } + + if (validStep) + { + energyEvaluator.run(s_try, mu_tot, vir, pres, count, count == 0); + } + else + { + // Signal constraint error during stepping with energy=inf + s_try->epot = std::numeric_limits<real>::infinity(); + } + + if (MASTER(cr)) + { + print_ebin_header(fplog, count, count); + } + + if (count == 0) + { + s_min->epot = s_try->epot; + } + + /* Print it if necessary */ + if (MASTER(cr)) + { + if (mdrunOptions.verbose) + { + fprintf(stderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c", + count, ustep, s_try->epot, s_try->fmax, s_try->a_fmax+1, + ( (count == 0) || (s_try->epot < s_min->epot) ) ? '\n' : '\r'); + fflush(stderr); + } + + if ( (count == 0) || (s_try->epot < s_min->epot) ) + { + /* Store the new (lower) energies */ + matrix nullBox = {}; + upd_mdebin(mdebin, FALSE, FALSE, static_cast<double>(count), + mdatoms->tmass, enerd, nullptr, nullptr, nullptr, + nullBox, nullptr, nullptr, vir, pres, nullptr, mu_tot, constr); + + /* Prepare IMD energy record, if bIMD is TRUE. */ + IMD_fill_energy_record(inputrec->bIMD, inputrec->imd, enerd, count, TRUE); + + print_ebin(mdoutf_get_fp_ene(outf), TRUE, + do_per_step(steps_accepted, inputrec->nstdisreout), + do_per_step(steps_accepted, inputrec->nstorireout), + fplog, count, count, eprNORMAL, + mdebin, fcd, &(top_global->groups), &(inputrec->opts), nullptr); + fflush(fplog); + } + } + + /* Now if the new energy is smaller than the previous... + * or if this is the first step! + * or if we did random steps! + */ + + if ( (count == 0) || (s_try->epot < s_min->epot) ) + { + steps_accepted++; + + /* Test whether the convergence criterion is met... */ + bDone = (s_try->fmax < inputrec->em_tol); + + /* Copy the arrays for force, positions and energy */ + /* The 'Min' array always holds the coords and forces of the minimal + sampled energy */ + swap_em_state(&s_min, &s_try); + if (count > 0) + { + ustep *= 1.2; + } + + /* Write to trn, if necessary */ + do_x = do_per_step(steps_accepted, inputrec->nstxout); + do_f = do_per_step(steps_accepted, inputrec->nstfout); + write_em_traj(fplog, cr, outf, do_x, do_f, nullptr, + top_global, inputrec, count, + s_min, state_global, observablesHistory); + } + else + { + /* If energy is not smaller make the step smaller... 
*/ + ustep *= 0.5; + + if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) + { + /* Reload the old state */ + em_dd_partition_system(fplog, mdlog, count, cr, top_global, inputrec, + s_min, top, mdAtoms, fr, vsite, constr, + nrnb, wcycle); + } + } + + /* Determine new step */ + stepsize = ustep/s_min->fmax; + + /* Check if stepsize is too small, with 1 nm as a characteristic length */ +#if GMX_DOUBLE + if (count == nsteps || ustep < 1e-12) +#else + if (count == nsteps || ustep < 1e-6) +#endif + { + if (MASTER(cr)) + { + warn_step(fplog, inputrec->em_tol, s_min->fmax, + count == nsteps, constr != nullptr); + } + bAbort = TRUE; + } + + /* Send IMD energies and positions, if bIMD is TRUE. */ + if (do_IMD(inputrec->bIMD, count, cr, TRUE, state_global->box, + MASTER(cr) ? state_global->x.rvec_array() : nullptr, + inputrec, 0, wcycle) && + MASTER(cr)) + { + IMD_send_positions(inputrec->imd); + } + + count++; + } /* End of the loop */ + + /* IMD cleanup, if bIMD is TRUE. */ + IMD_finalize(inputrec->bIMD, inputrec->imd); + + /* Print some data... */ + if (MASTER(cr)) + { + fprintf(stderr, "\nwriting lowest energy coordinates.\n"); + } + write_em_traj(fplog, cr, outf, TRUE, inputrec->nstfout != 0, ftp2fn(efSTO, nfile, fnm), + top_global, inputrec, count, + s_min, state_global, observablesHistory); + + if (MASTER(cr)) + { + double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms)); + + print_converged(stderr, SD, inputrec->em_tol, count, bDone, nsteps, + s_min, sqrtNumAtoms); + print_converged(fplog, SD, inputrec->em_tol, count, bDone, nsteps, + s_min, sqrtNumAtoms); + } + + finish_em(cr, outf, walltime_accounting, wcycle); + + /* To print the actual number of steps we needed somewhere */ + inputrec->nsteps = count; + + walltime_accounting_set_nsteps_done(walltime_accounting, count); +} + +void +Integrator::do_nm() +{ + const char *NM = "Normal Mode Analysis"; + gmx_mdoutf_t outf; + int nnodes, node; + gmx_localtop_t *top; + gmx_enerdata_t *enerd; + gmx_global_stat_t gstat; + t_graph *graph; + tensor vir, pres; + rvec mu_tot; + rvec *dfdx; + gmx_bool bSparse; /* use sparse matrix storage format */ + size_t sz; + gmx_sparsematrix_t * sparse_matrix = nullptr; + real * full_matrix = nullptr; + + /* added with respect to mdrun */ + int row, col; + real der_range = 10.0*std::sqrt(GMX_REAL_EPS); + real x_min; + bool bIsMaster = MASTER(cr); + auto mdatoms = mdAtoms->mdatoms(); + + GMX_LOG(mdlog.info).asParagraph(). + appendText("Note that activating normal-mode analysis via the integrator " + ".mdp option and the command gmx mdrun may " + "be available in a different form in a future version of GROMACS, " + "e.g. 
gmx normal-modes."); + + if (constr != nullptr) + { + gmx_fatal(FARGS, "Constraints present with Normal Mode Analysis, this combination is not supported"); + } + + gmx_shellfc_t *shellfc; + + em_state_t state_work {}; + + /* Init em and store the local state in state_minimum */ + init_em(fplog, mdlog, NM, cr, ms, outputProvider, inputrec, mdrunOptions, + state_global, top_global, &state_work, &top, + nrnb, mu_tot, fr, &enerd, &graph, mdAtoms, &gstat, + vsite, constr, &shellfc, + nfile, fnm, &outf, nullptr, wcycle); + + std::vector<int> atom_index = get_atom_index(top_global); + std::vector<gmx::RVec> fneg(atom_index.size(), {0, 0, 0}); + snew(dfdx, atom_index.size()); + +#if !GMX_DOUBLE + if (bIsMaster) + { + fprintf(stderr, + "NOTE: This version of GROMACS has been compiled in single precision,\n" + " which MIGHT not be accurate enough for normal mode analysis.\n" + " GROMACS now uses sparse matrix storage, so the memory requirements\n" + " are fairly modest even if you recompile in double precision.\n\n"); + } +#endif + + /* Check if we can/should use sparse storage format. + * + * Sparse format is only useful when the Hessian itself is sparse, which it + * will be when we use a cutoff. + * For small systems (n<1000) it is easier to always use full matrix format, though. + */ + if (EEL_FULL(fr->ic->eeltype) || fr->rlist == 0.0) + { + GMX_LOG(mdlog.warning).appendText("Non-cutoff electrostatics used, forcing full Hessian format."); + bSparse = FALSE; + } + else if (atom_index.size() < 1000) + { + GMX_LOG(mdlog.warning).appendTextFormatted("Small system size (N=%zu), using full Hessian format.", + atom_index.size()); + bSparse = FALSE; + } + else + { + GMX_LOG(mdlog.warning).appendText("Using compressed symmetric sparse Hessian format."); + bSparse = TRUE; + } + + /* Number of dimensions, based on real atoms, that is not vsites or shell */ + sz = DIM*atom_index.size(); + + fprintf(stderr, "Allocating Hessian memory...\n\n"); + + if (bSparse) + { + sparse_matrix = gmx_sparsematrix_init(sz); + sparse_matrix->compressed_symmetric = TRUE; + } + else + { + snew(full_matrix, sz*sz); + } + + init_nrnb(nrnb); + + + /* Write start time and temperature */ + print_em_start(fplog, cr, walltime_accounting, wcycle, NM); + + /* fudge nr of steps to nr of atoms */ + inputrec->nsteps = atom_index.size()*2; + + if (bIsMaster) + { + fprintf(stderr, "starting normal mode calculation '%s'\n%" PRId64 " steps.\n\n", + *(top_global->name), inputrec->nsteps); + } + + nnodes = cr->nnodes; + + /* Make evaluate_energy do a single node force calculation */ + cr->nnodes = 1; + EnergyEvaluator energyEvaluator { + fplog, mdlog, cr, ms, + top_global, top, + inputrec, nrnb, wcycle, gstat, + vsite, constr, fcd, graph, + mdAtoms, fr, ppForceWorkload, enerd + }; + energyEvaluator.run(&state_work, mu_tot, vir, pres, -1, TRUE); + cr->nnodes = nnodes; + + /* if forces are not small, warn user */ + get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, &state_work); + + GMX_LOG(mdlog.warning).appendTextFormatted("Maximum force:%12.5e", state_work.fmax); + if (state_work.fmax > 1.0e-3) + { + GMX_LOG(mdlog.warning).appendText( + "The force is probably not small enough to " + "ensure that you are at a minimum.\n" + "Be aware that negative eigenvalues may occur\n" + "when the resulting matrix is diagonalized."); + } + + /*********************************************************** + * + * Loop over all pairs in matrix + * + * do_force called twice. 
Once with positive and + * once with negative displacement + * + ************************************************************/ + + /* Steps are divided one by one over the nodes */ + bool bNS = true; + auto state_work_x = makeArrayRef(state_work.s.x); + auto state_work_f = makeArrayRef(state_work.f); + for (unsigned int aid = cr->nodeid; aid < atom_index.size(); aid += nnodes) + { + size_t atom = atom_index[aid]; + for (size_t d = 0; d < DIM; d++) + { + int64_t step = 0; + int force_flags = GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES; + double t = 0; + + x_min = state_work_x[atom][d]; + + for (unsigned int dx = 0; (dx < 2); dx++) + { + if (dx == 0) + { + state_work_x[atom][d] = x_min - der_range; + } + else + { + state_work_x[atom][d] = x_min + der_range; + } + + /* Make evaluate_energy do a single node force calculation */ + cr->nnodes = 1; + if (shellfc) + { + /* Now is the time to relax the shells */ + relax_shell_flexcon(fplog, + cr, + ms, + mdrunOptions.verbose, + nullptr, + step, + inputrec, + bNS, + force_flags, + top, + constr, + enerd, + fcd, + &state_work.s, + state_work.f.arrayRefWithPadding(), + vir, + mdatoms, + nrnb, + wcycle, + graph, + &top_global->groups, + shellfc, + fr, + ppForceWorkload, + t, + mu_tot, + vsite, + DdOpenBalanceRegionBeforeForceComputation::no, + DdCloseBalanceRegionAfterForceComputation::no); + bNS = false; + step++; + } + else + { + energyEvaluator.run(&state_work, mu_tot, vir, pres, aid*2+dx, FALSE); + } + + cr->nnodes = nnodes; + + if (dx == 0) + { + std::copy(state_work_f.begin(), state_work_f.begin()+atom_index.size(), fneg.begin()); + } + } + + /* x is restored to original */ + state_work_x[atom][d] = x_min; + + for (size_t j = 0; j < atom_index.size(); j++) + { + for (size_t k = 0; (k < DIM); k++) + { + dfdx[j][k] = + -(state_work_f[atom_index[j]][k] - fneg[j][k])/(2*der_range); + } + } + + if (!bIsMaster) + { +#if GMX_MPI +#define mpi_type GMX_MPI_REAL + MPI_Send(dfdx[0], atom_index.size()*DIM, mpi_type, MASTER(cr), + cr->nodeid, cr->mpi_comm_mygroup); +#endif + } + else + { + for (node = 0; (node < nnodes && aid+node < atom_index.size()); node++) + { + if (node > 0) + { +#if GMX_MPI + MPI_Status stat; + MPI_Recv(dfdx[0], atom_index.size()*DIM, mpi_type, node, node, + cr->mpi_comm_mygroup, &stat); +#undef mpi_type +#endif + } + + row = (aid + node)*DIM + d; + + for (size_t j = 0; j < atom_index.size(); j++) + { + for (size_t k = 0; k < DIM; k++) + { + col = j*DIM + k; + + if (bSparse) + { + if (col >= row && dfdx[j][k] != 0.0) + { + gmx_sparsematrix_increment_value(sparse_matrix, + row, col, dfdx[j][k]); + } + } + else + { + full_matrix[row*sz+col] = dfdx[j][k]; + } + } + } + } + } + + if (mdrunOptions.verbose && fplog) + { + fflush(fplog); + } + } + /* write progress */ + if (bIsMaster && mdrunOptions.verbose) + { + fprintf(stderr, "\rFinished step %d out of %d", + static_cast<int>(std::min(atom+nnodes, atom_index.size())), + static_cast<int>(atom_index.size())); + fflush(stderr); + } + } + + if (bIsMaster) + { + fprintf(stderr, "\n\nWriting Hessian...\n"); + gmx_mtxio_write(ftp2fn(efMTX, nfile, fnm), sz, sz, full_matrix, sparse_matrix); + } + + finish_em(cr, outf, walltime_accounting, wcycle); + + walltime_accounting_set_nsteps_done(walltime_accounting, atom_index.size()*2); +} + +} // namespace gmx diff --git a/patches/gromacs-2019.1.diff/src/gromacs/mdrun/minimize.cpp.preplumed b/patches/gromacs-2019.1.diff/src/gromacs/mdrun/minimize.cpp.preplumed new file mode 100644 index 
0000000000000000000000000000000000000000..eb8e910468fd99643d8d9f5aa061caa7a10c13ab --- /dev/null +++ b/patches/gromacs-2019.1.diff/src/gromacs/mdrun/minimize.cpp.preplumed @@ -0,0 +1,2988 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! 
\internal \file + * + * \brief This file defines integrators for energy minimization + * + * \author Berk Hess <hess@kth.se> + * \author Erik Lindahl <erik@kth.se> + * \ingroup module_mdrun + */ +#include "gmxpre.h" + +#include "config.h" + +#include <cmath> +#include <cstring> +#include <ctime> + +#include <algorithm> +#include <vector> + +#include "gromacs/commandline/filenm.h" +#include "gromacs/domdec/collect.h" +#include "gromacs/domdec/domdec.h" +#include "gromacs/domdec/domdec_struct.h" +#include "gromacs/domdec/partition.h" +#include "gromacs/ewald/pme.h" +#include "gromacs/fileio/confio.h" +#include "gromacs/fileio/mtxio.h" +#include "gromacs/gmxlib/network.h" +#include "gromacs/gmxlib/nrnb.h" +#include "gromacs/imd/imd.h" +#include "gromacs/linearalgebra/sparsematrix.h" +#include "gromacs/listed-forces/manage-threading.h" +#include "gromacs/math/functions.h" +#include "gromacs/math/vec.h" +#include "gromacs/mdlib/constr.h" +#include "gromacs/mdlib/force.h" +#include "gromacs/mdlib/forcerec.h" +#include "gromacs/mdlib/gmx_omp_nthreads.h" +#include "gromacs/mdlib/md_support.h" +#include "gromacs/mdlib/mdatoms.h" +#include "gromacs/mdlib/mdebin.h" +#include "gromacs/mdlib/mdrun.h" +#include "gromacs/mdlib/mdsetup.h" +#include "gromacs/mdlib/ns.h" +#include "gromacs/mdlib/shellfc.h" +#include "gromacs/mdlib/sim_util.h" +#include "gromacs/mdlib/tgroup.h" +#include "gromacs/mdlib/trajectory_writing.h" +#include "gromacs/mdlib/update.h" +#include "gromacs/mdlib/vsite.h" +#include "gromacs/mdtypes/commrec.h" +#include "gromacs/mdtypes/inputrec.h" +#include "gromacs/mdtypes/md_enums.h" +#include "gromacs/mdtypes/state.h" +#include "gromacs/pbcutil/mshift.h" +#include "gromacs/pbcutil/pbc.h" +#include "gromacs/timing/wallcycle.h" +#include "gromacs/timing/walltime_accounting.h" +#include "gromacs/topology/mtop_util.h" +#include "gromacs/topology/topology.h" +#include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/exceptions.h" +#include "gromacs/utility/fatalerror.h" +#include "gromacs/utility/logger.h" +#include "gromacs/utility/smalloc.h" + +#include "integrator.h" + +//! Utility structure for manipulating states during EM +typedef struct { + //! Copy of the global state + t_state s; + //! Force array + PaddedVector<gmx::RVec> f; + //! Potential energy + real epot; + //! Norm of the force + real fnorm; + //! Maximum force + real fmax; + //! Direction + int a_fmax; +} em_state_t; + +//! Print the EM starting conditions +static void print_em_start(FILE *fplog, + const t_commrec *cr, + gmx_walltime_accounting_t walltime_accounting, + gmx_wallcycle_t wcycle, + const char *name) +{ + walltime_accounting_start_time(walltime_accounting); + wallcycle_start(wcycle, ewcRUN); + print_start(fplog, cr, walltime_accounting, name); +} + +//! Stop counting time for EM +static void em_time_end(gmx_walltime_accounting_t walltime_accounting, + gmx_wallcycle_t wcycle) +{ + wallcycle_stop(wcycle, ewcRUN); + + walltime_accounting_end_time(walltime_accounting); +} + +//! Printing a log file and console header +static void sp_header(FILE *out, const char *minimizer, real ftol, int nsteps) +{ + fprintf(out, "\n"); + fprintf(out, "%s:\n", minimizer); + fprintf(out, " Tolerance (Fmax) = %12.5e\n", ftol); + fprintf(out, " Number of steps = %12d\n", nsteps); +} + +//! 
Print warning message +static void warn_step(FILE *fp, + real ftol, + real fmax, + gmx_bool bLastStep, + gmx_bool bConstrain) +{ + constexpr bool realIsDouble = GMX_DOUBLE; + char buffer[2048]; + + if (!std::isfinite(fmax)) + { + sprintf(buffer, + "\nEnergy minimization has stopped because the force " + "on at least one atom is not finite. This usually means " + "atoms are overlapping. Modify the input coordinates to " + "remove atom overlap or use soft-core potentials with " + "the free energy code to avoid infinite forces.\n%s", + !realIsDouble ? + "You could also be lucky that switching to double precision " + "is sufficient to obtain finite forces.\n" : + ""); + } + else if (bLastStep) + { + sprintf(buffer, + "\nEnergy minimization reached the maximum number " + "of steps before the forces reached the requested " + "precision Fmax < %g.\n", ftol); + } + else + { + sprintf(buffer, + "\nEnergy minimization has stopped, but the forces have " + "not converged to the requested precision Fmax < %g (which " + "may not be possible for your system). It stopped " + "because the algorithm tried to make a new step whose size " + "was too small, or there was no change in the energy since " + "last step. Either way, we regard the minimization as " + "converged to within the available machine precision, " + "given your starting configuration and EM parameters.\n%s%s", + ftol, + !realIsDouble ? + "\nDouble precision normally gives you higher accuracy, but " + "this is often not needed for preparing to run molecular " + "dynamics.\n" : + "", + bConstrain ? + "You might need to increase your constraint accuracy, or turn\n" + "off constraints altogether (set constraints = none in mdp file)\n" : + ""); + } + + fputs(wrap_lines(buffer, 78, 0, FALSE), stderr); + fputs(wrap_lines(buffer, 78, 0, FALSE), fp); +} + +//! Print message about convergence of the EM +static void print_converged(FILE *fp, const char *alg, real ftol, + int64_t count, gmx_bool bDone, int64_t nsteps, + const em_state_t *ems, double sqrtNumAtoms) +{ + char buf[STEPSTRSIZE]; + + if (bDone) + { + fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n", + alg, ftol, gmx_step_str(count, buf)); + } + else if (count < nsteps) + { + fprintf(fp, "\n%s converged to machine precision in %s steps,\n" + "but did not reach the requested Fmax < %g.\n", + alg, gmx_step_str(count, buf), ftol); + } + else + { + fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n", + alg, ftol, gmx_step_str(count, buf)); + } + +#if GMX_DOUBLE + fprintf(fp, "Potential Energy = %21.14e\n", ems->epot); + fprintf(fp, "Maximum force = %21.14e on atom %d\n", ems->fmax, ems->a_fmax + 1); + fprintf(fp, "Norm of force = %21.14e\n", ems->fnorm/sqrtNumAtoms); +#else + fprintf(fp, "Potential Energy = %14.7e\n", ems->epot); + fprintf(fp, "Maximum force = %14.7e on atom %d\n", ems->fmax, ems->a_fmax + 1); + fprintf(fp, "Norm of force = %14.7e\n", ems->fnorm/sqrtNumAtoms); +#endif +} + +//! Compute the norm and max of the force array in parallel +static void get_f_norm_max(const t_commrec *cr, + t_grpopts *opts, t_mdatoms *mdatoms, const rvec *f, + real *fnorm, real *fmax, int *a_fmax) +{ + double fnorm2, *sum; + real fmax2, fam; + int la_max, a_max, start, end, i, m, gf; + + /* This routine finds the largest force and returns it. + * On parallel machines the global max is taken. 
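+     * fnorm is returned as the 2-norm of the whole force array and fmax as the
+     * largest per-atom force magnitude (a_fmax gives its atom index); frozen
+     * dimensions are excluded from both.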
+ */ + fnorm2 = 0; + fmax2 = 0; + la_max = -1; + start = 0; + end = mdatoms->homenr; + if (mdatoms->cFREEZE) + { + for (i = start; i < end; i++) + { + gf = mdatoms->cFREEZE[i]; + fam = 0; + for (m = 0; m < DIM; m++) + { + if (!opts->nFreeze[gf][m]) + { + fam += gmx::square(f[i][m]); + } + } + fnorm2 += fam; + if (fam > fmax2) + { + fmax2 = fam; + la_max = i; + } + } + } + else + { + for (i = start; i < end; i++) + { + fam = norm2(f[i]); + fnorm2 += fam; + if (fam > fmax2) + { + fmax2 = fam; + la_max = i; + } + } + } + + if (la_max >= 0 && DOMAINDECOMP(cr)) + { + a_max = cr->dd->globalAtomIndices[la_max]; + } + else + { + a_max = la_max; + } + if (PAR(cr)) + { + snew(sum, 2*cr->nnodes+1); + sum[2*cr->nodeid] = fmax2; + sum[2*cr->nodeid+1] = a_max; + sum[2*cr->nnodes] = fnorm2; + gmx_sumd(2*cr->nnodes+1, sum, cr); + fnorm2 = sum[2*cr->nnodes]; + /* Determine the global maximum */ + for (i = 0; i < cr->nnodes; i++) + { + if (sum[2*i] > fmax2) + { + fmax2 = sum[2*i]; + a_max = gmx::roundToInt(sum[2*i+1]); + } + } + sfree(sum); + } + + if (fnorm) + { + *fnorm = sqrt(fnorm2); + } + if (fmax) + { + *fmax = sqrt(fmax2); + } + if (a_fmax) + { + *a_fmax = a_max; + } +} + +//! Compute the norm of the force +static void get_state_f_norm_max(const t_commrec *cr, + t_grpopts *opts, t_mdatoms *mdatoms, + em_state_t *ems) +{ + get_f_norm_max(cr, opts, mdatoms, ems->f.rvec_array(), + &ems->fnorm, &ems->fmax, &ems->a_fmax); +} + +//! Initialize the energy minimization +static void init_em(FILE *fplog, + const gmx::MDLogger &mdlog, + const char *title, + const t_commrec *cr, + const gmx_multisim_t *ms, + gmx::IMDOutputProvider *outputProvider, + t_inputrec *ir, + const MdrunOptions &mdrunOptions, + t_state *state_global, gmx_mtop_t *top_global, + em_state_t *ems, gmx_localtop_t **top, + t_nrnb *nrnb, rvec mu_tot, + t_forcerec *fr, gmx_enerdata_t **enerd, + t_graph **graph, gmx::MDAtoms *mdAtoms, gmx_global_stat_t *gstat, + gmx_vsite_t *vsite, gmx::Constraints *constr, gmx_shellfc_t **shellfc, + int nfile, const t_filenm fnm[], + gmx_mdoutf_t *outf, t_mdebin **mdebin, + gmx_wallcycle_t wcycle) +{ + real dvdl_constr; + + if (fplog) + { + fprintf(fplog, "Initiating %s\n", title); + } + + if (MASTER(cr)) + { + state_global->ngtc = 0; + + /* Initialize lambda variables */ + initialize_lambdas(fplog, ir, &(state_global->fep_state), state_global->lambda, nullptr); + } + + init_nrnb(nrnb); + + /* Interactive molecular dynamics */ + init_IMD(ir, cr, ms, top_global, fplog, 1, + MASTER(cr) ? state_global->x.rvec_array() : nullptr, + nfile, fnm, nullptr, mdrunOptions); + + if (ir->eI == eiNM) + { + GMX_ASSERT(shellfc != nullptr, "With NM we always support shells"); + + *shellfc = init_shell_flexcon(stdout, + top_global, + constr ? constr->numFlexibleConstraints() : 0, + ir->nstcalcenergy, + DOMAINDECOMP(cr)); + } + else + { + GMX_ASSERT(EI_ENERGY_MINIMIZATION(ir->eI), "This else currently only handles energy minimizers, consider if your algorithm needs shell/flexible-constraint support"); + + /* With energy minimization, shells and flexible constraints are + * automatically minimized when treated like normal DOFS. 
+ */ + if (shellfc != nullptr) + { + *shellfc = nullptr; + } + } + + auto mdatoms = mdAtoms->mdatoms(); + if (DOMAINDECOMP(cr)) + { + *top = dd_init_local_top(top_global); + + dd_init_local_state(cr->dd, state_global, &ems->s); + + /* Distribute the charge groups over the nodes from the master node */ + dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1, + state_global, top_global, ir, + &ems->s, &ems->f, mdAtoms, *top, + fr, vsite, constr, + nrnb, nullptr, FALSE); + dd_store_state(cr->dd, &ems->s); + + *graph = nullptr; + } + else + { + state_change_natoms(state_global, state_global->natoms); + /* Just copy the state */ + ems->s = *state_global; + state_change_natoms(&ems->s, ems->s.natoms); + ems->f.resizeWithPadding(ems->s.natoms); + + snew(*top, 1); + mdAlgorithmsSetupAtomData(cr, ir, top_global, *top, fr, + graph, mdAtoms, + constr, vsite, shellfc ? *shellfc : nullptr); + + if (vsite) + { + set_vsite_top(vsite, *top, mdatoms); + } + } + + update_mdatoms(mdAtoms->mdatoms(), ems->s.lambda[efptMASS]); + + if (constr) + { + // TODO how should this cross-module support dependency be managed? + if (ir->eConstrAlg == econtSHAKE && + gmx_mtop_ftype_count(top_global, F_CONSTR) > 0) + { + gmx_fatal(FARGS, "Can not do energy minimization with %s, use %s\n", + econstr_names[econtSHAKE], econstr_names[econtLINCS]); + } + + if (!ir->bContinuation) + { + /* Constrain the starting coordinates */ + dvdl_constr = 0; + constr->apply(TRUE, TRUE, + -1, 0, 1.0, + ems->s.x.rvec_array(), + ems->s.x.rvec_array(), + nullptr, + ems->s.box, + ems->s.lambda[efptFEP], &dvdl_constr, + nullptr, nullptr, gmx::ConstraintVariable::Positions); + } + } + + if (PAR(cr)) + { + *gstat = global_stat_init(ir); + } + else + { + *gstat = nullptr; + } + + *outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, ir, top_global, nullptr, wcycle); + + snew(*enerd, 1); + init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda, + *enerd); + + if (mdebin != nullptr) + { + /* Init bin for energy stuff */ + *mdebin = init_mdebin(mdoutf_get_fp_ene(*outf), top_global, ir, nullptr); + } + + clear_rvec(mu_tot); + calc_shifts(ems->s.box, fr->shift_vec); +} + +//! Finalize the minimization +static void finish_em(const t_commrec *cr, gmx_mdoutf_t outf, + gmx_walltime_accounting_t walltime_accounting, + gmx_wallcycle_t wcycle) +{ + if (!thisRankHasDuty(cr, DUTY_PME)) + { + /* Tell the PME only node to finish */ + gmx_pme_send_finish(cr); + } + + done_mdoutf(outf); + + em_time_end(walltime_accounting, wcycle); +} + +//! Swap two different EM states during minimization +static void swap_em_state(em_state_t **ems1, em_state_t **ems2) +{ + em_state_t *tmp; + + tmp = *ems1; + *ems1 = *ems2; + *ems2 = tmp; +} + +//! 
Save the EM trajectory +static void write_em_traj(FILE *fplog, const t_commrec *cr, + gmx_mdoutf_t outf, + gmx_bool bX, gmx_bool bF, const char *confout, + gmx_mtop_t *top_global, + t_inputrec *ir, int64_t step, + em_state_t *state, + t_state *state_global, + ObservablesHistory *observablesHistory) +{ + int mdof_flags = 0; + + if (bX) + { + mdof_flags |= MDOF_X; + } + if (bF) + { + mdof_flags |= MDOF_F; + } + + /* If we want IMD output, set appropriate MDOF flag */ + if (ir->bIMD) + { + mdof_flags |= MDOF_IMD; + } + + mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, + top_global, step, static_cast<double>(step), + &state->s, state_global, observablesHistory, + state->f); + + if (confout != nullptr) + { + if (DOMAINDECOMP(cr)) + { + /* If bX=true, x was collected to state_global in the call above */ + if (!bX) + { + gmx::ArrayRef<gmx::RVec> globalXRef = MASTER(cr) ? makeArrayRef(state_global->x) : gmx::EmptyArrayRef(); + dd_collect_vec(cr->dd, &state->s, makeArrayRef(state->s.x), globalXRef); + } + } + else + { + /* Copy the local state pointer */ + state_global = &state->s; + } + + if (MASTER(cr)) + { + if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr)) + { + /* Make molecules whole only for confout writing */ + do_pbc_mtop(fplog, ir->ePBC, state->s.box, top_global, + state_global->x.rvec_array()); + } + + write_sto_conf_mtop(confout, + *top_global->name, top_global, + state_global->x.rvec_array(), nullptr, ir->ePBC, state->s.box); + } + } +} + +//! \brief Do one minimization step +// +// \returns true when the step succeeded, false when a constraint error occurred +static bool do_em_step(const t_commrec *cr, + t_inputrec *ir, t_mdatoms *md, + em_state_t *ems1, real a, const PaddedVector<gmx::RVec> *force, + em_state_t *ems2, + gmx::Constraints *constr, + int64_t count) + +{ + t_state *s1, *s2; + int start, end; + real dvdl_constr; + int nthreads gmx_unused; + + bool validStep = true; + + s1 = &ems1->s; + s2 = &ems2->s; + + if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count) + { + gmx_incons("state mismatch in do_em_step"); + } + + s2->flags = s1->flags; + + if (s2->natoms != s1->natoms) + { + state_change_natoms(s2, s1->natoms); + ems2->f.resizeWithPadding(s2->natoms); + } + if (DOMAINDECOMP(cr) && s2->cg_gl.size() != s1->cg_gl.size()) + { + s2->cg_gl.resize(s1->cg_gl.size()); + } + + copy_mat(s1->box, s2->box); + /* Copy free energy state */ + s2->lambda = s1->lambda; + copy_mat(s1->box, s2->box); + + start = 0; + end = md->homenr; + + nthreads = gmx_omp_nthreads_get(emntUpdate); +#pragma omp parallel num_threads(nthreads) + { + const rvec *x1 = s1->x.rvec_array(); + rvec *x2 = s2->x.rvec_array(); + const rvec *f = force->rvec_array(); + + int gf = 0; +#pragma omp for schedule(static) nowait + for (int i = start; i < end; i++) + { + try + { + if (md->cFREEZE) + { + gf = md->cFREEZE[i]; + } + for (int m = 0; m < DIM; m++) + { + if (ir->opts.nFreeze[gf][m]) + { + x2[i][m] = x1[i][m]; + } + else + { + x2[i][m] = x1[i][m] + a*f[i][m]; + } + } + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; + } + + if (s2->flags & (1<<estCGP)) + { + /* Copy the CG p vector */ + const rvec *p1 = s1->cg_p.rvec_array(); + rvec *p2 = s2->cg_p.rvec_array(); +#pragma omp for schedule(static) nowait + for (int i = start; i < end; i++) + { + // Trivial OpenMP block that does not throw + copy_rvec(p1[i], p2[i]); + } + } + + if (DOMAINDECOMP(cr)) + { + s2->ddp_count = s1->ddp_count; + + /* OpenMP does not supported unsigned loop variables */ +#pragma omp for schedule(static) nowait 
+ for (int i = 0; i < static_cast<int>(s2->cg_gl.size()); i++) + { + s2->cg_gl[i] = s1->cg_gl[i]; + } + s2->ddp_count_cg_gl = s1->ddp_count_cg_gl; + } + } + + if (constr) + { + dvdl_constr = 0; + validStep = + constr->apply(TRUE, TRUE, + count, 0, 1.0, + s1->x.rvec_array(), s2->x.rvec_array(), + nullptr, s2->box, + s2->lambda[efptBONDED], &dvdl_constr, + nullptr, nullptr, gmx::ConstraintVariable::Positions); + + if (cr->nnodes > 1) + { + /* This global reduction will affect performance at high + * parallelization, but we can not really avoid it. + * But usually EM is not run at high parallelization. + */ + int reductionBuffer = static_cast<int>(!validStep); + gmx_sumi(1, &reductionBuffer, cr); + validStep = (reductionBuffer == 0); + } + + // We should move this check to the different minimizers + if (!validStep && ir->eI != eiSteep) + { + gmx_fatal(FARGS, "The coordinates could not be constrained. Minimizer '%s' can not handle constraint failures, use minimizer '%s' before using '%s'.", + EI(ir->eI), EI(eiSteep), EI(ir->eI)); + } + } + + return validStep; +} + +//! Prepare EM for using domain decomposition parallellization +static void em_dd_partition_system(FILE *fplog, + const gmx::MDLogger &mdlog, + int step, const t_commrec *cr, + gmx_mtop_t *top_global, t_inputrec *ir, + em_state_t *ems, gmx_localtop_t *top, + gmx::MDAtoms *mdAtoms, t_forcerec *fr, + gmx_vsite_t *vsite, gmx::Constraints *constr, + t_nrnb *nrnb, gmx_wallcycle_t wcycle) +{ + /* Repartition the domain decomposition */ + dd_partition_system(fplog, mdlog, step, cr, FALSE, 1, + nullptr, top_global, ir, + &ems->s, &ems->f, + mdAtoms, top, fr, vsite, constr, + nrnb, wcycle, FALSE); + dd_store_state(cr->dd, &ems->s); +} + +namespace +{ + +/*! \brief Class to handle the work of setting and doing an energy evaluation. + * + * This class is a mere aggregate of parameters to pass to evaluate an + * energy, so that future changes to names and types of them consume + * less time when refactoring other code. + * + * Aggregate initialization is used, for which the chief risk is that + * if a member is added at the end and not all initializer lists are + * updated, then the member will be value initialized, which will + * typically mean initialization to zero. + * + * We only want to construct one of these with an initializer list, so + * we explicitly delete the default constructor. */ +class EnergyEvaluator +{ + public: + //! We only intend to construct such objects with an initializer list. +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9) + // Aspects of the C++11 spec changed after GCC 4.8.5, and + // compilation of the initializer list construction in + // runner.cpp fails in GCC 4.8.5. + EnergyEvaluator() = delete; +#endif + /*! \brief Evaluates an energy on the state in \c ems. + * + * \todo In practice, the same objects mu_tot, vir, and pres + * are always passed to this function, so we would rather have + * them as data members. However, their C-array types are + * unsuited for aggregate initialization. When the types + * improve, the call signature of this method can be reduced. + */ + void run(em_state_t *ems, rvec mu_tot, + tensor vir, tensor pres, + int64_t count, gmx_bool bFirst); + //! Handles logging (deprecated). + FILE *fplog; + //! Handles logging. + const gmx::MDLogger &mdlog; + //! Handles communication. + const t_commrec *cr; + //! Coordinates multi-simulations. + const gmx_multisim_t *ms; + //! Holds the simulation topology. + gmx_mtop_t *top_global; + //! Holds the domain topology. 
+ gmx_localtop_t *top; + //! User input options. + t_inputrec *inputrec; + //! Manages flop accounting. + t_nrnb *nrnb; + //! Manages wall cycle accounting. + gmx_wallcycle_t wcycle; + //! Coordinates global reduction. + gmx_global_stat_t gstat; + //! Handles virtual sites. + gmx_vsite_t *vsite; + //! Handles constraints. + gmx::Constraints *constr; + //! Handles strange things. + t_fcdata *fcd; + //! Molecular graph for SHAKE. + t_graph *graph; + //! Per-atom data for this domain. + gmx::MDAtoms *mdAtoms; + //! Handles how to calculate the forces. + t_forcerec *fr; + //! Schedule of force-calculation work each step for this task. + gmx::PpForceWorkload *ppForceWorkload; + //! Stores the computed energies. + gmx_enerdata_t *enerd; +}; + +void +EnergyEvaluator::run(em_state_t *ems, rvec mu_tot, + tensor vir, tensor pres, + int64_t count, gmx_bool bFirst) +{ + real t; + gmx_bool bNS; + tensor force_vir, shake_vir, ekin; + real dvdl_constr, prescorr, enercorr, dvdlcorr; + real terminate = 0; + + /* Set the time to the initial time, the time does not change during EM */ + t = inputrec->init_t; + + if (bFirst || + (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) + { + /* This is the first state or an old state used before the last ns */ + bNS = TRUE; + } + else + { + bNS = FALSE; + if (inputrec->nstlist > 0) + { + bNS = TRUE; + } + } + + if (vsite) + { + construct_vsites(vsite, ems->s.x.rvec_array(), 1, nullptr, + top->idef.iparams, top->idef.il, + fr->ePBC, fr->bMolPBC, cr, ems->s.box); + } + + if (DOMAINDECOMP(cr) && bNS) + { + /* Repartition the domain decomposition */ + em_dd_partition_system(fplog, mdlog, count, cr, top_global, inputrec, + ems, top, mdAtoms, fr, vsite, constr, + nrnb, wcycle); + } + + /* Calc force & energy on new trial position */ + /* do_force always puts the charge groups in the box and shifts again + * We do not unshift, so molecules are always whole in congrad.c + */ + do_force(fplog, cr, ms, inputrec, nullptr, nullptr, + count, nrnb, wcycle, top, &top_global->groups, + ems->s.box, ems->s.x.arrayRefWithPadding(), &ems->s.hist, + ems->f.arrayRefWithPadding(), force_vir, mdAtoms->mdatoms(), enerd, fcd, + ems->s.lambda, graph, fr, ppForceWorkload, vsite, mu_tot, t, nullptr, + GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | + GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY | + (bNS ? GMX_FORCE_NS : 0), + DOMAINDECOMP(cr) ? + DdOpenBalanceRegionBeforeForceComputation::yes : + DdOpenBalanceRegionBeforeForceComputation::no, + DOMAINDECOMP(cr) ? 
+ DdCloseBalanceRegionAfterForceComputation::yes : + DdCloseBalanceRegionAfterForceComputation::no); + + /* Clear the unused shake virial and pressure */ + clear_mat(shake_vir); + clear_mat(pres); + + /* Communicate stuff when parallel */ + if (PAR(cr) && inputrec->eI != eiNM) + { + wallcycle_start(wcycle, ewcMoveE); + + global_stat(gstat, cr, enerd, force_vir, shake_vir, mu_tot, + inputrec, nullptr, nullptr, nullptr, 1, &terminate, + nullptr, FALSE, + CGLO_ENERGY | + CGLO_PRESSURE | + CGLO_CONSTRAINT); + + wallcycle_stop(wcycle, ewcMoveE); + } + + /* Calculate long range corrections to pressure and energy */ + calc_dispcorr(inputrec, fr, ems->s.box, ems->s.lambda[efptVDW], + pres, force_vir, &prescorr, &enercorr, &dvdlcorr); + enerd->term[F_DISPCORR] = enercorr; + enerd->term[F_EPOT] += enercorr; + enerd->term[F_PRES] += prescorr; + enerd->term[F_DVDL] += dvdlcorr; + + ems->epot = enerd->term[F_EPOT]; + + if (constr) + { + /* Project out the constraint components of the force */ + dvdl_constr = 0; + rvec *f_rvec = ems->f.rvec_array(); + constr->apply(FALSE, FALSE, + count, 0, 1.0, + ems->s.x.rvec_array(), f_rvec, f_rvec, + ems->s.box, + ems->s.lambda[efptBONDED], &dvdl_constr, + nullptr, &shake_vir, gmx::ConstraintVariable::ForceDispl); + enerd->term[F_DVDL_CONSTR] += dvdl_constr; + m_add(force_vir, shake_vir, vir); + } + else + { + copy_mat(force_vir, vir); + } + + clear_mat(ekin); + enerd->term[F_PRES] = + calc_pres(fr->ePBC, inputrec->nwall, ems->s.box, ekin, vir, pres); + + sum_dhdl(enerd, ems->s.lambda, inputrec->fepvals); + + if (EI_ENERGY_MINIMIZATION(inputrec->eI)) + { + get_state_f_norm_max(cr, &(inputrec->opts), mdAtoms->mdatoms(), ems); + } +} + +} // namespace + +//! Parallel utility summing energies and forces +static double reorder_partsum(const t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms, + gmx_mtop_t *top_global, + em_state_t *s_min, em_state_t *s_b) +{ + t_block *cgs_gl; + int ncg, *cg_gl, *index, c, cg, i, a0, a1, a, gf, m; + double partsum; + unsigned char *grpnrFREEZE; + + if (debug) + { + fprintf(debug, "Doing reorder_partsum\n"); + } + + const rvec *fm = s_min->f.rvec_array(); + const rvec *fb = s_b->f.rvec_array(); + + cgs_gl = dd_charge_groups_global(cr->dd); + index = cgs_gl->index; + + /* Collect fm in a global vector fmg. + * This conflicts with the spirit of domain decomposition, + * but to fully optimize this a much more complicated algorithm is required. + */ + rvec *fmg; + snew(fmg, top_global->natoms); + + ncg = s_min->s.cg_gl.size(); + cg_gl = s_min->s.cg_gl.data(); + i = 0; + for (c = 0; c < ncg; c++) + { + cg = cg_gl[c]; + a0 = index[cg]; + a1 = index[cg+1]; + for (a = a0; a < a1; a++) + { + copy_rvec(fm[i], fmg[a]); + i++; + } + } + gmx_sum(top_global->natoms*3, fmg[0], cr); + + /* Now we will determine the part of the sum for the cgs in state s_b */ + ncg = s_b->s.cg_gl.size(); + cg_gl = s_b->s.cg_gl.data(); + partsum = 0; + i = 0; + gf = 0; + grpnrFREEZE = top_global->groups.grpnr[egcFREEZE]; + for (c = 0; c < ncg; c++) + { + cg = cg_gl[c]; + a0 = index[cg]; + a1 = index[cg+1]; + for (a = a0; a < a1; a++) + { + if (mdatoms->cFREEZE && grpnrFREEZE) + { + gf = grpnrFREEZE[i]; + } + for (m = 0; m < DIM; m++) + { + if (!opts->nFreeze[gf][m]) + { + partsum += (fb[i][m] - fmg[a][m])*fb[i][m]; + } + } + i++; + } + } + + sfree(fmg); + + return partsum; +} + +//! Print some stuff, like beta, whatever that means. 
+static real pr_beta(const t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms, + gmx_mtop_t *top_global, + em_state_t *s_min, em_state_t *s_b) +{ + double sum; + + /* This is just the classical Polak-Ribiere calculation of beta; + * it looks a bit complicated since we take freeze groups into account, + * and might have to sum it in parallel runs. + */ + + if (!DOMAINDECOMP(cr) || + (s_min->s.ddp_count == cr->dd->ddp_count && + s_b->s.ddp_count == cr->dd->ddp_count)) + { + const rvec *fm = s_min->f.rvec_array(); + const rvec *fb = s_b->f.rvec_array(); + sum = 0; + int gf = 0; + /* This part of code can be incorrect with DD, + * since the atom ordering in s_b and s_min might differ. + */ + for (int i = 0; i < mdatoms->homenr; i++) + { + if (mdatoms->cFREEZE) + { + gf = mdatoms->cFREEZE[i]; + } + for (int m = 0; m < DIM; m++) + { + if (!opts->nFreeze[gf][m]) + { + sum += (fb[i][m] - fm[i][m])*fb[i][m]; + } + } + } + } + else + { + /* We need to reorder cgs while summing */ + sum = reorder_partsum(cr, opts, mdatoms, top_global, s_min, s_b); + } + if (PAR(cr)) + { + gmx_sumd(1, &sum, cr); + } + + return sum/gmx::square(s_min->fnorm); +} + +namespace gmx +{ + +void +Integrator::do_cg() +{ + const char *CG = "Polak-Ribiere Conjugate Gradients"; + + gmx_localtop_t *top; + gmx_enerdata_t *enerd; + gmx_global_stat_t gstat; + t_graph *graph; + double tmp, minstep; + real stepsize; + real a, b, c, beta = 0.0; + real epot_repl = 0; + real pnorm; + t_mdebin *mdebin; + gmx_bool converged, foundlower; + rvec mu_tot; + gmx_bool do_log = FALSE, do_ene = FALSE, do_x, do_f; + tensor vir, pres; + int number_steps, neval = 0, nstcg = inputrec->nstcgsteep; + gmx_mdoutf_t outf; + int m, step, nminstep; + auto mdatoms = mdAtoms->mdatoms(); + + GMX_LOG(mdlog.info).asParagraph(). + appendText("Note that activating conjugate gradient energy minimization via the " + "integrator .mdp option and the command gmx mdrun may " + "be available in a different form in a future version of GROMACS, " + "e.g. gmx minimize and an .mdp option."); + + step = 0; + + if (MASTER(cr)) + { + // In CG, the state is extended with a search direction + state_global->flags |= (1<<estCGP); + + // Ensure the extra per-atom state array gets allocated + state_change_natoms(state_global, state_global->natoms); + + // Initialize the search direction to zero + for (RVec &cg_p : state_global->cg_p) + { + cg_p = { 0, 0, 0 }; + } + } + + /* Create 4 states on the stack and extract pointers that we will swap */ + em_state_t s0 {}, s1 {}, s2 {}, s3 {}; + em_state_t *s_min = &s0; + em_state_t *s_a = &s1; + em_state_t *s_b = &s2; + em_state_t *s_c = &s3; + + /* Init em and store the local state in s_min */ + init_em(fplog, mdlog, CG, cr, ms, outputProvider, inputrec, mdrunOptions, + state_global, top_global, s_min, &top, + nrnb, mu_tot, fr, &enerd, &graph, mdAtoms, &gstat, + vsite, constr, nullptr, + nfile, fnm, &outf, &mdebin, wcycle); + + /* Print to log file */ + print_em_start(fplog, cr, walltime_accounting, wcycle, CG); + + /* Max number of steps */ + number_steps = inputrec->nsteps; + + if (MASTER(cr)) + { + sp_header(stderr, CG, inputrec->em_tol, number_steps); + } + if (fplog) + { + sp_header(fplog, CG, inputrec->em_tol, number_steps); + } + + EnergyEvaluator energyEvaluator { + fplog, mdlog, cr, ms, + top_global, top, + inputrec, nrnb, wcycle, gstat, + vsite, constr, fcd, graph, + mdAtoms, fr, ppForceWorkload, enerd + }; + /* Call the force routine and some auxiliary (neighboursearching etc.) 
*/ + /* do_force always puts the charge groups in the box and shifts again + * We do not unshift, so molecules are always whole in congrad.c + */ + energyEvaluator.run(s_min, mu_tot, vir, pres, -1, TRUE); + + if (MASTER(cr)) + { + /* Copy stuff to the energy bin for easy printing etc. */ + matrix nullBox = {}; + upd_mdebin(mdebin, FALSE, FALSE, static_cast<double>(step), + mdatoms->tmass, enerd, nullptr, nullptr, nullptr, nullBox, + nullptr, nullptr, vir, pres, nullptr, mu_tot, constr); + + print_ebin_header(fplog, step, step); + print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL, + mdebin, fcd, &(top_global->groups), &(inputrec->opts), nullptr); + } + + /* Estimate/guess the initial stepsize */ + stepsize = inputrec->em_stepsize/s_min->fnorm; + + if (MASTER(cr)) + { + double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms)); + fprintf(stderr, " F-max = %12.5e on atom %d\n", + s_min->fmax, s_min->a_fmax+1); + fprintf(stderr, " F-Norm = %12.5e\n", + s_min->fnorm/sqrtNumAtoms); + fprintf(stderr, "\n"); + /* and copy to the log file too... */ + fprintf(fplog, " F-max = %12.5e on atom %d\n", + s_min->fmax, s_min->a_fmax+1); + fprintf(fplog, " F-Norm = %12.5e\n", + s_min->fnorm/sqrtNumAtoms); + fprintf(fplog, "\n"); + } + /* Start the loop over CG steps. + * Each successful step is counted, and we continue until + * we either converge or reach the max number of steps. + */ + converged = FALSE; + for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++) + { + + /* start taking steps in a new direction + * First time we enter the routine, beta=0, and the direction is + * simply the negative gradient. + */ + + /* Calculate the new direction in p, and the gradient in this direction, gpa */ + rvec *pm = s_min->s.cg_p.rvec_array(); + const rvec *sfm = s_min->f.rvec_array(); + double gpa = 0; + int gf = 0; + for (int i = 0; i < mdatoms->homenr; i++) + { + if (mdatoms->cFREEZE) + { + gf = mdatoms->cFREEZE[i]; + } + for (m = 0; m < DIM; m++) + { + if (!inputrec->opts.nFreeze[gf][m]) + { + pm[i][m] = sfm[i][m] + beta*pm[i][m]; + gpa -= pm[i][m]*sfm[i][m]; + /* f is negative gradient, thus the sign */ + } + else + { + pm[i][m] = 0; + } + } + } + + /* Sum the gradient along the line across CPUs */ + if (PAR(cr)) + { + gmx_sumd(1, &gpa, cr); + } + + /* Calculate the norm of the search vector */ + get_f_norm_max(cr, &(inputrec->opts), mdatoms, pm, &pnorm, nullptr, nullptr); + + /* Just in case stepsize reaches zero due to numerical precision... */ + if (stepsize <= 0) + { + stepsize = inputrec->em_stepsize/pnorm; + } + + /* + * Double check the value of the derivative in the search direction. + * If it is positive it must be due to the old information in the + * CG formula, so just remove that and start over with beta=0. + * This corresponds to a steepest descent step. 
+ */ + if (gpa > 0) + { + beta = 0; + step--; /* Don't count this step since we are restarting */ + continue; /* Go back to the beginning of the big for-loop */ + } + + /* Calculate minimum allowed stepsize, before the average (norm) + * relative change in coordinate is smaller than precision + */ + minstep = 0; + auto s_min_x = makeArrayRef(s_min->s.x); + for (int i = 0; i < mdatoms->homenr; i++) + { + for (m = 0; m < DIM; m++) + { + tmp = fabs(s_min_x[i][m]); + if (tmp < 1.0) + { + tmp = 1.0; + } + tmp = pm[i][m]/tmp; + minstep += tmp*tmp; + } + } + /* Add up from all CPUs */ + if (PAR(cr)) + { + gmx_sumd(1, &minstep, cr); + } + + minstep = GMX_REAL_EPS/sqrt(minstep/(3*top_global->natoms)); + + if (stepsize < minstep) + { + converged = TRUE; + break; + } + + /* Write coordinates if necessary */ + do_x = do_per_step(step, inputrec->nstxout); + do_f = do_per_step(step, inputrec->nstfout); + + write_em_traj(fplog, cr, outf, do_x, do_f, nullptr, + top_global, inputrec, step, + s_min, state_global, observablesHistory); + + /* Take a step downhill. + * In theory, we should minimize the function along this direction. + * That is quite possible, but it turns out to take 5-10 function evaluations + * for each line. However, we dont really need to find the exact minimum - + * it is much better to start a new CG step in a modified direction as soon + * as we are close to it. This will save a lot of energy evaluations. + * + * In practice, we just try to take a single step. + * If it worked (i.e. lowered the energy), we increase the stepsize but + * the continue straight to the next CG step without trying to find any minimum. + * If it didn't work (higher energy), there must be a minimum somewhere between + * the old position and the new one. + * + * Due to the finite numerical accuracy, it turns out that it is a good idea + * to even accept a SMALL increase in energy, if the derivative is still downhill. + * This leads to lower final energies in the tests I've done. / Erik + */ + s_a->epot = s_min->epot; + a = 0.0; + c = a + stepsize; /* reference position along line is zero */ + + if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) + { + em_dd_partition_system(fplog, mdlog, step, cr, top_global, inputrec, + s_min, top, mdAtoms, fr, vsite, constr, + nrnb, wcycle); + } + + /* Take a trial step (new coords in s_c) */ + do_em_step(cr, inputrec, mdatoms, s_min, c, &s_min->s.cg_p, s_c, + constr, -1); + + neval++; + /* Calculate energy for the trial step */ + energyEvaluator.run(s_c, mu_tot, vir, pres, -1, FALSE); + + /* Calc derivative along line */ + const rvec *pc = s_c->s.cg_p.rvec_array(); + const rvec *sfc = s_c->f.rvec_array(); + double gpc = 0; + for (int i = 0; i < mdatoms->homenr; i++) + { + for (m = 0; m < DIM; m++) + { + gpc -= pc[i][m]*sfc[i][m]; /* f is negative gradient, thus the sign */ + } + } + /* Sum the gradient along the line across CPUs */ + if (PAR(cr)) + { + gmx_sumd(1, &gpc, cr); + } + + /* This is the max amount of increase in energy we tolerate */ + tmp = std::sqrt(GMX_REAL_EPS)*fabs(s_a->epot); + + /* Accept the step if the energy is lower, or if it is not significantly higher + * and the line derivative is still negative. + */ + if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) + { + foundlower = TRUE; + /* Great, we found a better energy. 
Increase step for next iteration + * if we are still going down, decrease it otherwise + */ + if (gpc < 0) + { + stepsize *= 1.618034; /* The golden section */ + } + else + { + stepsize *= 0.618034; /* 1/golden section */ + } + } + else + { + /* New energy is the same or higher. We will have to do some work + * to find a smaller value in the interval. Take smaller step next time! + */ + foundlower = FALSE; + stepsize *= 0.618034; + } + + + + + /* OK, if we didn't find a lower value we will have to locate one now - there must + * be one in the interval [a=0,c]. + * The same thing is valid here, though: Don't spend dozens of iterations to find + * the line minimum. We try to interpolate based on the derivative at the endpoints, + * and only continue until we find a lower value. In most cases this means 1-2 iterations. + * + * I also have a safeguard for potentially really pathological functions so we never + * take more than 20 steps before we give up ... + * + * If we already found a lower value we just skip this step and continue to the update. + */ + double gpb; + if (!foundlower) + { + nminstep = 0; + + do + { + /* Select a new trial point. + * If the derivatives at points a & c have different sign we interpolate to zero, + * otherwise just do a bisection. + */ + if (gpa < 0 && gpc > 0) + { + b = a + gpa*(a-c)/(gpc-gpa); + } + else + { + b = 0.5*(a+c); + } + + /* safeguard if interpolation close to machine accuracy causes errors: + * never go outside the interval + */ + if (b <= a || b >= c) + { + b = 0.5*(a+c); + } + + if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) + { + /* Reload the old state */ + em_dd_partition_system(fplog, mdlog, -1, cr, top_global, inputrec, + s_min, top, mdAtoms, fr, vsite, constr, + nrnb, wcycle); + } + + /* Take a trial step to this new point - new coords in s_b */ + do_em_step(cr, inputrec, mdatoms, s_min, b, &s_min->s.cg_p, s_b, + constr, -1); + + neval++; + /* Calculate energy for the trial step */ + energyEvaluator.run(s_b, mu_tot, vir, pres, -1, FALSE); + + /* p does not change within a step, but since the domain decomposition + * might change, we have to use cg_p of s_b here. + */ + const rvec *pb = s_b->s.cg_p.rvec_array(); + const rvec *sfb = s_b->f.rvec_array(); + gpb = 0; + for (int i = 0; i < mdatoms->homenr; i++) + { + for (m = 0; m < DIM; m++) + { + gpb -= pb[i][m]*sfb[i][m]; /* f is negative gradient, thus the sign */ + } + } + /* Sum the gradient along the line across CPUs */ + if (PAR(cr)) + { + gmx_sumd(1, &gpb, cr); + } + + if (debug) + { + fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n", + s_a->epot, s_b->epot, s_c->epot, gpb); + } + + epot_repl = s_b->epot; + + /* Keep one of the intervals based on the value of the derivative at the new point */ + if (gpb > 0) + { + /* Replace c endpoint with b */ + swap_em_state(&s_b, &s_c); + c = b; + gpc = gpb; + } + else + { + /* Replace a endpoint with b */ + swap_em_state(&s_b, &s_a); + a = b; + gpa = gpb; + } + + /* + * Stop search as soon as we find a value smaller than the endpoints. + * Never run more than 20 steps, no matter what. + */ + nminstep++; + } + while ((epot_repl > s_a->epot || epot_repl > s_c->epot) && + (nminstep < 20)); + + if (std::fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS || + nminstep >= 20) + { + /* OK. We couldn't find a significantly lower energy. + * If beta==0 this was steepest descent, and then we give up. + * If not, set beta=0 and restart with steepest descent before quitting. 
+ */ + if (beta == 0.0) + { + /* Converged */ + converged = TRUE; + break; + } + else + { + /* Reset memory before giving up */ + beta = 0.0; + continue; + } + } + + /* Select min energy state of A & C, put the best in B. + */ + if (s_c->epot < s_a->epot) + { + if (debug) + { + fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n", + s_c->epot, s_a->epot); + } + swap_em_state(&s_b, &s_c); + gpb = gpc; + } + else + { + if (debug) + { + fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n", + s_a->epot, s_c->epot); + } + swap_em_state(&s_b, &s_a); + gpb = gpa; + } + + } + else + { + if (debug) + { + fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n", + s_c->epot); + } + swap_em_state(&s_b, &s_c); + gpb = gpc; + } + + /* new search direction */ + /* beta = 0 means forget all memory and restart with steepest descents. */ + if (nstcg && ((step % nstcg) == 0)) + { + beta = 0.0; + } + else + { + /* s_min->fnorm cannot be zero, because then we would have converged + * and broken out. + */ + + /* Polak-Ribiere update. + * Change to fnorm2/fnorm2_old for Fletcher-Reeves + */ + beta = pr_beta(cr, &inputrec->opts, mdatoms, top_global, s_min, s_b); + } + /* Limit beta to prevent oscillations */ + if (fabs(beta) > 5.0) + { + beta = 0.0; + } + + + /* update positions */ + swap_em_state(&s_min, &s_b); + gpa = gpb; + + /* Print it if necessary */ + if (MASTER(cr)) + { + if (mdrunOptions.verbose) + { + double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms)); + fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n", + step, s_min->epot, s_min->fnorm/sqrtNumAtoms, + s_min->fmax, s_min->a_fmax+1); + fflush(stderr); + } + /* Store the new (lower) energies */ + matrix nullBox = {}; + upd_mdebin(mdebin, FALSE, FALSE, static_cast<double>(step), + mdatoms->tmass, enerd, nullptr, nullptr, nullptr, nullBox, + nullptr, nullptr, vir, pres, nullptr, mu_tot, constr); + + do_log = do_per_step(step, inputrec->nstlog); + do_ene = do_per_step(step, inputrec->nstenergy); + + /* Prepare IMD energy record, if bIMD is TRUE. */ + IMD_fill_energy_record(inputrec->bIMD, inputrec->imd, enerd, step, TRUE); + + if (do_log) + { + print_ebin_header(fplog, step, step); + } + print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE, + do_log ? fplog : nullptr, step, step, eprNORMAL, + mdebin, fcd, &(top_global->groups), &(inputrec->opts), nullptr); + } + + /* Send energies and positions to the IMD client if bIMD is TRUE. */ + if (MASTER(cr) && do_IMD(inputrec->bIMD, step, cr, TRUE, state_global->box, state_global->x.rvec_array(), inputrec, 0, wcycle)) + { + IMD_send_positions(inputrec->imd); + } + + /* Stop when the maximum force lies below tolerance. + * If we have reached machine precision, converged is already set to true. + */ + converged = converged || (s_min->fmax < inputrec->em_tol); + + } /* End of the loop */ + + /* IMD cleanup, if bIMD is TRUE. */ + IMD_finalize(inputrec->bIMD, inputrec->imd); + + if (converged) + { + step--; /* we never took that last step in this case */ + + } + if (s_min->fmax > inputrec->em_tol) + { + if (MASTER(cr)) + { + warn_step(fplog, inputrec->em_tol, s_min->fmax, + step-1 == number_steps, FALSE); + } + converged = FALSE; + } + + if (MASTER(cr)) + { + /* If we printed energy and/or logfile last step (which was the last step) + * we don't have to do it again, but otherwise print the final values. 
+ */ + if (!do_log) + { + /* Write final value to log since we didn't do anything the last step */ + print_ebin_header(fplog, step, step); + } + if (!do_ene || !do_log) + { + /* Write final energy file entries */ + print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE, + !do_log ? fplog : nullptr, step, step, eprNORMAL, + mdebin, fcd, &(top_global->groups), &(inputrec->opts), nullptr); + } + } + + /* Print some stuff... */ + if (MASTER(cr)) + { + fprintf(stderr, "\nwriting lowest energy coordinates.\n"); + } + + /* IMPORTANT! + * For accurate normal mode calculation it is imperative that we + * store the last conformation into the full precision binary trajectory. + * + * However, we should only do it if we did NOT already write this step + * above (which we did if do_x or do_f was true). + */ + /* Note that with 0 < nstfout != nstxout we can end up with two frames + * in the trajectory with the same step number. + */ + do_x = !do_per_step(step, inputrec->nstxout); + do_f = (inputrec->nstfout > 0 && !do_per_step(step, inputrec->nstfout)); + + write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), + top_global, inputrec, step, + s_min, state_global, observablesHistory); + + + if (MASTER(cr)) + { + double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms)); + print_converged(stderr, CG, inputrec->em_tol, step, converged, number_steps, + s_min, sqrtNumAtoms); + print_converged(fplog, CG, inputrec->em_tol, step, converged, number_steps, + s_min, sqrtNumAtoms); + + fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval); + } + + finish_em(cr, outf, walltime_accounting, wcycle); + + /* To print the actual number of steps we needed somewhere */ + walltime_accounting_set_nsteps_done(walltime_accounting, step); +} + + +void +Integrator::do_lbfgs() +{ + static const char *LBFGS = "Low-Memory BFGS Minimizer"; + em_state_t ems; + gmx_localtop_t *top; + gmx_enerdata_t *enerd; + gmx_global_stat_t gstat; + t_graph *graph; + int ncorr, nmaxcorr, point, cp, neval, nminstep; + double stepsize, step_taken, gpa, gpb, gpc, tmp, minstep; + real *rho, *alpha, *p, *s, **dx, **dg; + real a, b, c, maxdelta, delta; + real diag, Epot0; + real dgdx, dgdg, sq, yr, beta; + t_mdebin *mdebin; + gmx_bool converged; + rvec mu_tot; + gmx_bool do_log, do_ene, do_x, do_f, foundlower, *frozen; + tensor vir, pres; + int start, end, number_steps; + gmx_mdoutf_t outf; + int i, k, m, n, gf, step; + int mdof_flags; + auto mdatoms = mdAtoms->mdatoms(); + + GMX_LOG(mdlog.info).asParagraph(). + appendText("Note that activating L-BFGS energy minimization via the " + "integrator .mdp option and the command gmx mdrun may " + "be available in a different form in a future version of GROMACS, " + "e.g. gmx minimize and an .mdp option."); + + if (PAR(cr)) + { + gmx_fatal(FARGS, "L-BFGS minimization only supports a single rank"); + } + + if (nullptr != constr) + { + gmx_fatal(FARGS, "The combination of constraints and L-BFGS minimization is not implemented. Either do not use constraints, or use another minimizer (e.g. 
steepest descent)."); + } + + n = 3*state_global->natoms; + nmaxcorr = inputrec->nbfgscorr; + + snew(frozen, n); + + snew(p, n); + snew(rho, nmaxcorr); + snew(alpha, nmaxcorr); + + snew(dx, nmaxcorr); + for (i = 0; i < nmaxcorr; i++) + { + snew(dx[i], n); + } + + snew(dg, nmaxcorr); + for (i = 0; i < nmaxcorr; i++) + { + snew(dg[i], n); + } + + step = 0; + neval = 0; + + /* Init em */ + init_em(fplog, mdlog, LBFGS, cr, ms, outputProvider, inputrec, mdrunOptions, + state_global, top_global, &ems, &top, + nrnb, mu_tot, fr, &enerd, &graph, mdAtoms, &gstat, + vsite, constr, nullptr, + nfile, fnm, &outf, &mdebin, wcycle); + + start = 0; + end = mdatoms->homenr; + + /* We need 4 working states */ + em_state_t s0 {}, s1 {}, s2 {}, s3 {}; + em_state_t *sa = &s0; + em_state_t *sb = &s1; + em_state_t *sc = &s2; + em_state_t *last = &s3; + /* Initialize by copying the state from ems (we could skip x and f here) */ + *sa = ems; + *sb = ems; + *sc = ems; + + /* Print to log file */ + print_em_start(fplog, cr, walltime_accounting, wcycle, LBFGS); + + do_log = do_ene = do_x = do_f = TRUE; + + /* Max number of steps */ + number_steps = inputrec->nsteps; + + /* Create a 3*natoms index to tell whether each degree of freedom is frozen */ + gf = 0; + for (i = start; i < end; i++) + { + if (mdatoms->cFREEZE) + { + gf = mdatoms->cFREEZE[i]; + } + for (m = 0; m < DIM; m++) + { + frozen[3*i+m] = (inputrec->opts.nFreeze[gf][m] != 0); + } + } + if (MASTER(cr)) + { + sp_header(stderr, LBFGS, inputrec->em_tol, number_steps); + } + if (fplog) + { + sp_header(fplog, LBFGS, inputrec->em_tol, number_steps); + } + + if (vsite) + { + construct_vsites(vsite, state_global->x.rvec_array(), 1, nullptr, + top->idef.iparams, top->idef.il, + fr->ePBC, fr->bMolPBC, cr, state_global->box); + } + + /* Call the force routine and some auxiliary (neighboursearching etc.) */ + /* do_force always puts the charge groups in the box and shifts again + * We do not unshift, so molecules are always whole + */ + neval++; + EnergyEvaluator energyEvaluator { + fplog, mdlog, cr, ms, + top_global, top, + inputrec, nrnb, wcycle, gstat, + vsite, constr, fcd, graph, + mdAtoms, fr, ppForceWorkload, enerd + }; + energyEvaluator.run(&ems, mu_tot, vir, pres, -1, TRUE); + + if (MASTER(cr)) + { + /* Copy stuff to the energy bin for easy printing etc. */ + matrix nullBox = {}; + upd_mdebin(mdebin, FALSE, FALSE, static_cast<double>(step), + mdatoms->tmass, enerd, nullptr, nullptr, nullptr, nullBox, + nullptr, nullptr, vir, pres, nullptr, mu_tot, constr); + + print_ebin_header(fplog, step, step); + print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL, + mdebin, fcd, &(top_global->groups), &(inputrec->opts), nullptr); + } + + /* Set the initial step. + * since it will be multiplied by the non-normalized search direction + * vector (force vector the first time), we scale it by the + * norm of the force. + */ + + if (MASTER(cr)) + { + double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms)); + fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr); + fprintf(stderr, " F-max = %12.5e on atom %d\n", ems.fmax, ems.a_fmax + 1); + fprintf(stderr, " F-Norm = %12.5e\n", ems.fnorm/sqrtNumAtoms); + fprintf(stderr, "\n"); + /* and copy to the log file too... 
*/ + fprintf(fplog, "Using %d BFGS correction steps.\n\n", nmaxcorr); + fprintf(fplog, " F-max = %12.5e on atom %d\n", ems.fmax, ems.a_fmax + 1); + fprintf(fplog, " F-Norm = %12.5e\n", ems.fnorm/sqrtNumAtoms); + fprintf(fplog, "\n"); + } + + // Point is an index to the memory of search directions, where 0 is the first one. + point = 0; + + // Set initial search direction to the force (-gradient), or 0 for frozen particles. + real *fInit = static_cast<real *>(ems.f.rvec_array()[0]); + for (i = 0; i < n; i++) + { + if (!frozen[i]) + { + dx[point][i] = fInit[i]; /* Initial search direction */ + } + else + { + dx[point][i] = 0; + } + } + + // Stepsize will be modified during the search, and actually it is not critical + // (the main efficiency in the algorithm comes from changing directions), but + // we still need an initial value, so estimate it as the inverse of the norm + // so we take small steps where the potential fluctuates a lot. + stepsize = 1.0/ems.fnorm; + + /* Start the loop over BFGS steps. + * Each successful step is counted, and we continue until + * we either converge or reach the max number of steps. + */ + + ncorr = 0; + + /* Set the gradient from the force */ + converged = FALSE; + for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++) + { + + /* Write coordinates if necessary */ + do_x = do_per_step(step, inputrec->nstxout); + do_f = do_per_step(step, inputrec->nstfout); + + mdof_flags = 0; + if (do_x) + { + mdof_flags |= MDOF_X; + } + + if (do_f) + { + mdof_flags |= MDOF_F; + } + + if (inputrec->bIMD) + { + mdof_flags |= MDOF_IMD; + } + + mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, + top_global, step, static_cast<real>(step), &ems.s, state_global, observablesHistory, ems.f); + + /* Do the linesearching in the direction dx[point][0..(n-1)] */ + + /* make s a pointer to current search direction - point=0 first time we get here */ + s = dx[point]; + + real *xx = static_cast<real *>(ems.s.x.rvec_array()[0]); + real *ff = static_cast<real *>(ems.f.rvec_array()[0]); + + // calculate line gradient in position A + for (gpa = 0, i = 0; i < n; i++) + { + gpa -= s[i]*ff[i]; + } + + /* Calculate minimum allowed stepsize along the line, before the average (norm) + * relative change in coordinate is smaller than precision + */ + for (minstep = 0, i = 0; i < n; i++) + { + tmp = fabs(xx[i]); + if (tmp < 1.0) + { + tmp = 1.0; + } + tmp = s[i]/tmp; + minstep += tmp*tmp; + } + minstep = GMX_REAL_EPS/sqrt(minstep/n); + + if (stepsize < minstep) + { + converged = TRUE; + break; + } + + // Before taking any steps along the line, store the old position + *last = ems; + real *lastx = static_cast<real *>(last->s.x.data()[0]); + real *lastf = static_cast<real *>(last->f.data()[0]); + Epot0 = ems.epot; + + *sa = ems; + + /* Take a step downhill. + * In theory, we should find the actual minimum of the function in this + * direction, somewhere along the line. + * That is quite possible, but it turns out to take 5-10 function evaluations + * for each line. However, we dont really need to find the exact minimum - + * it is much better to start a new BFGS step in a modified direction as soon + * as we are close to it. This will save a lot of energy evaluations. + * + * In practice, we just try to take a single step. + * If it worked (i.e. lowered the energy), we increase the stepsize but + * continue straight to the next BFGS step without trying to find any minimum, + * i.e. we change the search direction too. 
If the line was smooth, it is + * likely we are in a smooth region, and then it makes sense to take longer + * steps in the modified search direction too. + * + * If it didn't work (higher energy), there must be a minimum somewhere between + * the old position and the new one. Then we need to start by finding a lower + * value before we change search direction. Since the energy was apparently + * quite rough, we need to decrease the step size. + * + * Due to the finite numerical accuracy, it turns out that it is a good idea + * to accept a SMALL increase in energy, if the derivative is still downhill. + * This leads to lower final energies in the tests I've done. / Erik + */ + + // State "A" is the first position along the line. + // reference position along line is initially zero + a = 0.0; + + // Check stepsize first. We do not allow displacements + // larger than emstep. + // + do + { + // Pick a new position C by adding stepsize to A. + c = a + stepsize; + + // Calculate what the largest change in any individual coordinate + // would be (translation along line * gradient along line) + maxdelta = 0; + for (i = 0; i < n; i++) + { + delta = c*s[i]; + if (delta > maxdelta) + { + maxdelta = delta; + } + } + // If any displacement is larger than the stepsize limit, reduce the step + if (maxdelta > inputrec->em_stepsize) + { + stepsize *= 0.1; + } + } + while (maxdelta > inputrec->em_stepsize); + + // Take a trial step and move the coordinate array xc[] to position C + real *xc = static_cast<real *>(sc->s.x.rvec_array()[0]); + for (i = 0; i < n; i++) + { + xc[i] = lastx[i] + c*s[i]; + } + + neval++; + // Calculate energy for the trial step in position C + energyEvaluator.run(sc, mu_tot, vir, pres, step, FALSE); + + // Calc line gradient in position C + real *fc = static_cast<real *>(sc->f.rvec_array()[0]); + for (gpc = 0, i = 0; i < n; i++) + { + gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */ + } + /* Sum the gradient along the line across CPUs */ + if (PAR(cr)) + { + gmx_sumd(1, &gpc, cr); + } + + // This is the max amount of increase in energy we tolerate. + // By allowing VERY small changes (close to numerical precision) we + // frequently find even better (lower) final energies. + tmp = std::sqrt(GMX_REAL_EPS)*fabs(sa->epot); + + // Accept the step if the energy is lower in the new position C (compared to A), + // or if it is not significantly higher and the line derivative is still negative. + foundlower = sc->epot < sa->epot || (gpc < 0 && sc->epot < (sa->epot + tmp)); + // If true, great, we found a better energy. We no longer try to alter the + // stepsize, but simply accept this new better position. The we select a new + // search direction instead, which will be much more efficient than continuing + // to take smaller steps along a line. Set fnorm based on the new C position, + // which will be used to update the stepsize to 1/fnorm further down. + + // If false, the energy is NOT lower in point C, i.e. it will be the same + // or higher than in point A. In this case it is pointless to move to point C, + // so we will have to do more iterations along the same line to find a smaller + // value in the interval [A=0.0,C]. + // Here, A is still 0.0, but that will change when we do a search in the interval + // [0.0,C] below. That search we will do by interpolation or bisection rather + // than with the stepsize, so no need to modify it. For the next search direction + // it will be reset to 1/fnorm anyway. 
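When the trial point C does not lower the energy, the loop that follows searches the interval [A, C] for a better point. Each new trial point B is chosen by interpolating the line derivatives at the endpoints towards zero when they bracket a sign change, and by bisection otherwise, with a guard against stepping outside the interval. The same rule appears in the conjugate-gradient minimizer above; isolated as a hypothetical helper (for illustration, not part of the patch) it amounts to:

    // Hypothetical helper isolating the trial-point rule used by the line searches
    // in do_cg() and do_lbfgs(): a and c bound the interval, gpa and gpc are the
    // line derivatives of the potential energy at those endpoints.
    static double pickTrialPoint(double a, double c, double gpa, double gpc)
    {
        double b;
        if (gpa < 0 && gpc > 0)
        {
            // The derivatives bracket a zero: secant interpolation towards it.
            b = a + gpa*(a - c)/(gpc - gpa);
        }
        else
        {
            // No bracket: plain bisection.
            b = 0.5*(a + c);
        }
        // Never step outside [a, c], even if round-off pushes the estimate out.
        if (b <= a || b >= c)
        {
            b = 0.5*(a + c);
        }
        return b;
    }
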
+ + if (!foundlower) + { + // OK, if we didn't find a lower value we will have to locate one now - there must + // be one in the interval [a,c]. + // The same thing is valid here, though: Don't spend dozens of iterations to find + // the line minimum. We try to interpolate based on the derivative at the endpoints, + // and only continue until we find a lower value. In most cases this means 1-2 iterations. + // I also have a safeguard for potentially really pathological functions so we never + // take more than 20 steps before we give up. + // If we already found a lower value we just skip this step and continue to the update. + real fnorm = 0; + nminstep = 0; + do + { + // Select a new trial point B in the interval [A,C]. + // If the derivatives at points a & c have different sign we interpolate to zero, + // otherwise just do a bisection since there might be multiple minima/maxima + // inside the interval. + if (gpa < 0 && gpc > 0) + { + b = a + gpa*(a-c)/(gpc-gpa); + } + else + { + b = 0.5*(a+c); + } + + /* safeguard if interpolation close to machine accuracy causes errors: + * never go outside the interval + */ + if (b <= a || b >= c) + { + b = 0.5*(a+c); + } + + // Take a trial step to point B + real *xb = static_cast<real *>(sb->s.x.rvec_array()[0]); + for (i = 0; i < n; i++) + { + xb[i] = lastx[i] + b*s[i]; + } + + neval++; + // Calculate energy for the trial step in point B + energyEvaluator.run(sb, mu_tot, vir, pres, step, FALSE); + fnorm = sb->fnorm; + + // Calculate gradient in point B + real *fb = static_cast<real *>(sb->f.rvec_array()[0]); + for (gpb = 0, i = 0; i < n; i++) + { + gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */ + + } + /* Sum the gradient along the line across CPUs */ + if (PAR(cr)) + { + gmx_sumd(1, &gpb, cr); + } + + // Keep one of the intervals [A,B] or [B,C] based on the value of the derivative + // at the new point B, and rename the endpoints of this new interval A and C. + if (gpb > 0) + { + /* Replace c endpoint with b */ + c = b; + /* swap states b and c */ + swap_em_state(&sb, &sc); + } + else + { + /* Replace a endpoint with b */ + a = b; + /* swap states a and b */ + swap_em_state(&sa, &sb); + } + + /* + * Stop search as soon as we find a value smaller than the endpoints, + * or if the tolerance is below machine precision. + * Never run more than 20 steps, no matter what. + */ + nminstep++; + } + while ((sb->epot > sa->epot || sb->epot > sc->epot) && (nminstep < 20)); + + if (std::fabs(sb->epot - Epot0) < GMX_REAL_EPS || nminstep >= 20) + { + /* OK. We couldn't find a significantly lower energy. + * If ncorr==0 this was steepest descent, and then we give up. + * If not, reset memory to restart as steepest descent before quitting. 
+ */ + if (ncorr == 0) + { + /* Converged */ + converged = TRUE; + break; + } + else + { + /* Reset memory */ + ncorr = 0; + /* Search in gradient direction */ + for (i = 0; i < n; i++) + { + dx[point][i] = ff[i]; + } + /* Reset stepsize */ + stepsize = 1.0/fnorm; + continue; + } + } + + /* Select min energy state of A & C, put the best in xx/ff/Epot + */ + if (sc->epot < sa->epot) + { + /* Use state C */ + ems = *sc; + step_taken = c; + } + else + { + /* Use state A */ + ems = *sa; + step_taken = a; + } + + } + else + { + /* found lower */ + /* Use state C */ + ems = *sc; + step_taken = c; + } + + /* Update the memory information, and calculate a new + * approximation of the inverse hessian + */ + + /* Have new data in Epot, xx, ff */ + if (ncorr < nmaxcorr) + { + ncorr++; + } + + for (i = 0; i < n; i++) + { + dg[point][i] = lastf[i]-ff[i]; + dx[point][i] *= step_taken; + } + + dgdg = 0; + dgdx = 0; + for (i = 0; i < n; i++) + { + dgdg += dg[point][i]*dg[point][i]; + dgdx += dg[point][i]*dx[point][i]; + } + + diag = dgdx/dgdg; + + rho[point] = 1.0/dgdx; + point++; + + if (point >= nmaxcorr) + { + point = 0; + } + + /* Update */ + for (i = 0; i < n; i++) + { + p[i] = ff[i]; + } + + cp = point; + + /* Recursive update. First go back over the memory points */ + for (k = 0; k < ncorr; k++) + { + cp--; + if (cp < 0) + { + cp = ncorr-1; + } + + sq = 0; + for (i = 0; i < n; i++) + { + sq += dx[cp][i]*p[i]; + } + + alpha[cp] = rho[cp]*sq; + + for (i = 0; i < n; i++) + { + p[i] -= alpha[cp]*dg[cp][i]; + } + } + + for (i = 0; i < n; i++) + { + p[i] *= diag; + } + + /* And then go forward again */ + for (k = 0; k < ncorr; k++) + { + yr = 0; + for (i = 0; i < n; i++) + { + yr += p[i]*dg[cp][i]; + } + + beta = rho[cp]*yr; + beta = alpha[cp]-beta; + + for (i = 0; i < n; i++) + { + p[i] += beta*dx[cp][i]; + } + + cp++; + if (cp >= ncorr) + { + cp = 0; + } + } + + for (i = 0; i < n; i++) + { + if (!frozen[i]) + { + dx[point][i] = p[i]; + } + else + { + dx[point][i] = 0; + } + } + + /* Print it if necessary */ + if (MASTER(cr)) + { + if (mdrunOptions.verbose) + { + double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms)); + fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n", + step, ems.epot, ems.fnorm/sqrtNumAtoms, ems.fmax, ems.a_fmax + 1); + fflush(stderr); + } + /* Store the new (lower) energies */ + matrix nullBox = {}; + upd_mdebin(mdebin, FALSE, FALSE, static_cast<double>(step), + mdatoms->tmass, enerd, nullptr, nullptr, nullptr, nullBox, + nullptr, nullptr, vir, pres, nullptr, mu_tot, constr); + do_log = do_per_step(step, inputrec->nstlog); + do_ene = do_per_step(step, inputrec->nstenergy); + if (do_log) + { + print_ebin_header(fplog, step, step); + } + print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE, + do_log ? fplog : nullptr, step, step, eprNORMAL, + mdebin, fcd, &(top_global->groups), &(inputrec->opts), nullptr); + } + + /* Send x and E to IMD client, if bIMD is TRUE. */ + if (do_IMD(inputrec->bIMD, step, cr, TRUE, state_global->box, state_global->x.rvec_array(), inputrec, 0, wcycle) && MASTER(cr)) + { + IMD_send_positions(inputrec->imd); + } + + // Reset stepsize in we are doing more iterations + stepsize = 1.0/ems.fnorm; + + /* Stop when the maximum force lies below tolerance. + * If we have reached machine precision, converged is already set to true. + */ + converged = converged || (ems.fmax < inputrec->em_tol); + + } /* End of the loop */ + + /* IMD cleanup, if bIMD is TRUE. 
*/ + IMD_finalize(inputrec->bIMD, inputrec->imd); + + if (converged) + { + step--; /* we never took that last step in this case */ + + } + if (ems.fmax > inputrec->em_tol) + { + if (MASTER(cr)) + { + warn_step(fplog, inputrec->em_tol, ems.fmax, + step-1 == number_steps, FALSE); + } + converged = FALSE; + } + + /* If we printed energy and/or logfile last step (which was the last step) + * we don't have to do it again, but otherwise print the final values. + */ + if (!do_log) /* Write final value to log since we didn't do anythin last step */ + { + print_ebin_header(fplog, step, step); + } + if (!do_ene || !do_log) /* Write final energy file entries */ + { + print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE, + !do_log ? fplog : nullptr, step, step, eprNORMAL, + mdebin, fcd, &(top_global->groups), &(inputrec->opts), nullptr); + } + + /* Print some stuff... */ + if (MASTER(cr)) + { + fprintf(stderr, "\nwriting lowest energy coordinates.\n"); + } + + /* IMPORTANT! + * For accurate normal mode calculation it is imperative that we + * store the last conformation into the full precision binary trajectory. + * + * However, we should only do it if we did NOT already write this step + * above (which we did if do_x or do_f was true). + */ + do_x = !do_per_step(step, inputrec->nstxout); + do_f = !do_per_step(step, inputrec->nstfout); + write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), + top_global, inputrec, step, + &ems, state_global, observablesHistory); + + if (MASTER(cr)) + { + double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms)); + print_converged(stderr, LBFGS, inputrec->em_tol, step, converged, + number_steps, &ems, sqrtNumAtoms); + print_converged(fplog, LBFGS, inputrec->em_tol, step, converged, + number_steps, &ems, sqrtNumAtoms); + + fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval); + } + + finish_em(cr, outf, walltime_accounting, wcycle); + + /* To print the actual number of steps we needed somewhere */ + walltime_accounting_set_nsteps_done(walltime_accounting, step); +} + +void +Integrator::do_steep() +{ + const char *SD = "Steepest Descents"; + gmx_localtop_t *top; + gmx_enerdata_t *enerd; + gmx_global_stat_t gstat; + t_graph *graph; + real stepsize; + real ustep; + gmx_mdoutf_t outf; + t_mdebin *mdebin; + gmx_bool bDone, bAbort, do_x, do_f; + tensor vir, pres; + rvec mu_tot; + int nsteps; + int count = 0; + int steps_accepted = 0; + auto mdatoms = mdAtoms->mdatoms(); + + GMX_LOG(mdlog.info).asParagraph(). + appendText("Note that activating steepest-descent energy minimization via the " + "integrator .mdp option and the command gmx mdrun may " + "be available in a different form in a future version of GROMACS, " + "e.g. gmx minimize and an .mdp option."); + + /* Create 2 states on the stack and extract pointers that we will swap */ + em_state_t s0 {}, s1 {}; + em_state_t *s_min = &s0; + em_state_t *s_try = &s1; + + /* Init em and store the local state in s_try */ + init_em(fplog, mdlog, SD, cr, ms, outputProvider, inputrec, mdrunOptions, + state_global, top_global, s_try, &top, + nrnb, mu_tot, fr, &enerd, &graph, mdAtoms, &gstat, + vsite, constr, nullptr, + nfile, fnm, &outf, &mdebin, wcycle); + + /* Print to log file */ + print_em_start(fplog, cr, walltime_accounting, wcycle, SD); + + /* Set variables for stepsize (in nm). This is the largest + * step that we are going to make in any direction. 
+ */ + ustep = inputrec->em_stepsize; + stepsize = 0; + + /* Max number of steps */ + nsteps = inputrec->nsteps; + + if (MASTER(cr)) + { + /* Print to the screen */ + sp_header(stderr, SD, inputrec->em_tol, nsteps); + } + if (fplog) + { + sp_header(fplog, SD, inputrec->em_tol, nsteps); + } + EnergyEvaluator energyEvaluator { + fplog, mdlog, cr, ms, + top_global, top, + inputrec, nrnb, wcycle, gstat, + vsite, constr, fcd, graph, + mdAtoms, fr, ppForceWorkload, enerd + }; + + /**** HERE STARTS THE LOOP **** + * count is the counter for the number of steps + * bDone will be TRUE when the minimization has converged + * bAbort will be TRUE when nsteps steps have been performed or when + * the stepsize becomes smaller than is reasonable for machine precision + */ + count = 0; + bDone = FALSE; + bAbort = FALSE; + while (!bDone && !bAbort) + { + bAbort = (nsteps >= 0) && (count == nsteps); + + /* set new coordinates, except for first step */ + bool validStep = true; + if (count > 0) + { + validStep = + do_em_step(cr, inputrec, mdatoms, + s_min, stepsize, &s_min->f, s_try, + constr, count); + } + + if (validStep) + { + energyEvaluator.run(s_try, mu_tot, vir, pres, count, count == 0); + } + else + { + // Signal constraint error during stepping with energy=inf + s_try->epot = std::numeric_limits<real>::infinity(); + } + + if (MASTER(cr)) + { + print_ebin_header(fplog, count, count); + } + + if (count == 0) + { + s_min->epot = s_try->epot; + } + + /* Print it if necessary */ + if (MASTER(cr)) + { + if (mdrunOptions.verbose) + { + fprintf(stderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c", + count, ustep, s_try->epot, s_try->fmax, s_try->a_fmax+1, + ( (count == 0) || (s_try->epot < s_min->epot) ) ? '\n' : '\r'); + fflush(stderr); + } + + if ( (count == 0) || (s_try->epot < s_min->epot) ) + { + /* Store the new (lower) energies */ + matrix nullBox = {}; + upd_mdebin(mdebin, FALSE, FALSE, static_cast<double>(count), + mdatoms->tmass, enerd, nullptr, nullptr, nullptr, + nullBox, nullptr, nullptr, vir, pres, nullptr, mu_tot, constr); + + /* Prepare IMD energy record, if bIMD is TRUE. */ + IMD_fill_energy_record(inputrec->bIMD, inputrec->imd, enerd, count, TRUE); + + print_ebin(mdoutf_get_fp_ene(outf), TRUE, + do_per_step(steps_accepted, inputrec->nstdisreout), + do_per_step(steps_accepted, inputrec->nstorireout), + fplog, count, count, eprNORMAL, + mdebin, fcd, &(top_global->groups), &(inputrec->opts), nullptr); + fflush(fplog); + } + } + + /* Now if the new energy is smaller than the previous... + * or if this is the first step! + * or if we did random steps! + */ + + if ( (count == 0) || (s_try->epot < s_min->epot) ) + { + steps_accepted++; + + /* Test whether the convergence criterion is met... */ + bDone = (s_try->fmax < inputrec->em_tol); + + /* Copy the arrays for force, positions and energy */ + /* The 'Min' array always holds the coords and forces of the minimal + sampled energy */ + swap_em_state(&s_min, &s_try); + if (count > 0) + { + ustep *= 1.2; + } + + /* Write to trn, if necessary */ + do_x = do_per_step(steps_accepted, inputrec->nstxout); + do_f = do_per_step(steps_accepted, inputrec->nstfout); + write_em_traj(fplog, cr, outf, do_x, do_f, nullptr, + top_global, inputrec, count, + s_min, state_global, observablesHistory); + } + else + { + /* If energy is not smaller make the step smaller... 
*/ + ustep *= 0.5; + + if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) + { + /* Reload the old state */ + em_dd_partition_system(fplog, mdlog, count, cr, top_global, inputrec, + s_min, top, mdAtoms, fr, vsite, constr, + nrnb, wcycle); + } + } + + /* Determine new step */ + stepsize = ustep/s_min->fmax; + + /* Check if stepsize is too small, with 1 nm as a characteristic length */ +#if GMX_DOUBLE + if (count == nsteps || ustep < 1e-12) +#else + if (count == nsteps || ustep < 1e-6) +#endif + { + if (MASTER(cr)) + { + warn_step(fplog, inputrec->em_tol, s_min->fmax, + count == nsteps, constr != nullptr); + } + bAbort = TRUE; + } + + /* Send IMD energies and positions, if bIMD is TRUE. */ + if (do_IMD(inputrec->bIMD, count, cr, TRUE, state_global->box, + MASTER(cr) ? state_global->x.rvec_array() : nullptr, + inputrec, 0, wcycle) && + MASTER(cr)) + { + IMD_send_positions(inputrec->imd); + } + + count++; + } /* End of the loop */ + + /* IMD cleanup, if bIMD is TRUE. */ + IMD_finalize(inputrec->bIMD, inputrec->imd); + + /* Print some data... */ + if (MASTER(cr)) + { + fprintf(stderr, "\nwriting lowest energy coordinates.\n"); + } + write_em_traj(fplog, cr, outf, TRUE, inputrec->nstfout != 0, ftp2fn(efSTO, nfile, fnm), + top_global, inputrec, count, + s_min, state_global, observablesHistory); + + if (MASTER(cr)) + { + double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms)); + + print_converged(stderr, SD, inputrec->em_tol, count, bDone, nsteps, + s_min, sqrtNumAtoms); + print_converged(fplog, SD, inputrec->em_tol, count, bDone, nsteps, + s_min, sqrtNumAtoms); + } + + finish_em(cr, outf, walltime_accounting, wcycle); + + /* To print the actual number of steps we needed somewhere */ + inputrec->nsteps = count; + + walltime_accounting_set_nsteps_done(walltime_accounting, count); +} + +void +Integrator::do_nm() +{ + const char *NM = "Normal Mode Analysis"; + gmx_mdoutf_t outf; + int nnodes, node; + gmx_localtop_t *top; + gmx_enerdata_t *enerd; + gmx_global_stat_t gstat; + t_graph *graph; + tensor vir, pres; + rvec mu_tot; + rvec *dfdx; + gmx_bool bSparse; /* use sparse matrix storage format */ + size_t sz; + gmx_sparsematrix_t * sparse_matrix = nullptr; + real * full_matrix = nullptr; + + /* added with respect to mdrun */ + int row, col; + real der_range = 10.0*std::sqrt(GMX_REAL_EPS); + real x_min; + bool bIsMaster = MASTER(cr); + auto mdatoms = mdAtoms->mdatoms(); + + GMX_LOG(mdlog.info).asParagraph(). + appendText("Note that activating normal-mode analysis via the integrator " + ".mdp option and the command gmx mdrun may " + "be available in a different form in a future version of GROMACS, " + "e.g. 
gmx normal-modes."); + + if (constr != nullptr) + { + gmx_fatal(FARGS, "Constraints present with Normal Mode Analysis, this combination is not supported"); + } + + gmx_shellfc_t *shellfc; + + em_state_t state_work {}; + + /* Init em and store the local state in state_minimum */ + init_em(fplog, mdlog, NM, cr, ms, outputProvider, inputrec, mdrunOptions, + state_global, top_global, &state_work, &top, + nrnb, mu_tot, fr, &enerd, &graph, mdAtoms, &gstat, + vsite, constr, &shellfc, + nfile, fnm, &outf, nullptr, wcycle); + + std::vector<int> atom_index = get_atom_index(top_global); + std::vector<gmx::RVec> fneg(atom_index.size(), {0, 0, 0}); + snew(dfdx, atom_index.size()); + +#if !GMX_DOUBLE + if (bIsMaster) + { + fprintf(stderr, + "NOTE: This version of GROMACS has been compiled in single precision,\n" + " which MIGHT not be accurate enough for normal mode analysis.\n" + " GROMACS now uses sparse matrix storage, so the memory requirements\n" + " are fairly modest even if you recompile in double precision.\n\n"); + } +#endif + + /* Check if we can/should use sparse storage format. + * + * Sparse format is only useful when the Hessian itself is sparse, which it + * will be when we use a cutoff. + * For small systems (n<1000) it is easier to always use full matrix format, though. + */ + if (EEL_FULL(fr->ic->eeltype) || fr->rlist == 0.0) + { + GMX_LOG(mdlog.warning).appendText("Non-cutoff electrostatics used, forcing full Hessian format."); + bSparse = FALSE; + } + else if (atom_index.size() < 1000) + { + GMX_LOG(mdlog.warning).appendTextFormatted("Small system size (N=%zu), using full Hessian format.", + atom_index.size()); + bSparse = FALSE; + } + else + { + GMX_LOG(mdlog.warning).appendText("Using compressed symmetric sparse Hessian format."); + bSparse = TRUE; + } + + /* Number of dimensions, based on real atoms, that is not vsites or shell */ + sz = DIM*atom_index.size(); + + fprintf(stderr, "Allocating Hessian memory...\n\n"); + + if (bSparse) + { + sparse_matrix = gmx_sparsematrix_init(sz); + sparse_matrix->compressed_symmetric = TRUE; + } + else + { + snew(full_matrix, sz*sz); + } + + init_nrnb(nrnb); + + + /* Write start time and temperature */ + print_em_start(fplog, cr, walltime_accounting, wcycle, NM); + + /* fudge nr of steps to nr of atoms */ + inputrec->nsteps = atom_index.size()*2; + + if (bIsMaster) + { + fprintf(stderr, "starting normal mode calculation '%s'\n%" PRId64 " steps.\n\n", + *(top_global->name), inputrec->nsteps); + } + + nnodes = cr->nnodes; + + /* Make evaluate_energy do a single node force calculation */ + cr->nnodes = 1; + EnergyEvaluator energyEvaluator { + fplog, mdlog, cr, ms, + top_global, top, + inputrec, nrnb, wcycle, gstat, + vsite, constr, fcd, graph, + mdAtoms, fr, ppForceWorkload, enerd + }; + energyEvaluator.run(&state_work, mu_tot, vir, pres, -1, TRUE); + cr->nnodes = nnodes; + + /* if forces are not small, warn user */ + get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, &state_work); + + GMX_LOG(mdlog.warning).appendTextFormatted("Maximum force:%12.5e", state_work.fmax); + if (state_work.fmax > 1.0e-3) + { + GMX_LOG(mdlog.warning).appendText( + "The force is probably not small enough to " + "ensure that you are at a minimum.\n" + "Be aware that negative eigenvalues may occur\n" + "when the resulting matrix is diagonalized."); + } + + /*********************************************************** + * + * Loop over all pairs in matrix + * + * do_force called twice. 
Once with positive and + * once with negative displacement + * + ************************************************************/ + + /* Steps are divided one by one over the nodes */ + bool bNS = true; + auto state_work_x = makeArrayRef(state_work.s.x); + auto state_work_f = makeArrayRef(state_work.f); + for (unsigned int aid = cr->nodeid; aid < atom_index.size(); aid += nnodes) + { + size_t atom = atom_index[aid]; + for (size_t d = 0; d < DIM; d++) + { + int64_t step = 0; + int force_flags = GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES; + double t = 0; + + x_min = state_work_x[atom][d]; + + for (unsigned int dx = 0; (dx < 2); dx++) + { + if (dx == 0) + { + state_work_x[atom][d] = x_min - der_range; + } + else + { + state_work_x[atom][d] = x_min + der_range; + } + + /* Make evaluate_energy do a single node force calculation */ + cr->nnodes = 1; + if (shellfc) + { + /* Now is the time to relax the shells */ + relax_shell_flexcon(fplog, + cr, + ms, + mdrunOptions.verbose, + nullptr, + step, + inputrec, + bNS, + force_flags, + top, + constr, + enerd, + fcd, + &state_work.s, + state_work.f.arrayRefWithPadding(), + vir, + mdatoms, + nrnb, + wcycle, + graph, + &top_global->groups, + shellfc, + fr, + ppForceWorkload, + t, + mu_tot, + vsite, + DdOpenBalanceRegionBeforeForceComputation::no, + DdCloseBalanceRegionAfterForceComputation::no); + bNS = false; + step++; + } + else + { + energyEvaluator.run(&state_work, mu_tot, vir, pres, aid*2+dx, FALSE); + } + + cr->nnodes = nnodes; + + if (dx == 0) + { + std::copy(state_work_f.begin(), state_work_f.begin()+atom_index.size(), fneg.begin()); + } + } + + /* x is restored to original */ + state_work_x[atom][d] = x_min; + + for (size_t j = 0; j < atom_index.size(); j++) + { + for (size_t k = 0; (k < DIM); k++) + { + dfdx[j][k] = + -(state_work_f[atom_index[j]][k] - fneg[j][k])/(2*der_range); + } + } + + if (!bIsMaster) + { +#if GMX_MPI +#define mpi_type GMX_MPI_REAL + MPI_Send(dfdx[0], atom_index.size()*DIM, mpi_type, MASTER(cr), + cr->nodeid, cr->mpi_comm_mygroup); +#endif + } + else + { + for (node = 0; (node < nnodes && aid+node < atom_index.size()); node++) + { + if (node > 0) + { +#if GMX_MPI + MPI_Status stat; + MPI_Recv(dfdx[0], atom_index.size()*DIM, mpi_type, node, node, + cr->mpi_comm_mygroup, &stat); +#undef mpi_type +#endif + } + + row = (aid + node)*DIM + d; + + for (size_t j = 0; j < atom_index.size(); j++) + { + for (size_t k = 0; k < DIM; k++) + { + col = j*DIM + k; + + if (bSparse) + { + if (col >= row && dfdx[j][k] != 0.0) + { + gmx_sparsematrix_increment_value(sparse_matrix, + row, col, dfdx[j][k]); + } + } + else + { + full_matrix[row*sz+col] = dfdx[j][k]; + } + } + } + } + } + + if (mdrunOptions.verbose && fplog) + { + fflush(fplog); + } + } + /* write progress */ + if (bIsMaster && mdrunOptions.verbose) + { + fprintf(stderr, "\rFinished step %d out of %d", + static_cast<int>(std::min(atom+nnodes, atom_index.size())), + static_cast<int>(atom_index.size())); + fflush(stderr); + } + } + + if (bIsMaster) + { + fprintf(stderr, "\n\nWriting Hessian...\n"); + gmx_mtxio_write(ftp2fn(efMTX, nfile, fnm), sz, sz, full_matrix, sparse_matrix); + } + + finish_em(cr, outf, walltime_accounting, wcycle); + + walltime_accounting_set_nsteps_done(walltime_accounting, atom_index.size()*2); +} + +} // namespace gmx diff --git a/patches/gromacs-2019.1.diff/src/gromacs/mdrun/replicaexchange.cpp b/patches/gromacs-2019.1.diff/src/gromacs/mdrun/replicaexchange.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..f0306c64f05d0c66387e05b8fe68c4c96fef437c --- /dev/null +++ b/patches/gromacs-2019.1.diff/src/gromacs/mdrun/replicaexchange.cpp @@ -0,0 +1,1487 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ + +/*! \internal \file + * + * \brief Implements the replica exchange routines. + * + * \author David van der Spoel <david.vanderspoel@icm.uu.se> + * \author Mark Abraham <mark.j.abraham@gmail.com> + * \ingroup module_mdrun + */ +#include "gmxpre.h" + +#include "replicaexchange.h" + +#include "config.h" + +#include <cmath> + +#include <random> + +#include "gromacs/domdec/collect.h" +#include "gromacs/gmxlib/network.h" +#include "gromacs/math/units.h" +#include "gromacs/math/vec.h" +#include "gromacs/mdrun/multisim.h" +#include "gromacs/mdtypes/commrec.h" +#include "gromacs/mdtypes/enerdata.h" +#include "gromacs/mdtypes/inputrec.h" +#include "gromacs/mdtypes/md_enums.h" +#include "gromacs/mdtypes/state.h" +#include "gromacs/random/threefry.h" +#include "gromacs/random/uniformintdistribution.h" +#include "gromacs/random/uniformrealdistribution.h" +#include "gromacs/utility/fatalerror.h" +#include "gromacs/utility/pleasecite.h" +#include "gromacs/utility/smalloc.h" + + +/* PLUMED */ +#include "../../../Plumed.h" +extern int plumedswitch; +extern plumed plumedmain; +/* END PLUMED */ + +/* PLUMED HREX */ +extern int plumed_hrex; +/* END PLUMED HREX */ + +//! Helps cut off probability values. +constexpr int c_probabilityCutoff = 100; + +/* we don't bother evaluating if events are more rare than exp(-100) = 3.7x10^-44 */ + +//! Rank in the multisimulation +#define MSRANK(ms, nodeid) (nodeid) + +//! 
Enum for replica exchange flavours +enum { + ereTEMP, ereLAMBDA, ereENDSINGLE, ereTL, ereNR +}; +/*! \brief Strings describing replica exchange flavours. + * + * end_single_marker merely notes the end of single variable replica + * exchange. All types higher than it are multiple replica exchange + * methods. + * + * Eventually, should add 'pressure', 'temperature and pressure', + * 'lambda_and_pressure', 'temperature_lambda_pressure'?; Let's wait + * until we feel better about the pressure control methods giving + * exact ensembles. Right now, we assume constant pressure */ +static const char *erename[ereNR] = { "temperature", "lambda", "end_single_marker", "temperature and lambda"}; + +//! Working data for replica exchange. +struct gmx_repl_ex +{ + //! Replica ID + int repl; + //! Total number of replica + int nrepl; + //! Temperature + real temp; + //! Replica exchange type from ere enum + int type; + //! Quantity, e.g. temperature or lambda; first index is ere, second index is replica ID + real **q; + //! Use constant pressure and temperature + gmx_bool bNPT; + //! Replica pressures + real *pres; + //! Replica indices + int *ind; + //! Used for keeping track of all the replica swaps + int *allswaps; + //! Replica exchange interval (number of steps) + int nst; + //! Number of exchanges per interval + int nex; + //! Random seed + int seed; + //! Number of even and odd replica change attempts + int nattempt[2]; + //! Sum of probabilities + real *prob_sum; + //! Number of moves between replicas i and j + int **nmoves; + //! i-th element of the array is the number of exchanges between replica i-1 and i + int *nexchange; + + /*! \brief Helper arrays for replica exchange; allocated here + * so they don't have to be allocated each time */ + //! \{ + int *destinations; + int **cyclic; + int **order; + int *tmpswap; + gmx_bool *incycle; + gmx_bool *bEx; + //! \} + + //! Helper arrays to hold the quantities that are exchanged. + //! \{ + real *prob; + real *Epot; + real *beta; + real *Vol; + real **de; + //! \} +}; + +// TODO We should add Doxygen here some time. +//! \cond + +static gmx_bool repl_quantity(const gmx_multisim_t *ms, + struct gmx_repl_ex *re, int ere, real q) +{ + real *qall; + gmx_bool bDiff; + int s; + + snew(qall, ms->nsim); + qall[re->repl] = q; + gmx_sum_sim(ms->nsim, qall, ms); + + /* PLUMED */ + //bDiff = FALSE; + //for (s = 1; s < ms->nsim; s++) + //{ + // if (qall[s] != qall[0]) + // { + bDiff = TRUE; + // } + //} + /* END PLUMED */ + + if (bDiff) + { + /* Set the replica exchange type and quantities */ + re->type = ere; + + snew(re->q[ere], re->nrepl); + for (s = 0; s < ms->nsim; s++) + { + re->q[ere][s] = qall[s]; + } + } + sfree(qall); + return bDiff; +} + +gmx_repl_ex_t +init_replica_exchange(FILE *fplog, + const gmx_multisim_t *ms, + int numAtomsInSystem, + const t_inputrec *ir, + const ReplicaExchangeParameters &replExParams) +{ + real pres; + int i, j; + struct gmx_repl_ex *re; + gmx_bool bTemp; + gmx_bool bLambda = FALSE; + + fprintf(fplog, "\nInitializing Replica Exchange\n"); + + if (!isMultiSim(ms) || ms->nsim == 1) + { + gmx_fatal(FARGS, "Nothing to exchange with only one replica, maybe you forgot to set the -multidir option of mdrun?"); + } + if (!EI_DYNAMICS(ir->eI)) + { + gmx_fatal(FARGS, "Replica exchange is only supported by dynamical simulations"); + /* Note that PAR(cr) is defined by cr->nnodes > 1, which is + * distinct from isMultiSim(ms). A multi-simulation only runs + * with real MPI parallelism, but this does not imply PAR(cr) + * is true! 
+ * + * Since we are using a dynamical integrator, the only + * decomposition is DD, so PAR(cr) and DOMAINDECOMP(cr) are + * synonymous. The only way for cr->nnodes > 1 to be true is + * if we are using DD. */ + } + + snew(re, 1); + + re->repl = ms->sim; + re->nrepl = ms->nsim; + snew(re->q, ereENDSINGLE); + + fprintf(fplog, "Repl There are %d replicas:\n", re->nrepl); + + /* We only check that the number of atoms in the systms match. + * This, of course, do not guarantee that the systems are the same, + * but it does guarantee that we can perform replica exchange. + */ + check_multi_int(fplog, ms, numAtomsInSystem, "the number of atoms", FALSE); + check_multi_int(fplog, ms, ir->eI, "the integrator", FALSE); + check_multi_int64(fplog, ms, ir->init_step+ir->nsteps, "init_step+nsteps", FALSE); + const int nst = replExParams.exchangeInterval; + check_multi_int64(fplog, ms, (ir->init_step+nst-1)/nst, + "first exchange step: init_step/-replex", FALSE); + check_multi_int(fplog, ms, ir->etc, "the temperature coupling", FALSE); + check_multi_int(fplog, ms, ir->opts.ngtc, + "the number of temperature coupling groups", FALSE); + check_multi_int(fplog, ms, ir->epc, "the pressure coupling", FALSE); + check_multi_int(fplog, ms, ir->efep, "free energy", FALSE); + check_multi_int(fplog, ms, ir->fepvals->n_lambda, "number of lambda states", FALSE); + + re->temp = ir->opts.ref_t[0]; + for (i = 1; (i < ir->opts.ngtc); i++) + { + if (ir->opts.ref_t[i] != re->temp) + { + fprintf(fplog, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n"); + fprintf(stderr, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n"); + } + } + + re->type = -1; + bTemp = repl_quantity(ms, re, ereTEMP, re->temp); + if (ir->efep != efepNO) + { + bLambda = repl_quantity(ms, re, ereLAMBDA, static_cast<real>(ir->fepvals->init_fep_state)); + } + if (re->type == -1) /* nothing was assigned */ + { + gmx_fatal(FARGS, "The properties of the %d systems are all the same, there is nothing to exchange", re->nrepl); + } + if (bLambda && bTemp) + { + re->type = ereTL; + } + + if (bTemp) + { + please_cite(fplog, "Sugita1999a"); + if (ir->epc != epcNO) + { + re->bNPT = TRUE; + fprintf(fplog, "Repl Using Constant Pressure REMD.\n"); + please_cite(fplog, "Okabe2001a"); + } + if (ir->etc == etcBERENDSEN) + { + gmx_fatal(FARGS, "REMD with the %s thermostat does not produce correct potential energy distributions, consider using the %s thermostat instead", + ETCOUPLTYPE(ir->etc), ETCOUPLTYPE(etcVRESCALE)); + } + } + if (bLambda) + { + if (ir->fepvals->delta_lambda != 0) /* check this? */ + { + gmx_fatal(FARGS, "delta_lambda is not zero"); + } + } + if (re->bNPT) + { + snew(re->pres, re->nrepl); + if (ir->epct == epctSURFACETENSION) + { + pres = ir->ref_p[ZZ][ZZ]; + } + else + { + pres = 0; + j = 0; + for (i = 0; i < DIM; i++) + { + if (ir->compress[i][i] != 0) + { + pres += ir->ref_p[i][i]; + j++; + } + } + pres /= j; + } + re->pres[re->repl] = pres; + gmx_sum_sim(re->nrepl, re->pres, ms); + } + + /* Make an index for increasing replica order */ + /* only makes sense if one or the other is varying, not both! + if both are varying, we trust the order the person gave. */ + snew(re->ind, re->nrepl); + for (i = 0; i < re->nrepl; i++) + { + re->ind[i] = i; + } + + /* PLUMED */ + // plumed2: check if we want alternative patterns (i.e. for bias-exchange metaD) + // in those cases replicas can share the same temperature. 
+ /* + if (re->type < ereENDSINGLE) + { + + for (i = 0; i < re->nrepl; i++) + { + for (j = i+1; j < re->nrepl; j++) + { + if (re->q[re->type][re->ind[j]] < re->q[re->type][re->ind[i]]) + {*/ + /* Unordered replicas are supposed to work, but there + * is still an issues somewhere. + * Note that at this point still re->ind[i]=i. + */ + /* + gmx_fatal(FARGS, "Replicas with indices %d < %d have %ss %g > %g, please order your replicas on increasing %s", + i, j, + erename[re->type], + re->q[re->type][i], re->q[re->type][j], + erename[re->type]); + } + else if (re->q[re->type][re->ind[j]] == re->q[re->type][re->ind[i]]) + { + gmx_fatal(FARGS, "Two replicas have identical %ss", erename[re->type]); + } + } + } + } + */ + /* END PLUMED */ + + /* keep track of all the swaps, starting with the initial placement. */ + snew(re->allswaps, re->nrepl); + for (i = 0; i < re->nrepl; i++) + { + re->allswaps[i] = re->ind[i]; + } + + switch (re->type) + { + case ereTEMP: + fprintf(fplog, "\nReplica exchange in temperature\n"); + for (i = 0; i < re->nrepl; i++) + { + fprintf(fplog, " %5.1f", re->q[re->type][re->ind[i]]); + } + fprintf(fplog, "\n"); + break; + case ereLAMBDA: + fprintf(fplog, "\nReplica exchange in lambda\n"); + for (i = 0; i < re->nrepl; i++) + { + fprintf(fplog, " %3d", static_cast<int>(re->q[re->type][re->ind[i]])); + } + fprintf(fplog, "\n"); + break; + case ereTL: + fprintf(fplog, "\nReplica exchange in temperature and lambda state\n"); + for (i = 0; i < re->nrepl; i++) + { + fprintf(fplog, " %5.1f", re->q[ereTEMP][re->ind[i]]); + } + fprintf(fplog, "\n"); + for (i = 0; i < re->nrepl; i++) + { + fprintf(fplog, " %5d", static_cast<int>(re->q[ereLAMBDA][re->ind[i]])); + } + fprintf(fplog, "\n"); + break; + default: + gmx_incons("Unknown replica exchange quantity"); + } + if (re->bNPT) + { + fprintf(fplog, "\nRepl p"); + for (i = 0; i < re->nrepl; i++) + { + fprintf(fplog, " %5.2f", re->pres[re->ind[i]]); + } + + for (i = 0; i < re->nrepl; i++) + { + if ((i > 0) && (re->pres[re->ind[i]] < re->pres[re->ind[i-1]])) + { + fprintf(fplog, "\nWARNING: The reference pressures decrease with increasing temperatures\n\n"); + fprintf(stderr, "\nWARNING: The reference pressures decrease with increasing temperatures\n\n"); + } + } + } + re->nst = nst; + if (replExParams.randomSeed == -1) + { + if (isMasterSim(ms)) + { + re->seed = static_cast<int>(gmx::makeRandomSeed()); + } + else + { + re->seed = 0; + } + gmx_sumi_sim(1, &(re->seed), ms); + } + else + { + re->seed = replExParams.randomSeed; + } + fprintf(fplog, "\nReplica exchange interval: %d\n", re->nst); + fprintf(fplog, "\nReplica random seed: %d\n", re->seed); + + re->nattempt[0] = 0; + re->nattempt[1] = 0; + + snew(re->prob_sum, re->nrepl); + snew(re->nexchange, re->nrepl); + snew(re->nmoves, re->nrepl); + for (i = 0; i < re->nrepl; i++) + { + snew(re->nmoves[i], re->nrepl); + } + fprintf(fplog, "Replica exchange information below: ex and x = exchange, pr = probability\n"); + + /* generate space for the helper functions so we don't have to snew each time */ + + snew(re->destinations, re->nrepl); + snew(re->incycle, re->nrepl); + snew(re->tmpswap, re->nrepl); + snew(re->cyclic, re->nrepl); + snew(re->order, re->nrepl); + for (i = 0; i < re->nrepl; i++) + { + snew(re->cyclic[i], re->nrepl+1); + snew(re->order[i], re->nrepl); + } + /* allocate space for the functions storing the data for the replicas */ + /* not all of these arrays needed in all cases, but they don't take + up much space, since the max size is nrepl**2 */ + snew(re->prob, re->nrepl); + 
snew(re->bEx, re->nrepl); + snew(re->beta, re->nrepl); + snew(re->Vol, re->nrepl); + snew(re->Epot, re->nrepl); + snew(re->de, re->nrepl); + for (i = 0; i < re->nrepl; i++) + { + snew(re->de[i], re->nrepl); + } + re->nex = replExParams.numExchanges; + return re; +} + +static void exchange_reals(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, real *v, int n) +{ + real *buf; + int i; + + if (v) + { + snew(buf, n); +#if GMX_MPI + /* + MPI_Sendrecv(v, n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0, + buf,n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0, + ms->mpi_comm_masters,MPI_STATUS_IGNORE); + */ + { + MPI_Request mpi_req; + + MPI_Isend(v, n*sizeof(real), MPI_BYTE, MSRANK(ms, b), 0, + ms->mpi_comm_masters, &mpi_req); + MPI_Recv(buf, n*sizeof(real), MPI_BYTE, MSRANK(ms, b), 0, + ms->mpi_comm_masters, MPI_STATUS_IGNORE); + MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); + } +#endif + for (i = 0; i < n; i++) + { + v[i] = buf[i]; + } + sfree(buf); + } +} + + +static void exchange_doubles(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, double *v, int n) +{ + double *buf; + int i; + + if (v) + { + snew(buf, n); +#if GMX_MPI + /* + MPI_Sendrecv(v, n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0, + buf,n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0, + ms->mpi_comm_masters,MPI_STATUS_IGNORE); + */ + { + MPI_Request mpi_req; + + MPI_Isend(v, n*sizeof(double), MPI_BYTE, MSRANK(ms, b), 0, + ms->mpi_comm_masters, &mpi_req); + MPI_Recv(buf, n*sizeof(double), MPI_BYTE, MSRANK(ms, b), 0, + ms->mpi_comm_masters, MPI_STATUS_IGNORE); + MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); + } +#endif + for (i = 0; i < n; i++) + { + v[i] = buf[i]; + } + sfree(buf); + } +} + +static void exchange_rvecs(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, rvec *v, int n) +{ + rvec *buf; + int i; + + if (v) + { + snew(buf, n); +#if GMX_MPI + /* + MPI_Sendrecv(v[0], n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0, + buf[0],n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0, + ms->mpi_comm_masters,MPI_STATUS_IGNORE); + */ + { + MPI_Request mpi_req; + + MPI_Isend(v[0], n*sizeof(rvec), MPI_BYTE, MSRANK(ms, b), 0, + ms->mpi_comm_masters, &mpi_req); + MPI_Recv(buf[0], n*sizeof(rvec), MPI_BYTE, MSRANK(ms, b), 0, + ms->mpi_comm_masters, MPI_STATUS_IGNORE); + MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); + } +#endif + for (i = 0; i < n; i++) + { + copy_rvec(buf[i], v[i]); + } + sfree(buf); + } +} + +/* PLUMED HREX */ +void exchange_state(const gmx_multisim_t *ms, int b, t_state *state) +/* END PLUMED HREX */ +{ + /* When t_state changes, this code should be updated. 
*/ + int ngtc, nnhpres; + ngtc = state->ngtc * state->nhchainlength; + nnhpres = state->nnhpres* state->nhchainlength; + exchange_rvecs(ms, b, state->box, DIM); + exchange_rvecs(ms, b, state->box_rel, DIM); + exchange_rvecs(ms, b, state->boxv, DIM); + exchange_reals(ms, b, &(state->veta), 1); + exchange_reals(ms, b, &(state->vol0), 1); + exchange_rvecs(ms, b, state->svir_prev, DIM); + exchange_rvecs(ms, b, state->fvir_prev, DIM); + exchange_rvecs(ms, b, state->pres_prev, DIM); + exchange_doubles(ms, b, state->nosehoover_xi.data(), ngtc); + exchange_doubles(ms, b, state->nosehoover_vxi.data(), ngtc); + exchange_doubles(ms, b, state->nhpres_xi.data(), nnhpres); + exchange_doubles(ms, b, state->nhpres_vxi.data(), nnhpres); + exchange_doubles(ms, b, state->therm_integral.data(), state->ngtc); + exchange_doubles(ms, b, &state->baros_integral, 1); + exchange_rvecs(ms, b, state->x.rvec_array(), state->natoms); + exchange_rvecs(ms, b, state->v.rvec_array(), state->natoms); +} + +/* PLUMED HREX */ +void copy_state_serial(const t_state *src, t_state *dest) +/* END PLUMED HREX */ +{ + if (dest != src) + { + /* Currently the local state is always a pointer to the global + * in serial, so we should never end up here. + * TODO: Implement a (trivial) t_state copy once converted to C++. + */ + GMX_RELEASE_ASSERT(false, "State copying is currently not implemented in replica exchange"); + } +} + +static void scale_velocities(gmx::ArrayRef<gmx::RVec> velocities, real fac) +{ + for (auto &v : velocities) + { + v *= fac; + } +} + +static void print_transition_matrix(FILE *fplog, int n, int **nmoves, const int *nattempt) +{ + int i, j, ntot; + float Tprint; + + ntot = nattempt[0] + nattempt[1]; + fprintf(fplog, "\n"); + fprintf(fplog, "Repl"); + for (i = 0; i < n; i++) + { + fprintf(fplog, " "); /* put the title closer to the center */ + } + fprintf(fplog, "Empirical Transition Matrix\n"); + + fprintf(fplog, "Repl"); + for (i = 0; i < n; i++) + { + fprintf(fplog, "%8d", (i+1)); + } + fprintf(fplog, "\n"); + + for (i = 0; i < n; i++) + { + fprintf(fplog, "Repl"); + for (j = 0; j < n; j++) + { + Tprint = 0.0; + if (nmoves[i][j] > 0) + { + Tprint = nmoves[i][j]/(2.0*ntot); + } + fprintf(fplog, "%8.4f", Tprint); + } + fprintf(fplog, "%3d\n", i); + } +} + +static void print_ind(FILE *fplog, const char *leg, int n, int *ind, const gmx_bool *bEx) +{ + int i; + + fprintf(fplog, "Repl %2s %2d", leg, ind[0]); + for (i = 1; i < n; i++) + { + fprintf(fplog, " %c %2d", (bEx != nullptr && bEx[i]) ? 'x' : ' ', ind[i]); + } + fprintf(fplog, "\n"); +} + +static void print_allswitchind(FILE *fplog, int n, int *pind, int *allswaps, int *tmpswap) +{ + int i; + + for (i = 0; i < n; i++) + { + tmpswap[i] = allswaps[i]; + } + for (i = 0; i < n; i++) + { + allswaps[i] = tmpswap[pind[i]]; + } + + fprintf(fplog, "\nAccepted Exchanges: "); + for (i = 0; i < n; i++) + { + fprintf(fplog, "%d ", pind[i]); + } + fprintf(fplog, "\n"); + + /* the "Order After Exchange" is the state label corresponding to the configuration that + started in state listed in order, i.e. 
+ + 3 0 1 2 + + means that the: + configuration starting in simulation 3 is now in simulation 0, + configuration starting in simulation 0 is now in simulation 1, + configuration starting in simulation 1 is now in simulation 2, + configuration starting in simulation 2 is now in simulation 3 + */ + fprintf(fplog, "Order After Exchange: "); + for (i = 0; i < n; i++) + { + fprintf(fplog, "%d ", allswaps[i]); + } + fprintf(fplog, "\n\n"); +} + +static void print_prob(FILE *fplog, const char *leg, int n, real *prob) +{ + int i; + char buf[8]; + + fprintf(fplog, "Repl %2s ", leg); + for (i = 1; i < n; i++) + { + if (prob[i] >= 0) + { + sprintf(buf, "%4.2f", prob[i]); + fprintf(fplog, " %3s", buf[0] == '1' ? "1.0" : buf+1); + } + else + { + fprintf(fplog, " "); + } + } + fprintf(fplog, "\n"); +} + +static void print_count(FILE *fplog, const char *leg, int n, int *count) +{ + int i; + + fprintf(fplog, "Repl %2s ", leg); + for (i = 1; i < n; i++) + { + fprintf(fplog, " %4d", count[i]); + } + fprintf(fplog, "\n"); +} + +static real calc_delta(FILE *fplog, gmx_bool bPrint, struct gmx_repl_ex *re, int a, int b, int ap, int bp) +{ + + real ediff, dpV, delta = 0; + real *Epot = re->Epot; + real *Vol = re->Vol; + real **de = re->de; + real *beta = re->beta; + + /* Two cases; we are permuted and not. In all cases, setting ap = a and bp = b will reduce + to the non permuted case */ + + switch (re->type) + { + case ereTEMP: + /* + * Okabe et. al. Chem. Phys. Lett. 335 (2001) 435-439 + */ + ediff = Epot[b] - Epot[a]; + delta = -(beta[bp] - beta[ap])*ediff; + break; + case ereLAMBDA: + /* two cases: when we are permuted, and not. */ + /* non-permuted: + ediff = E_new - E_old + = [H_b(x_a) + H_a(x_b)] - [H_b(x_b) + H_a(x_a)] + = [H_b(x_a) - H_a(x_a)] + [H_a(x_b) - H_b(x_b)] + = de[b][a] + de[a][b] */ + + /* permuted: + ediff = E_new - E_old + = [H_bp(x_a) + H_ap(x_b)] - [H_bp(x_b) + H_ap(x_a)] + = [H_bp(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_bp(x_b)] + = [H_bp(x_a) - H_a(x_a) + H_a(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_b(x_b) + H_b(x_b) - H_bp(x_b)] + = [H_bp(x_a) - H_a(x_a)] - [H_ap(x_a) - H_a(x_a)] + [H_ap(x_b) - H_b(x_b)] - H_bp(x_b) - H_b(x_b)] + = (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b]) */ + /* but, in the current code implementation, we flip configurations, not indices . . . + So let's examine that. + = [H_b(x_ap) - H_a(x_a)] - [H_a(x_ap) - H_a(x_a)] + [H_a(x_bp) - H_b(x_b)] - H_b(x_bp) - H_b(x_b)] + = [H_b(x_ap) - H_a(x_ap)] + [H_a(x_bp) - H_b(x_pb)] + = (de[b][ap] - de[a][ap]) + (de[a][bp] - de[b][bp] + So, if we exchange b<=> bp and a<=> ap, we return to the same result. + So the simple solution is to flip the + position of perturbed and original indices in the tests. + */ + + ediff = (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b]); + delta = ediff*beta[a]; /* assume all same temperature in this case */ + break; + case ereTL: + /* not permuted: */ + /* delta = reduced E_new - reduced E_old + = [beta_b H_b(x_a) + beta_a H_a(x_b)] - [beta_b H_b(x_b) + beta_a H_a(x_a)] + = [beta_b H_b(x_a) - beta_a H_a(x_a)] + [beta_a H_a(x_b) - beta_b H_b(x_b)] + = [beta_b dH_b(x_a) + beta_b H_a(x_a) - beta_a H_a(x_a)] + + [beta_a dH_a(x_b) + beta_a H_b(x_b) - beta_b H_b(x_b)] + = [beta_b dH_b(x_a) + [beta_a dH_a(x_b) + + beta_b (H_a(x_a) - H_b(x_b)]) - beta_a (H_a(x_a) - H_b(x_b)) + = beta_b dH_b(x_a) + beta_a dH_a(x_b) - (beta_b - beta_a)(H_b(x_b) - H_a(x_a) */ + /* delta = beta[b]*de[b][a] + beta[a]*de[a][b] - (beta[b] - beta[a])*(Epot[b] - Epot[a]; */ + /* permuted (big breath!) 
*/ + /* delta = reduced E_new - reduced E_old + = [beta_bp H_bp(x_a) + beta_ap H_ap(x_b)] - [beta_bp H_bp(x_b) + beta_ap H_ap(x_a)] + = [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)] + = [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)] + - beta_pb H_a(x_a) + beta_ap H_a(x_a) + beta_pb H_a(x_a) - beta_ap H_a(x_a) + - beta_ap H_b(x_b) + beta_bp H_b(x_b) + beta_ap H_b(x_b) - beta_bp H_b(x_b) + = [(beta_bp H_bp(x_a) - beta_bp H_a(x_a)) - (beta_ap H_ap(x_a) - beta_ap H_a(x_a))] + + [(beta_ap H_ap(x_b) - beta_ap H_b(x_b)) - (beta_bp H_bp(x_b) - beta_bp H_b(x_b))] + + beta_pb H_a(x_a) - beta_ap H_a(x_a) + beta_ap H_b(x_b) - beta_bp H_b(x_b) + = [beta_bp (H_bp(x_a) - H_a(x_a)) - beta_ap (H_ap(x_a) - H_a(x_a))] + + [beta_ap (H_ap(x_b) - H_b(x_b)) - beta_bp (H_bp(x_b) - H_b(x_b))] + + beta_pb (H_a(x_a) - H_b(x_b)) - beta_ap (H_a(x_a) - H_b(x_b)) + = ([beta_bp de[bp][a] - beta_ap de[ap][a]) + beta_ap de[ap][b] - beta_bp de[bp][b]) + + (beta_pb-beta_ap)(H_a(x_a) - H_b(x_b)) */ + delta = beta[bp]*(de[bp][a] - de[bp][b]) + beta[ap]*(de[ap][b] - de[ap][a]) - (beta[bp]-beta[ap])*(Epot[b]-Epot[a]); + break; + default: + gmx_incons("Unknown replica exchange quantity"); + } + if (bPrint) + { + fprintf(fplog, "Repl %d <-> %d dE_term = %10.3e (kT)\n", a, b, delta); + } +/* PLUMED HREX */ +/* this is necessary because with plumed HREX the energy contribution is + already taken into account */ + if(plumed_hrex) delta=0.0; +/* END PLUMED HREX */ + if (re->bNPT) + { + /* revist the calculation for 5.0. Might be some improvements. */ + dpV = (beta[ap]*re->pres[ap]-beta[bp]*re->pres[bp])*(Vol[b]-Vol[a])/PRESFAC; + if (bPrint) + { + fprintf(fplog, " dpV = %10.3e d = %10.3e\n", dpV, delta + dpV); + } + delta += dpV; + } + return delta; +} + +static void +test_for_replica_exchange(FILE *fplog, + const gmx_multisim_t *ms, + struct gmx_repl_ex *re, + const gmx_enerdata_t *enerd, + real vol, + int64_t step, + real time) +{ + int m, i, j, a, b, ap, bp, i0, i1, tmp; + real delta = 0; + gmx_bool bPrint, bMultiEx; + gmx_bool *bEx = re->bEx; + real *prob = re->prob; + int *pind = re->destinations; /* permuted index */ + gmx_bool bEpot = FALSE; + gmx_bool bDLambda = FALSE; + gmx_bool bVol = FALSE; + gmx::ThreeFry2x64<64> rng(re->seed, gmx::RandomDomain::ReplicaExchange); + gmx::UniformRealDistribution<real> uniformRealDist; + gmx::UniformIntDistribution<int> uniformNreplDist(0, re->nrepl-1); + + bMultiEx = (re->nex > 1); /* multiple exchanges at each state */ + fprintf(fplog, "Replica exchange at step %" PRId64 " time %.5f\n", step, time); + + if (re->bNPT) + { + for (i = 0; i < re->nrepl; i++) + { + re->Vol[i] = 0; + } + bVol = TRUE; + re->Vol[re->repl] = vol; + } + if ((re->type == ereTEMP || re->type == ereTL)) + { + for (i = 0; i < re->nrepl; i++) + { + re->Epot[i] = 0; + } + bEpot = TRUE; + re->Epot[re->repl] = enerd->term[F_EPOT]; + /* temperatures of different states*/ + for (i = 0; i < re->nrepl; i++) + { + re->beta[i] = 1.0/(re->q[ereTEMP][i]*BOLTZ); + } + } + else + { + for (i = 0; i < re->nrepl; i++) + { + re->beta[i] = 1.0/(re->temp*BOLTZ); /* we have a single temperature */ + } + } + if (re->type == ereLAMBDA || re->type == ereTL) + { + bDLambda = TRUE; + /* lambda differences. 
*/ + /* de[i][j] is the energy of the jth simulation in the ith Hamiltonian + minus the energy of the jth simulation in the jth Hamiltonian */ + for (i = 0; i < re->nrepl; i++) + { + for (j = 0; j < re->nrepl; j++) + { + re->de[i][j] = 0; + } + } + for (i = 0; i < re->nrepl; i++) + { + re->de[i][re->repl] = (enerd->enerpart_lambda[static_cast<int>(re->q[ereLAMBDA][i])+1]-enerd->enerpart_lambda[0]); + } + } + + /* now actually do the communication */ + if (bVol) + { + gmx_sum_sim(re->nrepl, re->Vol, ms); + } + if (bEpot) + { + gmx_sum_sim(re->nrepl, re->Epot, ms); + } + if (bDLambda) + { + for (i = 0; i < re->nrepl; i++) + { + gmx_sum_sim(re->nrepl, re->de[i], ms); + } + } + + /* make a duplicate set of indices for shuffling */ + for (i = 0; i < re->nrepl; i++) + { + pind[i] = re->ind[i]; + } + + rng.restart( step, 0 ); + + /* PLUMED */ + int plumed_test_exchange_pattern=0; + if(plumed_test_exchange_pattern && plumed_hrex) gmx_fatal(FARGS,"hrex not compatible with ad hoc exchange patterns"); + /* END PLUMED */ + + if (bMultiEx) + { + /* multiple random switch exchange */ + int nself = 0; + + + for (i = 0; i < re->nex + nself; i++) + { + // For now this is superfluous, but just in case we ever add more + // calls in different branches it is safer to always reset the distribution. + uniformNreplDist.reset(); + + /* randomly select a pair */ + /* in theory, could reduce this by identifying only which switches had a nonneglibible + probability of occurring (log p > -100) and only operate on those switches */ + /* find out which state it is from, and what label that state currently has. Likely + more work that useful. */ + i0 = uniformNreplDist(rng); + i1 = uniformNreplDist(rng); + if (i0 == i1) + { + nself++; + continue; /* self-exchange, back up and do it again */ + } + + a = re->ind[i0]; /* what are the indices of these states? */ + b = re->ind[i1]; + ap = pind[i0]; + bp = pind[i1]; + + bPrint = FALSE; /* too noisy */ + /* calculate the energy difference */ + /* if the code changes to flip the STATES, rather than the configurations, + use the commented version of the code */ + /* delta = calc_delta(fplog,bPrint,re,a,b,ap,bp); */ + delta = calc_delta(fplog, bPrint, re, ap, bp, a, b); + + /* we actually only use the first space in the prob and bEx array, + since there are actually many switches between pairs. */ + + if (delta <= 0) + { + /* accepted */ + prob[0] = 1; + bEx[0] = TRUE; + } + else + { + if (delta > c_probabilityCutoff) + { + prob[0] = 0; + } + else + { + prob[0] = exp(-delta); + } + // roll a number to determine if accepted. For now it is superfluous to + // reset, but just in case we ever add more calls in different branches + // it is safer to always reset the distribution. 
+ uniformRealDist.reset(); + bEx[0] = uniformRealDist(rng) < prob[0]; + } + re->prob_sum[0] += prob[0]; + + if (bEx[0]) + { + /* swap the states */ + tmp = pind[i0]; + pind[i0] = pind[i1]; + pind[i1] = tmp; + } + } + re->nattempt[0]++; /* keep track of total permutation trials here */ + print_allswitchind(fplog, re->nrepl, pind, re->allswaps, re->tmpswap); + } + else + { + /* standard nearest neighbor replica exchange */ + + m = (step / re->nst) % 2; + /* PLUMED */ + if(plumedswitch){ + int partner=re->repl; + plumed_cmd(plumedmain,"getExchangesFlag",&plumed_test_exchange_pattern); + if(plumed_test_exchange_pattern>0){ + int *list; + snew(list,re->nrepl); + plumed_cmd(plumedmain,"setNumberOfReplicas",&(re->nrepl)); + plumed_cmd(plumedmain,"getExchangesList",list); + for(i=0; i<re->nrepl; i++) re->ind[i]=list[i]; + sfree(list); + } + + for(i=1; i<re->nrepl; i++) { + if (i % 2 != m) continue; + a = re->ind[i-1]; + b = re->ind[i]; + if(re->repl==a) partner=b; + if(re->repl==b) partner=a; + } + plumed_cmd(plumedmain,"GREX setPartner",&partner); + plumed_cmd(plumedmain,"GREX calculate",NULL); + plumed_cmd(plumedmain,"GREX shareAllDeltaBias",NULL); + } + /* END PLUMED */ + for (i = 1; i < re->nrepl; i++) + { + a = re->ind[i-1]; + b = re->ind[i]; + + bPrint = (re->repl == a || re->repl == b); + if (i % 2 == m) + { + delta = calc_delta(fplog, bPrint, re, a, b, a, b); + /* PLUMED */ + if(plumedswitch){ + real adb,bdb,dplumed; + char buf[300]; + sprintf(buf,"GREX getDeltaBias %d",a); plumed_cmd(plumedmain,buf,&adb); + sprintf(buf,"GREX getDeltaBias %d",b); plumed_cmd(plumedmain,buf,&bdb); + dplumed=adb*re->beta[a]+bdb*re->beta[b]; + delta+=dplumed; + if (bPrint) + fprintf(fplog,"dplumed = %10.3e dE_Term = %10.3e (kT)\n",dplumed,delta); + } + /* END PLUMED */ + if (delta <= 0) + { + /* accepted */ + prob[i] = 1; + bEx[i] = TRUE; + } + else + { + if (delta > c_probabilityCutoff) + { + prob[i] = 0; + } + else + { + prob[i] = exp(-delta); + } + // roll a number to determine if accepted. For now it is superfluous to + // reset, but just in case we ever add more calls in different branches + // it is safer to always reset the distribution. 
+ uniformRealDist.reset(); + bEx[i] = uniformRealDist(rng) < prob[i]; + } + re->prob_sum[i] += prob[i]; + + if (bEx[i]) + { + /* PLUMED */ + if(!plumed_test_exchange_pattern) { + /* standard neighbour swapping */ + /* swap these two */ + tmp = pind[i-1]; + pind[i-1] = pind[i]; + pind[i] = tmp; + re->nexchange[i]++; /* statistics for back compatibility */ + } else { + /* alternative swapping patterns */ + tmp = pind[a]; + pind[a] = pind[b]; + pind[b] = tmp; + re->nexchange[i]++; /* statistics for back compatibility */ + } + /* END PLUMED */ + } + } + else + { + prob[i] = -1; + bEx[i] = FALSE; + } + } + /* print some statistics */ + print_ind(fplog, "ex", re->nrepl, re->ind, bEx); + print_prob(fplog, "pr", re->nrepl, prob); + fprintf(fplog, "\n"); + re->nattempt[m]++; + } + + /* PLUMED */ + if(plumed_test_exchange_pattern>0) { + for (i = 0; i < re->nrepl; i++) + { + re->ind[i] = i; + } + } + /* END PLUMED */ + + /* record which moves were made and accepted */ + for (i = 0; i < re->nrepl; i++) + { + re->nmoves[re->ind[i]][pind[i]] += 1; + re->nmoves[pind[i]][re->ind[i]] += 1; + } + fflush(fplog); /* make sure we can see what the last exchange was */ +} + +static void +cyclic_decomposition(const int *destinations, + int **cyclic, + gmx_bool *incycle, + const int nrepl, + int *nswap) +{ + + int i, j, c, p; + int maxlen = 1; + for (i = 0; i < nrepl; i++) + { + incycle[i] = FALSE; + } + for (i = 0; i < nrepl; i++) /* one cycle for each replica */ + { + if (incycle[i]) + { + cyclic[i][0] = -1; + continue; + } + cyclic[i][0] = i; + incycle[i] = TRUE; + c = 1; + p = i; + for (j = 0; j < nrepl; j++) /* potentially all cycles are part, but we will break first */ + { + p = destinations[p]; /* start permuting */ + if (p == i) + { + cyclic[i][c] = -1; + if (c > maxlen) + { + maxlen = c; + } + break; /* we've reached the original element, the cycle is complete, and we marked the end. */ + } + else + { + cyclic[i][c] = p; /* each permutation gives a new member of the cycle */ + incycle[p] = TRUE; + c++; + } + } + } + *nswap = maxlen - 1; + + if (debug) + { + for (i = 0; i < nrepl; i++) + { + fprintf(debug, "Cycle %d:", i); + for (j = 0; j < nrepl; j++) + { + if (cyclic[i][j] < 0) + { + break; + } + fprintf(debug, "%2d", cyclic[i][j]); + } + fprintf(debug, "\n"); + } + fflush(debug); + } +} + +static void +compute_exchange_order(int **cyclic, + int **order, + const int nrepl, + const int maxswap) +{ + int i, j; + + for (j = 0; j < maxswap; j++) + { + for (i = 0; i < nrepl; i++) + { + if (cyclic[i][j+1] >= 0) + { + order[cyclic[i][j+1]][j] = cyclic[i][j]; + order[cyclic[i][j]][j] = cyclic[i][j+1]; + } + } + for (i = 0; i < nrepl; i++) + { + if (order[i][j] < 0) + { + order[i][j] = i; /* if it's not exchanging, it should stay this round*/ + } + } + } + + if (debug) + { + fprintf(debug, "Replica Exchange Order\n"); + for (i = 0; i < nrepl; i++) + { + fprintf(debug, "Replica %d:", i); + for (j = 0; j < maxswap; j++) + { + if (order[i][j] < 0) + { + break; + } + fprintf(debug, "%2d", order[i][j]); + } + fprintf(debug, "\n"); + } + fflush(debug); + } +} + +static void +prepare_to_do_exchange(struct gmx_repl_ex *re, + const int replica_id, + int *maxswap, + gmx_bool *bThisReplicaExchanged) +{ + int i, j; + /* Hold the cyclic decomposition of the (multiple) replica + * exchange. 
*/ + gmx_bool bAnyReplicaExchanged = FALSE; + *bThisReplicaExchanged = FALSE; + + for (i = 0; i < re->nrepl; i++) + { + if (re->destinations[i] != re->ind[i]) + { + /* only mark as exchanged if the index has been shuffled */ + bAnyReplicaExchanged = TRUE; + break; + } + } + if (bAnyReplicaExchanged) + { + /* reinitialize the placeholder arrays */ + for (i = 0; i < re->nrepl; i++) + { + for (j = 0; j < re->nrepl; j++) + { + re->cyclic[i][j] = -1; + re->order[i][j] = -1; + } + } + + /* Identify the cyclic decomposition of the permutation (very + * fast if neighbor replica exchange). */ + cyclic_decomposition(re->destinations, re->cyclic, re->incycle, re->nrepl, maxswap); + + /* Now translate the decomposition into a replica exchange + * order at each step. */ + compute_exchange_order(re->cyclic, re->order, re->nrepl, *maxswap); + + /* Did this replica do any exchange at any point? */ + for (j = 0; j < *maxswap; j++) + { + if (replica_id != re->order[replica_id][j]) + { + *bThisReplicaExchanged = TRUE; + break; + } + } + } +} + +gmx_bool replica_exchange(FILE *fplog, const t_commrec *cr, + const gmx_multisim_t *ms, struct gmx_repl_ex *re, + t_state *state, const gmx_enerdata_t *enerd, + t_state *state_local, int64_t step, real time) +{ + int j; + int replica_id = 0; + int exchange_partner; + int maxswap = 0; + /* Number of rounds of exchanges needed to deal with any multiple + * exchanges. */ + /* Where each replica ends up after the exchange attempt(s). */ + /* The order in which multiple exchanges will occur. */ + gmx_bool bThisReplicaExchanged = FALSE; + + /* PLUMED */ + if(plumedswitch)plumed_cmd(plumedmain,"GREX prepare",NULL); + /* END PLUMED */ + + if (MASTER(cr)) + { + replica_id = re->repl; + test_for_replica_exchange(fplog, ms, re, enerd, det(state_local->box), step, time); + prepare_to_do_exchange(re, replica_id, &maxswap, &bThisReplicaExchanged); + } + /* Do intra-simulation broadcast so all processors belonging to + * each simulation know whether they need to participate in + * collecting the state. Otherwise, they might as well get on with + * the next thing to do. */ + if (DOMAINDECOMP(cr)) + { +#if GMX_MPI + MPI_Bcast(&bThisReplicaExchanged, sizeof(gmx_bool), MPI_BYTE, MASTERRANK(cr), + cr->mpi_comm_mygroup); +#endif + } + + if (bThisReplicaExchanged) + { + /* Exchange the states */ + /* Collect the global state on the master node */ + if (DOMAINDECOMP(cr)) + { + dd_collect_state(cr->dd, state_local, state); + } + else + { + copy_state_serial(state_local, state); + } + + if (MASTER(cr)) + { + /* There will be only one swap cycle with standard replica + * exchange, but there may be multiple swap cycles if we + * allow multiple swaps. */ + + for (j = 0; j < maxswap; j++) + { + exchange_partner = re->order[replica_id][j]; + + if (exchange_partner != replica_id) + { + /* Exchange the global states between the master nodes */ + if (debug) + { + fprintf(debug, "Exchanging %d with %d\n", replica_id, exchange_partner); + } + exchange_state(ms, exchange_partner, state); + } + } + /* For temperature-type replica exchange, we need to scale + * the velocities. 
*/ + if (re->type == ereTEMP || re->type == ereTL) + { + scale_velocities(state->v, + std::sqrt(re->q[ereTEMP][replica_id]/re->q[ereTEMP][re->destinations[replica_id]])); + } + + } + + /* With domain decomposition the global state is distributed later */ + if (!DOMAINDECOMP(cr)) + { + /* Copy the global state to the local state data structure */ + copy_state_serial(state, state_local); + } + } + + return bThisReplicaExchanged; +} + +void print_replica_exchange_statistics(FILE *fplog, struct gmx_repl_ex *re) +{ + int i; + + fprintf(fplog, "\nReplica exchange statistics\n"); + + if (re->nex == 0) + { + fprintf(fplog, "Repl %d attempts, %d odd, %d even\n", + re->nattempt[0]+re->nattempt[1], re->nattempt[1], re->nattempt[0]); + + fprintf(fplog, "Repl average probabilities:\n"); + for (i = 1; i < re->nrepl; i++) + { + if (re->nattempt[i%2] == 0) + { + re->prob[i] = 0; + } + else + { + re->prob[i] = re->prob_sum[i]/re->nattempt[i%2]; + } + } + print_ind(fplog, "", re->nrepl, re->ind, nullptr); + print_prob(fplog, "", re->nrepl, re->prob); + + fprintf(fplog, "Repl number of exchanges:\n"); + print_ind(fplog, "", re->nrepl, re->ind, nullptr); + print_count(fplog, "", re->nrepl, re->nexchange); + + fprintf(fplog, "Repl average number of exchanges:\n"); + for (i = 1; i < re->nrepl; i++) + { + if (re->nattempt[i%2] == 0) + { + re->prob[i] = 0; + } + else + { + re->prob[i] = (static_cast<real>(re->nexchange[i]))/re->nattempt[i%2]; + } + } + print_ind(fplog, "", re->nrepl, re->ind, nullptr); + print_prob(fplog, "", re->nrepl, re->prob); + + fprintf(fplog, "\n"); + } + /* print the transition matrix */ + print_transition_matrix(fplog, re->nrepl, re->nmoves, re->nattempt); +} + +/* PLUMED HREX */ +int replica_exchange_get_repl(const gmx_repl_ex_t re){ + return re->repl; +}; + +int replica_exchange_get_nrepl(const gmx_repl_ex_t re){ + return re->nrepl; +}; +/* END PLUMED HREX */ +//! \endcond diff --git a/patches/gromacs-2019.1.diff/src/gromacs/mdrun/replicaexchange.cpp.preplumed b/patches/gromacs-2019.1.diff/src/gromacs/mdrun/replicaexchange.cpp.preplumed new file mode 100644 index 0000000000000000000000000000000000000000..a633d688c67c1755effb063557635296875bc876 --- /dev/null +++ b/patches/gromacs-2019.1.diff/src/gromacs/mdrun/replicaexchange.cpp.preplumed @@ -0,0 +1,1383 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ + +/*! \internal \file + * + * \brief Implements the replica exchange routines. + * + * \author David van der Spoel <david.vanderspoel@icm.uu.se> + * \author Mark Abraham <mark.j.abraham@gmail.com> + * \ingroup module_mdrun + */ +#include "gmxpre.h" + +#include "replicaexchange.h" + +#include "config.h" + +#include <cmath> + +#include <random> + +#include "gromacs/domdec/collect.h" +#include "gromacs/gmxlib/network.h" +#include "gromacs/math/units.h" +#include "gromacs/math/vec.h" +#include "gromacs/mdrun/multisim.h" +#include "gromacs/mdtypes/commrec.h" +#include "gromacs/mdtypes/enerdata.h" +#include "gromacs/mdtypes/inputrec.h" +#include "gromacs/mdtypes/md_enums.h" +#include "gromacs/mdtypes/state.h" +#include "gromacs/random/threefry.h" +#include "gromacs/random/uniformintdistribution.h" +#include "gromacs/random/uniformrealdistribution.h" +#include "gromacs/utility/fatalerror.h" +#include "gromacs/utility/pleasecite.h" +#include "gromacs/utility/smalloc.h" + +//! Helps cut off probability values. +constexpr int c_probabilityCutoff = 100; + +/* we don't bother evaluating if events are more rare than exp(-100) = 3.7x10^-44 */ + +//! Rank in the multisimulation +#define MSRANK(ms, nodeid) (nodeid) + +//! Enum for replica exchange flavours +enum { + ereTEMP, ereLAMBDA, ereENDSINGLE, ereTL, ereNR +}; +/*! \brief Strings describing replica exchange flavours. + * + * end_single_marker merely notes the end of single variable replica + * exchange. All types higher than it are multiple replica exchange + * methods. + * + * Eventually, should add 'pressure', 'temperature and pressure', + * 'lambda_and_pressure', 'temperature_lambda_pressure'?; Let's wait + * until we feel better about the pressure control methods giving + * exact ensembles. Right now, we assume constant pressure */ +static const char *erename[ereNR] = { "temperature", "lambda", "end_single_marker", "temperature and lambda"}; + +//! Working data for replica exchange. +struct gmx_repl_ex +{ + //! Replica ID + int repl; + //! Total number of replica + int nrepl; + //! Temperature + real temp; + //! Replica exchange type from ere enum + int type; + //! Quantity, e.g. temperature or lambda; first index is ere, second index is replica ID + real **q; + //! Use constant pressure and temperature + gmx_bool bNPT; + //! Replica pressures + real *pres; + //! Replica indices + int *ind; + //! Used for keeping track of all the replica swaps + int *allswaps; + //! Replica exchange interval (number of steps) + int nst; + //! Number of exchanges per interval + int nex; + //! Random seed + int seed; + //! Number of even and odd replica change attempts + int nattempt[2]; + //! 
Sum of probabilities + real *prob_sum; + //! Number of moves between replicas i and j + int **nmoves; + //! i-th element of the array is the number of exchanges between replica i-1 and i + int *nexchange; + + /*! \brief Helper arrays for replica exchange; allocated here + * so they don't have to be allocated each time */ + //! \{ + int *destinations; + int **cyclic; + int **order; + int *tmpswap; + gmx_bool *incycle; + gmx_bool *bEx; + //! \} + + //! Helper arrays to hold the quantities that are exchanged. + //! \{ + real *prob; + real *Epot; + real *beta; + real *Vol; + real **de; + //! \} +}; + +// TODO We should add Doxygen here some time. +//! \cond + +static gmx_bool repl_quantity(const gmx_multisim_t *ms, + struct gmx_repl_ex *re, int ere, real q) +{ + real *qall; + gmx_bool bDiff; + int s; + + snew(qall, ms->nsim); + qall[re->repl] = q; + gmx_sum_sim(ms->nsim, qall, ms); + + bDiff = FALSE; + for (s = 1; s < ms->nsim; s++) + { + if (qall[s] != qall[0]) + { + bDiff = TRUE; + } + } + + if (bDiff) + { + /* Set the replica exchange type and quantities */ + re->type = ere; + + snew(re->q[ere], re->nrepl); + for (s = 0; s < ms->nsim; s++) + { + re->q[ere][s] = qall[s]; + } + } + sfree(qall); + return bDiff; +} + +gmx_repl_ex_t +init_replica_exchange(FILE *fplog, + const gmx_multisim_t *ms, + int numAtomsInSystem, + const t_inputrec *ir, + const ReplicaExchangeParameters &replExParams) +{ + real pres; + int i, j; + struct gmx_repl_ex *re; + gmx_bool bTemp; + gmx_bool bLambda = FALSE; + + fprintf(fplog, "\nInitializing Replica Exchange\n"); + + if (!isMultiSim(ms) || ms->nsim == 1) + { + gmx_fatal(FARGS, "Nothing to exchange with only one replica, maybe you forgot to set the -multidir option of mdrun?"); + } + if (!EI_DYNAMICS(ir->eI)) + { + gmx_fatal(FARGS, "Replica exchange is only supported by dynamical simulations"); + /* Note that PAR(cr) is defined by cr->nnodes > 1, which is + * distinct from isMultiSim(ms). A multi-simulation only runs + * with real MPI parallelism, but this does not imply PAR(cr) + * is true! + * + * Since we are using a dynamical integrator, the only + * decomposition is DD, so PAR(cr) and DOMAINDECOMP(cr) are + * synonymous. The only way for cr->nnodes > 1 to be true is + * if we are using DD. */ + } + + snew(re, 1); + + re->repl = ms->sim; + re->nrepl = ms->nsim; + snew(re->q, ereENDSINGLE); + + fprintf(fplog, "Repl There are %d replicas:\n", re->nrepl); + + /* We only check that the number of atoms in the systms match. + * This, of course, do not guarantee that the systems are the same, + * but it does guarantee that we can perform replica exchange. 
+ */ + check_multi_int(fplog, ms, numAtomsInSystem, "the number of atoms", FALSE); + check_multi_int(fplog, ms, ir->eI, "the integrator", FALSE); + check_multi_int64(fplog, ms, ir->init_step+ir->nsteps, "init_step+nsteps", FALSE); + const int nst = replExParams.exchangeInterval; + check_multi_int64(fplog, ms, (ir->init_step+nst-1)/nst, + "first exchange step: init_step/-replex", FALSE); + check_multi_int(fplog, ms, ir->etc, "the temperature coupling", FALSE); + check_multi_int(fplog, ms, ir->opts.ngtc, + "the number of temperature coupling groups", FALSE); + check_multi_int(fplog, ms, ir->epc, "the pressure coupling", FALSE); + check_multi_int(fplog, ms, ir->efep, "free energy", FALSE); + check_multi_int(fplog, ms, ir->fepvals->n_lambda, "number of lambda states", FALSE); + + re->temp = ir->opts.ref_t[0]; + for (i = 1; (i < ir->opts.ngtc); i++) + { + if (ir->opts.ref_t[i] != re->temp) + { + fprintf(fplog, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n"); + fprintf(stderr, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n"); + } + } + + re->type = -1; + bTemp = repl_quantity(ms, re, ereTEMP, re->temp); + if (ir->efep != efepNO) + { + bLambda = repl_quantity(ms, re, ereLAMBDA, static_cast<real>(ir->fepvals->init_fep_state)); + } + if (re->type == -1) /* nothing was assigned */ + { + gmx_fatal(FARGS, "The properties of the %d systems are all the same, there is nothing to exchange", re->nrepl); + } + if (bLambda && bTemp) + { + re->type = ereTL; + } + + if (bTemp) + { + please_cite(fplog, "Sugita1999a"); + if (ir->epc != epcNO) + { + re->bNPT = TRUE; + fprintf(fplog, "Repl Using Constant Pressure REMD.\n"); + please_cite(fplog, "Okabe2001a"); + } + if (ir->etc == etcBERENDSEN) + { + gmx_fatal(FARGS, "REMD with the %s thermostat does not produce correct potential energy distributions, consider using the %s thermostat instead", + ETCOUPLTYPE(ir->etc), ETCOUPLTYPE(etcVRESCALE)); + } + } + if (bLambda) + { + if (ir->fepvals->delta_lambda != 0) /* check this? */ + { + gmx_fatal(FARGS, "delta_lambda is not zero"); + } + } + if (re->bNPT) + { + snew(re->pres, re->nrepl); + if (ir->epct == epctSURFACETENSION) + { + pres = ir->ref_p[ZZ][ZZ]; + } + else + { + pres = 0; + j = 0; + for (i = 0; i < DIM; i++) + { + if (ir->compress[i][i] != 0) + { + pres += ir->ref_p[i][i]; + j++; + } + } + pres /= j; + } + re->pres[re->repl] = pres; + gmx_sum_sim(re->nrepl, re->pres, ms); + } + + /* Make an index for increasing replica order */ + /* only makes sense if one or the other is varying, not both! + if both are varying, we trust the order the person gave. */ + snew(re->ind, re->nrepl); + for (i = 0; i < re->nrepl; i++) + { + re->ind[i] = i; + } + + if (re->type < ereENDSINGLE) + { + + for (i = 0; i < re->nrepl; i++) + { + for (j = i+1; j < re->nrepl; j++) + { + if (re->q[re->type][re->ind[j]] < re->q[re->type][re->ind[i]]) + { + /* Unordered replicas are supposed to work, but there + * is still an issues somewhere. + * Note that at this point still re->ind[i]=i. + */ + gmx_fatal(FARGS, "Replicas with indices %d < %d have %ss %g > %g, please order your replicas on increasing %s", + i, j, + erename[re->type], + re->q[re->type][i], re->q[re->type][j], + erename[re->type]); + } + else if (re->q[re->type][re->ind[j]] == re->q[re->type][re->ind[i]]) + { + gmx_fatal(FARGS, "Two replicas have identical %ss", erename[re->type]); + } + } + } + } + + /* keep track of all the swaps, starting with the initial placement. 
*/ + snew(re->allswaps, re->nrepl); + for (i = 0; i < re->nrepl; i++) + { + re->allswaps[i] = re->ind[i]; + } + + switch (re->type) + { + case ereTEMP: + fprintf(fplog, "\nReplica exchange in temperature\n"); + for (i = 0; i < re->nrepl; i++) + { + fprintf(fplog, " %5.1f", re->q[re->type][re->ind[i]]); + } + fprintf(fplog, "\n"); + break; + case ereLAMBDA: + fprintf(fplog, "\nReplica exchange in lambda\n"); + for (i = 0; i < re->nrepl; i++) + { + fprintf(fplog, " %3d", static_cast<int>(re->q[re->type][re->ind[i]])); + } + fprintf(fplog, "\n"); + break; + case ereTL: + fprintf(fplog, "\nReplica exchange in temperature and lambda state\n"); + for (i = 0; i < re->nrepl; i++) + { + fprintf(fplog, " %5.1f", re->q[ereTEMP][re->ind[i]]); + } + fprintf(fplog, "\n"); + for (i = 0; i < re->nrepl; i++) + { + fprintf(fplog, " %5d", static_cast<int>(re->q[ereLAMBDA][re->ind[i]])); + } + fprintf(fplog, "\n"); + break; + default: + gmx_incons("Unknown replica exchange quantity"); + } + if (re->bNPT) + { + fprintf(fplog, "\nRepl p"); + for (i = 0; i < re->nrepl; i++) + { + fprintf(fplog, " %5.2f", re->pres[re->ind[i]]); + } + + for (i = 0; i < re->nrepl; i++) + { + if ((i > 0) && (re->pres[re->ind[i]] < re->pres[re->ind[i-1]])) + { + fprintf(fplog, "\nWARNING: The reference pressures decrease with increasing temperatures\n\n"); + fprintf(stderr, "\nWARNING: The reference pressures decrease with increasing temperatures\n\n"); + } + } + } + re->nst = nst; + if (replExParams.randomSeed == -1) + { + if (isMasterSim(ms)) + { + re->seed = static_cast<int>(gmx::makeRandomSeed()); + } + else + { + re->seed = 0; + } + gmx_sumi_sim(1, &(re->seed), ms); + } + else + { + re->seed = replExParams.randomSeed; + } + fprintf(fplog, "\nReplica exchange interval: %d\n", re->nst); + fprintf(fplog, "\nReplica random seed: %d\n", re->seed); + + re->nattempt[0] = 0; + re->nattempt[1] = 0; + + snew(re->prob_sum, re->nrepl); + snew(re->nexchange, re->nrepl); + snew(re->nmoves, re->nrepl); + for (i = 0; i < re->nrepl; i++) + { + snew(re->nmoves[i], re->nrepl); + } + fprintf(fplog, "Replica exchange information below: ex and x = exchange, pr = probability\n"); + + /* generate space for the helper functions so we don't have to snew each time */ + + snew(re->destinations, re->nrepl); + snew(re->incycle, re->nrepl); + snew(re->tmpswap, re->nrepl); + snew(re->cyclic, re->nrepl); + snew(re->order, re->nrepl); + for (i = 0; i < re->nrepl; i++) + { + snew(re->cyclic[i], re->nrepl+1); + snew(re->order[i], re->nrepl); + } + /* allocate space for the functions storing the data for the replicas */ + /* not all of these arrays needed in all cases, but they don't take + up much space, since the max size is nrepl**2 */ + snew(re->prob, re->nrepl); + snew(re->bEx, re->nrepl); + snew(re->beta, re->nrepl); + snew(re->Vol, re->nrepl); + snew(re->Epot, re->nrepl); + snew(re->de, re->nrepl); + for (i = 0; i < re->nrepl; i++) + { + snew(re->de[i], re->nrepl); + } + re->nex = replExParams.numExchanges; + return re; +} + +static void exchange_reals(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, real *v, int n) +{ + real *buf; + int i; + + if (v) + { + snew(buf, n); +#if GMX_MPI + /* + MPI_Sendrecv(v, n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0, + buf,n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0, + ms->mpi_comm_masters,MPI_STATUS_IGNORE); + */ + { + MPI_Request mpi_req; + + MPI_Isend(v, n*sizeof(real), MPI_BYTE, MSRANK(ms, b), 0, + ms->mpi_comm_masters, &mpi_req); + MPI_Recv(buf, n*sizeof(real), MPI_BYTE, MSRANK(ms, b), 0, + ms->mpi_comm_masters, 
MPI_STATUS_IGNORE); + MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); + } +#endif + for (i = 0; i < n; i++) + { + v[i] = buf[i]; + } + sfree(buf); + } +} + + +static void exchange_doubles(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, double *v, int n) +{ + double *buf; + int i; + + if (v) + { + snew(buf, n); +#if GMX_MPI + /* + MPI_Sendrecv(v, n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0, + buf,n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0, + ms->mpi_comm_masters,MPI_STATUS_IGNORE); + */ + { + MPI_Request mpi_req; + + MPI_Isend(v, n*sizeof(double), MPI_BYTE, MSRANK(ms, b), 0, + ms->mpi_comm_masters, &mpi_req); + MPI_Recv(buf, n*sizeof(double), MPI_BYTE, MSRANK(ms, b), 0, + ms->mpi_comm_masters, MPI_STATUS_IGNORE); + MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); + } +#endif + for (i = 0; i < n; i++) + { + v[i] = buf[i]; + } + sfree(buf); + } +} + +static void exchange_rvecs(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, rvec *v, int n) +{ + rvec *buf; + int i; + + if (v) + { + snew(buf, n); +#if GMX_MPI + /* + MPI_Sendrecv(v[0], n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0, + buf[0],n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0, + ms->mpi_comm_masters,MPI_STATUS_IGNORE); + */ + { + MPI_Request mpi_req; + + MPI_Isend(v[0], n*sizeof(rvec), MPI_BYTE, MSRANK(ms, b), 0, + ms->mpi_comm_masters, &mpi_req); + MPI_Recv(buf[0], n*sizeof(rvec), MPI_BYTE, MSRANK(ms, b), 0, + ms->mpi_comm_masters, MPI_STATUS_IGNORE); + MPI_Wait(&mpi_req, MPI_STATUS_IGNORE); + } +#endif + for (i = 0; i < n; i++) + { + copy_rvec(buf[i], v[i]); + } + sfree(buf); + } +} + +static void exchange_state(const gmx_multisim_t *ms, int b, t_state *state) +{ + /* When t_state changes, this code should be updated. */ + int ngtc, nnhpres; + ngtc = state->ngtc * state->nhchainlength; + nnhpres = state->nnhpres* state->nhchainlength; + exchange_rvecs(ms, b, state->box, DIM); + exchange_rvecs(ms, b, state->box_rel, DIM); + exchange_rvecs(ms, b, state->boxv, DIM); + exchange_reals(ms, b, &(state->veta), 1); + exchange_reals(ms, b, &(state->vol0), 1); + exchange_rvecs(ms, b, state->svir_prev, DIM); + exchange_rvecs(ms, b, state->fvir_prev, DIM); + exchange_rvecs(ms, b, state->pres_prev, DIM); + exchange_doubles(ms, b, state->nosehoover_xi.data(), ngtc); + exchange_doubles(ms, b, state->nosehoover_vxi.data(), ngtc); + exchange_doubles(ms, b, state->nhpres_xi.data(), nnhpres); + exchange_doubles(ms, b, state->nhpres_vxi.data(), nnhpres); + exchange_doubles(ms, b, state->therm_integral.data(), state->ngtc); + exchange_doubles(ms, b, &state->baros_integral, 1); + exchange_rvecs(ms, b, state->x.rvec_array(), state->natoms); + exchange_rvecs(ms, b, state->v.rvec_array(), state->natoms); +} + +static void copy_state_serial(const t_state *src, t_state *dest) +{ + if (dest != src) + { + /* Currently the local state is always a pointer to the global + * in serial, so we should never end up here. + * TODO: Implement a (trivial) t_state copy once converted to C++. 
+ */ + GMX_RELEASE_ASSERT(false, "State copying is currently not implemented in replica exchange"); + } +} + +static void scale_velocities(gmx::ArrayRef<gmx::RVec> velocities, real fac) +{ + for (auto &v : velocities) + { + v *= fac; + } +} + +static void print_transition_matrix(FILE *fplog, int n, int **nmoves, const int *nattempt) +{ + int i, j, ntot; + float Tprint; + + ntot = nattempt[0] + nattempt[1]; + fprintf(fplog, "\n"); + fprintf(fplog, "Repl"); + for (i = 0; i < n; i++) + { + fprintf(fplog, " "); /* put the title closer to the center */ + } + fprintf(fplog, "Empirical Transition Matrix\n"); + + fprintf(fplog, "Repl"); + for (i = 0; i < n; i++) + { + fprintf(fplog, "%8d", (i+1)); + } + fprintf(fplog, "\n"); + + for (i = 0; i < n; i++) + { + fprintf(fplog, "Repl"); + for (j = 0; j < n; j++) + { + Tprint = 0.0; + if (nmoves[i][j] > 0) + { + Tprint = nmoves[i][j]/(2.0*ntot); + } + fprintf(fplog, "%8.4f", Tprint); + } + fprintf(fplog, "%3d\n", i); + } +} + +static void print_ind(FILE *fplog, const char *leg, int n, int *ind, const gmx_bool *bEx) +{ + int i; + + fprintf(fplog, "Repl %2s %2d", leg, ind[0]); + for (i = 1; i < n; i++) + { + fprintf(fplog, " %c %2d", (bEx != nullptr && bEx[i]) ? 'x' : ' ', ind[i]); + } + fprintf(fplog, "\n"); +} + +static void print_allswitchind(FILE *fplog, int n, int *pind, int *allswaps, int *tmpswap) +{ + int i; + + for (i = 0; i < n; i++) + { + tmpswap[i] = allswaps[i]; + } + for (i = 0; i < n; i++) + { + allswaps[i] = tmpswap[pind[i]]; + } + + fprintf(fplog, "\nAccepted Exchanges: "); + for (i = 0; i < n; i++) + { + fprintf(fplog, "%d ", pind[i]); + } + fprintf(fplog, "\n"); + + /* the "Order After Exchange" is the state label corresponding to the configuration that + started in state listed in order, i.e. + + 3 0 1 2 + + means that the: + configuration starting in simulation 3 is now in simulation 0, + configuration starting in simulation 0 is now in simulation 1, + configuration starting in simulation 1 is now in simulation 2, + configuration starting in simulation 2 is now in simulation 3 + */ + fprintf(fplog, "Order After Exchange: "); + for (i = 0; i < n; i++) + { + fprintf(fplog, "%d ", allswaps[i]); + } + fprintf(fplog, "\n\n"); +} + +static void print_prob(FILE *fplog, const char *leg, int n, real *prob) +{ + int i; + char buf[8]; + + fprintf(fplog, "Repl %2s ", leg); + for (i = 1; i < n; i++) + { + if (prob[i] >= 0) + { + sprintf(buf, "%4.2f", prob[i]); + fprintf(fplog, " %3s", buf[0] == '1' ? "1.0" : buf+1); + } + else + { + fprintf(fplog, " "); + } + } + fprintf(fplog, "\n"); +} + +static void print_count(FILE *fplog, const char *leg, int n, int *count) +{ + int i; + + fprintf(fplog, "Repl %2s ", leg); + for (i = 1; i < n; i++) + { + fprintf(fplog, " %4d", count[i]); + } + fprintf(fplog, "\n"); +} + +static real calc_delta(FILE *fplog, gmx_bool bPrint, struct gmx_repl_ex *re, int a, int b, int ap, int bp) +{ + + real ediff, dpV, delta = 0; + real *Epot = re->Epot; + real *Vol = re->Vol; + real **de = re->de; + real *beta = re->beta; + + /* Two cases; we are permuted and not. In all cases, setting ap = a and bp = b will reduce + to the non permuted case */ + + switch (re->type) + { + case ereTEMP: + /* + * Okabe et. al. Chem. Phys. Lett. 335 (2001) 435-439 + */ + ediff = Epot[b] - Epot[a]; + delta = -(beta[bp] - beta[ap])*ediff; + break; + case ereLAMBDA: + /* two cases: when we are permuted, and not. 
*/ + /* non-permuted: + ediff = E_new - E_old + = [H_b(x_a) + H_a(x_b)] - [H_b(x_b) + H_a(x_a)] + = [H_b(x_a) - H_a(x_a)] + [H_a(x_b) - H_b(x_b)] + = de[b][a] + de[a][b] */ + + /* permuted: + ediff = E_new - E_old + = [H_bp(x_a) + H_ap(x_b)] - [H_bp(x_b) + H_ap(x_a)] + = [H_bp(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_bp(x_b)] + = [H_bp(x_a) - H_a(x_a) + H_a(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_b(x_b) + H_b(x_b) - H_bp(x_b)] + = [H_bp(x_a) - H_a(x_a)] - [H_ap(x_a) - H_a(x_a)] + [H_ap(x_b) - H_b(x_b)] - H_bp(x_b) - H_b(x_b)] + = (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b]) */ + /* but, in the current code implementation, we flip configurations, not indices . . . + So let's examine that. + = [H_b(x_ap) - H_a(x_a)] - [H_a(x_ap) - H_a(x_a)] + [H_a(x_bp) - H_b(x_b)] - H_b(x_bp) - H_b(x_b)] + = [H_b(x_ap) - H_a(x_ap)] + [H_a(x_bp) - H_b(x_pb)] + = (de[b][ap] - de[a][ap]) + (de[a][bp] - de[b][bp] + So, if we exchange b<=> bp and a<=> ap, we return to the same result. + So the simple solution is to flip the + position of perturbed and original indices in the tests. + */ + + ediff = (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b]); + delta = ediff*beta[a]; /* assume all same temperature in this case */ + break; + case ereTL: + /* not permuted: */ + /* delta = reduced E_new - reduced E_old + = [beta_b H_b(x_a) + beta_a H_a(x_b)] - [beta_b H_b(x_b) + beta_a H_a(x_a)] + = [beta_b H_b(x_a) - beta_a H_a(x_a)] + [beta_a H_a(x_b) - beta_b H_b(x_b)] + = [beta_b dH_b(x_a) + beta_b H_a(x_a) - beta_a H_a(x_a)] + + [beta_a dH_a(x_b) + beta_a H_b(x_b) - beta_b H_b(x_b)] + = [beta_b dH_b(x_a) + [beta_a dH_a(x_b) + + beta_b (H_a(x_a) - H_b(x_b)]) - beta_a (H_a(x_a) - H_b(x_b)) + = beta_b dH_b(x_a) + beta_a dH_a(x_b) - (beta_b - beta_a)(H_b(x_b) - H_a(x_a) */ + /* delta = beta[b]*de[b][a] + beta[a]*de[a][b] - (beta[b] - beta[a])*(Epot[b] - Epot[a]; */ + /* permuted (big breath!) */ + /* delta = reduced E_new - reduced E_old + = [beta_bp H_bp(x_a) + beta_ap H_ap(x_b)] - [beta_bp H_bp(x_b) + beta_ap H_ap(x_a)] + = [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)] + = [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)] + - beta_pb H_a(x_a) + beta_ap H_a(x_a) + beta_pb H_a(x_a) - beta_ap H_a(x_a) + - beta_ap H_b(x_b) + beta_bp H_b(x_b) + beta_ap H_b(x_b) - beta_bp H_b(x_b) + = [(beta_bp H_bp(x_a) - beta_bp H_a(x_a)) - (beta_ap H_ap(x_a) - beta_ap H_a(x_a))] + + [(beta_ap H_ap(x_b) - beta_ap H_b(x_b)) - (beta_bp H_bp(x_b) - beta_bp H_b(x_b))] + + beta_pb H_a(x_a) - beta_ap H_a(x_a) + beta_ap H_b(x_b) - beta_bp H_b(x_b) + = [beta_bp (H_bp(x_a) - H_a(x_a)) - beta_ap (H_ap(x_a) - H_a(x_a))] + + [beta_ap (H_ap(x_b) - H_b(x_b)) - beta_bp (H_bp(x_b) - H_b(x_b))] + + beta_pb (H_a(x_a) - H_b(x_b)) - beta_ap (H_a(x_a) - H_b(x_b)) + = ([beta_bp de[bp][a] - beta_ap de[ap][a]) + beta_ap de[ap][b] - beta_bp de[bp][b]) + + (beta_pb-beta_ap)(H_a(x_a) - H_b(x_b)) */ + delta = beta[bp]*(de[bp][a] - de[bp][b]) + beta[ap]*(de[ap][b] - de[ap][a]) - (beta[bp]-beta[ap])*(Epot[b]-Epot[a]); + break; + default: + gmx_incons("Unknown replica exchange quantity"); + } + if (bPrint) + { + fprintf(fplog, "Repl %d <-> %d dE_term = %10.3e (kT)\n", a, b, delta); + } + if (re->bNPT) + { + /* revist the calculation for 5.0. Might be some improvements. 
*/ + dpV = (beta[ap]*re->pres[ap]-beta[bp]*re->pres[bp])*(Vol[b]-Vol[a])/PRESFAC; + if (bPrint) + { + fprintf(fplog, " dpV = %10.3e d = %10.3e\n", dpV, delta + dpV); + } + delta += dpV; + } + return delta; +} + +static void +test_for_replica_exchange(FILE *fplog, + const gmx_multisim_t *ms, + struct gmx_repl_ex *re, + const gmx_enerdata_t *enerd, + real vol, + int64_t step, + real time) +{ + int m, i, j, a, b, ap, bp, i0, i1, tmp; + real delta = 0; + gmx_bool bPrint, bMultiEx; + gmx_bool *bEx = re->bEx; + real *prob = re->prob; + int *pind = re->destinations; /* permuted index */ + gmx_bool bEpot = FALSE; + gmx_bool bDLambda = FALSE; + gmx_bool bVol = FALSE; + gmx::ThreeFry2x64<64> rng(re->seed, gmx::RandomDomain::ReplicaExchange); + gmx::UniformRealDistribution<real> uniformRealDist; + gmx::UniformIntDistribution<int> uniformNreplDist(0, re->nrepl-1); + + bMultiEx = (re->nex > 1); /* multiple exchanges at each state */ + fprintf(fplog, "Replica exchange at step %" PRId64 " time %.5f\n", step, time); + + if (re->bNPT) + { + for (i = 0; i < re->nrepl; i++) + { + re->Vol[i] = 0; + } + bVol = TRUE; + re->Vol[re->repl] = vol; + } + if ((re->type == ereTEMP || re->type == ereTL)) + { + for (i = 0; i < re->nrepl; i++) + { + re->Epot[i] = 0; + } + bEpot = TRUE; + re->Epot[re->repl] = enerd->term[F_EPOT]; + /* temperatures of different states*/ + for (i = 0; i < re->nrepl; i++) + { + re->beta[i] = 1.0/(re->q[ereTEMP][i]*BOLTZ); + } + } + else + { + for (i = 0; i < re->nrepl; i++) + { + re->beta[i] = 1.0/(re->temp*BOLTZ); /* we have a single temperature */ + } + } + if (re->type == ereLAMBDA || re->type == ereTL) + { + bDLambda = TRUE; + /* lambda differences. */ + /* de[i][j] is the energy of the jth simulation in the ith Hamiltonian + minus the energy of the jth simulation in the jth Hamiltonian */ + for (i = 0; i < re->nrepl; i++) + { + for (j = 0; j < re->nrepl; j++) + { + re->de[i][j] = 0; + } + } + for (i = 0; i < re->nrepl; i++) + { + re->de[i][re->repl] = (enerd->enerpart_lambda[static_cast<int>(re->q[ereLAMBDA][i])+1]-enerd->enerpart_lambda[0]); + } + } + + /* now actually do the communication */ + if (bVol) + { + gmx_sum_sim(re->nrepl, re->Vol, ms); + } + if (bEpot) + { + gmx_sum_sim(re->nrepl, re->Epot, ms); + } + if (bDLambda) + { + for (i = 0; i < re->nrepl; i++) + { + gmx_sum_sim(re->nrepl, re->de[i], ms); + } + } + + /* make a duplicate set of indices for shuffling */ + for (i = 0; i < re->nrepl; i++) + { + pind[i] = re->ind[i]; + } + + rng.restart( step, 0 ); + + if (bMultiEx) + { + /* multiple random switch exchange */ + int nself = 0; + + + for (i = 0; i < re->nex + nself; i++) + { + // For now this is superfluous, but just in case we ever add more + // calls in different branches it is safer to always reset the distribution. + uniformNreplDist.reset(); + + /* randomly select a pair */ + /* in theory, could reduce this by identifying only which switches had a nonneglibible + probability of occurring (log p > -100) and only operate on those switches */ + /* find out which state it is from, and what label that state currently has. Likely + more work that useful. */ + i0 = uniformNreplDist(rng); + i1 = uniformNreplDist(rng); + if (i0 == i1) + { + nself++; + continue; /* self-exchange, back up and do it again */ + } + + a = re->ind[i0]; /* what are the indices of these states? 
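+ re->ind[] gives the state label each replica slot started this exchange phase with, while pind[] is the duplicate made above that accumulates the swaps accepted so far, so a, b are the unpermuted labels of the chosen pair and ap, bp their currently permuted values.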
*/ + b = re->ind[i1]; + ap = pind[i0]; + bp = pind[i1]; + + bPrint = FALSE; /* too noisy */ + /* calculate the energy difference */ + /* if the code changes to flip the STATES, rather than the configurations, + use the commented version of the code */ + /* delta = calc_delta(fplog,bPrint,re,a,b,ap,bp); */ + delta = calc_delta(fplog, bPrint, re, ap, bp, a, b); + + /* we actually only use the first space in the prob and bEx array, + since there are actually many switches between pairs. */ + + if (delta <= 0) + { + /* accepted */ + prob[0] = 1; + bEx[0] = TRUE; + } + else + { + if (delta > c_probabilityCutoff) + { + prob[0] = 0; + } + else + { + prob[0] = exp(-delta); + } + // roll a number to determine if accepted. For now it is superfluous to + // reset, but just in case we ever add more calls in different branches + // it is safer to always reset the distribution. + uniformRealDist.reset(); + bEx[0] = uniformRealDist(rng) < prob[0]; + } + re->prob_sum[0] += prob[0]; + + if (bEx[0]) + { + /* swap the states */ + tmp = pind[i0]; + pind[i0] = pind[i1]; + pind[i1] = tmp; + } + } + re->nattempt[0]++; /* keep track of total permutation trials here */ + print_allswitchind(fplog, re->nrepl, pind, re->allswaps, re->tmpswap); + } + else + { + /* standard nearest neighbor replica exchange */ + + m = (step / re->nst) % 2; + for (i = 1; i < re->nrepl; i++) + { + a = re->ind[i-1]; + b = re->ind[i]; + + bPrint = (re->repl == a || re->repl == b); + if (i % 2 == m) + { + delta = calc_delta(fplog, bPrint, re, a, b, a, b); + if (delta <= 0) + { + /* accepted */ + prob[i] = 1; + bEx[i] = TRUE; + } + else + { + if (delta > c_probabilityCutoff) + { + prob[i] = 0; + } + else + { + prob[i] = exp(-delta); + } + // roll a number to determine if accepted. For now it is superfluous to + // reset, but just in case we ever add more calls in different branches + // it is safer to always reset the distribution. + uniformRealDist.reset(); + bEx[i] = uniformRealDist(rng) < prob[i]; + } + re->prob_sum[i] += prob[i]; + + if (bEx[i]) + { + /* swap these two */ + tmp = pind[i-1]; + pind[i-1] = pind[i]; + pind[i] = tmp; + re->nexchange[i]++; /* statistics for back compatibility */ + } + } + else + { + prob[i] = -1; + bEx[i] = FALSE; + } + } + /* print some statistics */ + print_ind(fplog, "ex", re->nrepl, re->ind, bEx); + print_prob(fplog, "pr", re->nrepl, prob); + fprintf(fplog, "\n"); + re->nattempt[m]++; + } + + /* record which moves were made and accepted */ + for (i = 0; i < re->nrepl; i++) + { + re->nmoves[re->ind[i]][pind[i]] += 1; + re->nmoves[pind[i]][re->ind[i]] += 1; + } + fflush(fplog); /* make sure we can see what the last exchange was */ +} + +static void +cyclic_decomposition(const int *destinations, + int **cyclic, + gmx_bool *incycle, + const int nrepl, + int *nswap) +{ + + int i, j, c, p; + int maxlen = 1; + for (i = 0; i < nrepl; i++) + { + incycle[i] = FALSE; + } + for (i = 0; i < nrepl; i++) /* one cycle for each replica */ + { + if (incycle[i]) + { + cyclic[i][0] = -1; + continue; + } + cyclic[i][0] = i; + incycle[i] = TRUE; + c = 1; + p = i; + for (j = 0; j < nrepl; j++) /* potentially all cycles are part, but we will break first */ + { + p = destinations[p]; /* start permuting */ + if (p == i) + { + cyclic[i][c] = -1; + if (c > maxlen) + { + maxlen = c; + } + break; /* we've reached the original element, the cycle is complete, and we marked the end. 
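+ As an illustration, destinations = {1, 0, 3, 2} decomposes into the two 2-cycles (0 1) and (2 3), each terminated by a -1 marker, giving maxlen = 2 and hence *nswap = 1 round of pairwise swaps.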
*/ + } + else + { + cyclic[i][c] = p; /* each permutation gives a new member of the cycle */ + incycle[p] = TRUE; + c++; + } + } + } + *nswap = maxlen - 1; + + if (debug) + { + for (i = 0; i < nrepl; i++) + { + fprintf(debug, "Cycle %d:", i); + for (j = 0; j < nrepl; j++) + { + if (cyclic[i][j] < 0) + { + break; + } + fprintf(debug, "%2d", cyclic[i][j]); + } + fprintf(debug, "\n"); + } + fflush(debug); + } +} + +static void +compute_exchange_order(int **cyclic, + int **order, + const int nrepl, + const int maxswap) +{ + int i, j; + + for (j = 0; j < maxswap; j++) + { + for (i = 0; i < nrepl; i++) + { + if (cyclic[i][j+1] >= 0) + { + order[cyclic[i][j+1]][j] = cyclic[i][j]; + order[cyclic[i][j]][j] = cyclic[i][j+1]; + } + } + for (i = 0; i < nrepl; i++) + { + if (order[i][j] < 0) + { + order[i][j] = i; /* if it's not exchanging, it should stay this round*/ + } + } + } + + if (debug) + { + fprintf(debug, "Replica Exchange Order\n"); + for (i = 0; i < nrepl; i++) + { + fprintf(debug, "Replica %d:", i); + for (j = 0; j < maxswap; j++) + { + if (order[i][j] < 0) + { + break; + } + fprintf(debug, "%2d", order[i][j]); + } + fprintf(debug, "\n"); + } + fflush(debug); + } +} + +static void +prepare_to_do_exchange(struct gmx_repl_ex *re, + const int replica_id, + int *maxswap, + gmx_bool *bThisReplicaExchanged) +{ + int i, j; + /* Hold the cyclic decomposition of the (multiple) replica + * exchange. */ + gmx_bool bAnyReplicaExchanged = FALSE; + *bThisReplicaExchanged = FALSE; + + for (i = 0; i < re->nrepl; i++) + { + if (re->destinations[i] != re->ind[i]) + { + /* only mark as exchanged if the index has been shuffled */ + bAnyReplicaExchanged = TRUE; + break; + } + } + if (bAnyReplicaExchanged) + { + /* reinitialize the placeholder arrays */ + for (i = 0; i < re->nrepl; i++) + { + for (j = 0; j < re->nrepl; j++) + { + re->cyclic[i][j] = -1; + re->order[i][j] = -1; + } + } + + /* Identify the cyclic decomposition of the permutation (very + * fast if neighbor replica exchange). */ + cyclic_decomposition(re->destinations, re->cyclic, re->incycle, re->nrepl, maxswap); + + /* Now translate the decomposition into a replica exchange + * order at each step. */ + compute_exchange_order(re->cyclic, re->order, re->nrepl, *maxswap); + + /* Did this replica do any exchange at any point? */ + for (j = 0; j < *maxswap; j++) + { + if (replica_id != re->order[replica_id][j]) + { + *bThisReplicaExchanged = TRUE; + break; + } + } + } +} + +gmx_bool replica_exchange(FILE *fplog, const t_commrec *cr, + const gmx_multisim_t *ms, struct gmx_repl_ex *re, + t_state *state, const gmx_enerdata_t *enerd, + t_state *state_local, int64_t step, real time) +{ + int j; + int replica_id = 0; + int exchange_partner; + int maxswap = 0; + /* Number of rounds of exchanges needed to deal with any multiple + * exchanges. */ + /* Where each replica ends up after the exchange attempt(s). */ + /* The order in which multiple exchanges will occur. */ + gmx_bool bThisReplicaExchanged = FALSE; + + if (MASTER(cr)) + { + replica_id = re->repl; + test_for_replica_exchange(fplog, ms, re, enerd, det(state_local->box), step, time); + prepare_to_do_exchange(re, replica_id, &maxswap, &bThisReplicaExchanged); + } + /* Do intra-simulation broadcast so all processors belonging to + * each simulation know whether they need to participate in + * collecting the state. Otherwise, they might as well get on with + * the next thing to do. 
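+ The flag is broadcast below as sizeof(gmx_bool) raw bytes over cr->mpi_comm_mygroup, i.e. only among the ranks of this simulation, from the master rank that evaluated the exchange.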
*/ + if (DOMAINDECOMP(cr)) + { +#if GMX_MPI + MPI_Bcast(&bThisReplicaExchanged, sizeof(gmx_bool), MPI_BYTE, MASTERRANK(cr), + cr->mpi_comm_mygroup); +#endif + } + + if (bThisReplicaExchanged) + { + /* Exchange the states */ + /* Collect the global state on the master node */ + if (DOMAINDECOMP(cr)) + { + dd_collect_state(cr->dd, state_local, state); + } + else + { + copy_state_serial(state_local, state); + } + + if (MASTER(cr)) + { + /* There will be only one swap cycle with standard replica + * exchange, but there may be multiple swap cycles if we + * allow multiple swaps. */ + + for (j = 0; j < maxswap; j++) + { + exchange_partner = re->order[replica_id][j]; + + if (exchange_partner != replica_id) + { + /* Exchange the global states between the master nodes */ + if (debug) + { + fprintf(debug, "Exchanging %d with %d\n", replica_id, exchange_partner); + } + exchange_state(ms, exchange_partner, state); + } + } + /* For temperature-type replica exchange, we need to scale + * the velocities. */ + if (re->type == ereTEMP || re->type == ereTL) + { + scale_velocities(state->v, + std::sqrt(re->q[ereTEMP][replica_id]/re->q[ereTEMP][re->destinations[replica_id]])); + } + + } + + /* With domain decomposition the global state is distributed later */ + if (!DOMAINDECOMP(cr)) + { + /* Copy the global state to the local state data structure */ + copy_state_serial(state, state_local); + } + } + + return bThisReplicaExchanged; +} + +void print_replica_exchange_statistics(FILE *fplog, struct gmx_repl_ex *re) +{ + int i; + + fprintf(fplog, "\nReplica exchange statistics\n"); + + if (re->nex == 0) + { + fprintf(fplog, "Repl %d attempts, %d odd, %d even\n", + re->nattempt[0]+re->nattempt[1], re->nattempt[1], re->nattempt[0]); + + fprintf(fplog, "Repl average probabilities:\n"); + for (i = 1; i < re->nrepl; i++) + { + if (re->nattempt[i%2] == 0) + { + re->prob[i] = 0; + } + else + { + re->prob[i] = re->prob_sum[i]/re->nattempt[i%2]; + } + } + print_ind(fplog, "", re->nrepl, re->ind, nullptr); + print_prob(fplog, "", re->nrepl, re->prob); + + fprintf(fplog, "Repl number of exchanges:\n"); + print_ind(fplog, "", re->nrepl, re->ind, nullptr); + print_count(fplog, "", re->nrepl, re->nexchange); + + fprintf(fplog, "Repl average number of exchanges:\n"); + for (i = 1; i < re->nrepl; i++) + { + if (re->nattempt[i%2] == 0) + { + re->prob[i] = 0; + } + else + { + re->prob[i] = (static_cast<real>(re->nexchange[i]))/re->nattempt[i%2]; + } + } + print_ind(fplog, "", re->nrepl, re->ind, nullptr); + print_prob(fplog, "", re->nrepl, re->prob); + + fprintf(fplog, "\n"); + } + /* print the transition matrix */ + print_transition_matrix(fplog, re->nrepl, re->nmoves, re->nattempt); +} + +//! \endcond diff --git a/patches/gromacs-2019.1.diff/src/gromacs/mdrun/replicaexchange.h b/patches/gromacs-2019.1.diff/src/gromacs/mdrun/replicaexchange.h new file mode 100644 index 0000000000000000000000000000000000000000..bff7c22ff9ac47719ec2a283253792f5dfec4e1d --- /dev/null +++ b/patches/gromacs-2019.1.diff/src/gromacs/mdrun/replicaexchange.h @@ -0,0 +1,116 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. 
+ * Copyright (c) 2011,2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \libinternal \file + * + * \brief Declares the routines for replica exchange. + * + * \author David van der Spoel <david.vanderspoel@icm.uu.se> + * \author Mark Abraham <mark.j.abraham@gmail.com> + * + * \ingroup module_mdrun + */ +#ifndef GMX_MDRUN_REPLICAEXCHANGE_H +#define GMX_MDRUN_REPLICAEXCHANGE_H + +#include <cstdio> + +#include "gromacs/utility/basedefinitions.h" +#include "gromacs/utility/real.h" + +struct gmx_enerdata_t; +struct gmx_multisim_t; +struct t_commrec; +struct t_inputrec; +class t_state; + +/*! \libinternal + * \brief The parameters for the replica exchange algorithm. */ +struct ReplicaExchangeParameters +{ + //! Interval in steps at which to attempt exchanges, 0 means no replica exchange. + int exchangeInterval = 0; + //! The number of exchanges to attempt at an exchange step. + int numExchanges = 0; + //! The random seed, -1 means generate a seed. + int randomSeed = -1; +}; + +//! Abstract type for replica exchange +typedef struct gmx_repl_ex *gmx_repl_ex_t; + +/*! \brief Setup function. + * + * Should only be called on the master ranks */ +gmx_repl_ex_t +init_replica_exchange(FILE *fplog, + const gmx_multisim_t *ms, + int numAtomsInSystem, + const t_inputrec *ir, + const ReplicaExchangeParameters &replExParams); + +/*! \brief Attempts replica exchange. + * + * Should be called on all ranks. When running each replica in + * parallel, this routine collects the state on the master rank before + * exchange. With domain decomposition, the global state after + * exchange is stored in state and still needs to be redistributed + * over the ranks. + * + * \returns TRUE if the state has been exchanged. + */ +gmx_bool replica_exchange(FILE *fplog, + const t_commrec *cr, + const gmx_multisim_t *ms, + gmx_repl_ex_t re, + t_state *state, const gmx_enerdata_t *enerd, + t_state *state_local, + int64_t step, real time); + +/*! 
\brief Prints replica exchange statistics to the log file. + * + * Should only be called on the master ranks */ +void print_replica_exchange_statistics(FILE *fplog, gmx_repl_ex_t re); + +/* PLUMED HREX */ +extern int replica_exchange_get_repl(const gmx_repl_ex_t re); +extern int replica_exchange_get_nrepl(const gmx_repl_ex_t re); +extern void pd_collect_state(const t_commrec *cr, t_state *state); +extern void exchange_state(const gmx_multisim_t *ms, int b, t_state *state); +extern void copy_state_serial(const t_state *src, t_state *dest); +/* END PLUMED HREX */ + +#endif diff --git a/patches/gromacs-2019.1.diff/src/gromacs/mdrun/replicaexchange.h.preplumed b/patches/gromacs-2019.1.diff/src/gromacs/mdrun/replicaexchange.h.preplumed new file mode 100644 index 0000000000000000000000000000000000000000..8f4211febe707fc6d474d2a5f840c2a2c79ca0de --- /dev/null +++ b/patches/gromacs-2019.1.diff/src/gromacs/mdrun/replicaexchange.h.preplumed @@ -0,0 +1,108 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2011,2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \libinternal \file + * + * \brief Declares the routines for replica exchange. + * + * \author David van der Spoel <david.vanderspoel@icm.uu.se> + * \author Mark Abraham <mark.j.abraham@gmail.com> + * + * \ingroup module_mdrun + */ +#ifndef GMX_MDRUN_REPLICAEXCHANGE_H +#define GMX_MDRUN_REPLICAEXCHANGE_H + +#include <cstdio> + +#include "gromacs/utility/basedefinitions.h" +#include "gromacs/utility/real.h" + +struct gmx_enerdata_t; +struct gmx_multisim_t; +struct t_commrec; +struct t_inputrec; +class t_state; + +/*! \libinternal + * \brief The parameters for the replica exchange algorithm. */ +struct ReplicaExchangeParameters +{ + //! 
Interval in steps at which to attempt exchanges, 0 means no replica exchange. + int exchangeInterval = 0; + //! The number of exchanges to attempt at an exchange step. + int numExchanges = 0; + //! The random seed, -1 means generate a seed. + int randomSeed = -1; +}; + +//! Abstract type for replica exchange +typedef struct gmx_repl_ex *gmx_repl_ex_t; + +/*! \brief Setup function. + * + * Should only be called on the master ranks */ +gmx_repl_ex_t +init_replica_exchange(FILE *fplog, + const gmx_multisim_t *ms, + int numAtomsInSystem, + const t_inputrec *ir, + const ReplicaExchangeParameters &replExParams); + +/*! \brief Attempts replica exchange. + * + * Should be called on all ranks. When running each replica in + * parallel, this routine collects the state on the master rank before + * exchange. With domain decomposition, the global state after + * exchange is stored in state and still needs to be redistributed + * over the ranks. + * + * \returns TRUE if the state has been exchanged. + */ +gmx_bool replica_exchange(FILE *fplog, + const t_commrec *cr, + const gmx_multisim_t *ms, + gmx_repl_ex_t re, + t_state *state, const gmx_enerdata_t *enerd, + t_state *state_local, + int64_t step, real time); + +/*! \brief Prints replica exchange statistics to the log file. + * + * Should only be called on the master ranks */ +void print_replica_exchange_statistics(FILE *fplog, gmx_repl_ex_t re); + +#endif diff --git a/patches/gromacs-2019.1.diff/src/gromacs/mdrun/runner.cpp b/patches/gromacs-2019.1.diff/src/gromacs/mdrun/runner.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7d206adf79a142aeb3bddc42b5e8e69c81dff63b --- /dev/null +++ b/patches/gromacs-2019.1.diff/src/gromacs/mdrun/runner.cpp @@ -0,0 +1,1956 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. 
+ * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \internal \file + * + * \brief Implements the MD runner routine calling all integrators. + * + * \author David van der Spoel <david.vanderspoel@icm.uu.se> + * \ingroup module_mdrun + */ +#include "gmxpre.h" + +#include "runner.h" + +#include "config.h" + +#include <cassert> +#include <cinttypes> +#include <csignal> +#include <cstdlib> +#include <cstring> + +#include <algorithm> + +#include "gromacs/commandline/filenm.h" +#include "gromacs/compat/make_unique.h" +#include "gromacs/domdec/domdec.h" +#include "gromacs/domdec/domdec_struct.h" +#include "gromacs/domdec/localatomsetmanager.h" +#include "gromacs/ewald/ewald-utils.h" +#include "gromacs/ewald/pme.h" +#include "gromacs/ewald/pme-gpu-program.h" +#include "gromacs/fileio/checkpoint.h" +#include "gromacs/fileio/gmxfio.h" +#include "gromacs/fileio/oenv.h" +#include "gromacs/fileio/tpxio.h" +#include "gromacs/gmxlib/network.h" +#include "gromacs/gmxlib/nrnb.h" +#include "gromacs/gpu_utils/clfftinitializer.h" +#include "gromacs/gpu_utils/gpu_utils.h" +#include "gromacs/hardware/cpuinfo.h" +#include "gromacs/hardware/detecthardware.h" +#include "gromacs/hardware/printhardware.h" +#include "gromacs/listed-forces/disre.h" +#include "gromacs/listed-forces/gpubonded.h" +#include "gromacs/listed-forces/orires.h" +#include "gromacs/math/functions.h" +#include "gromacs/math/utilities.h" +#include "gromacs/math/vec.h" +#include "gromacs/mdlib/boxdeformation.h" +#include "gromacs/mdlib/calc_verletbuf.h" +#include "gromacs/mdlib/forcerec.h" +#include "gromacs/mdlib/gmx_omp_nthreads.h" +#include "gromacs/mdlib/makeconstraints.h" +#include "gromacs/mdlib/md_support.h" +#include "gromacs/mdlib/mdatoms.h" +#include "gromacs/mdlib/mdrun.h" +#include "gromacs/mdlib/membed.h" +#include "gromacs/mdlib/nb_verlet.h" +#include "gromacs/mdlib/nbnxn_gpu_data_mgmt.h" +#include "gromacs/mdlib/nbnxn_search.h" +#include "gromacs/mdlib/nbnxn_tuning.h" +#include "gromacs/mdlib/ppforceworkload.h" +#include "gromacs/mdlib/qmmm.h" +#include "gromacs/mdlib/sighandler.h" +#include "gromacs/mdlib/sim_util.h" +#include "gromacs/mdlib/stophandler.h" +#include "gromacs/mdrun/legacymdrunoptions.h" +#include "gromacs/mdrun/logging.h" +#include "gromacs/mdrun/multisim.h" +#include "gromacs/mdrun/simulationcontext.h" +#include "gromacs/mdrunutility/mdmodules.h" +#include "gromacs/mdrunutility/threadaffinity.h" +#include "gromacs/mdtypes/commrec.h" +#include "gromacs/mdtypes/fcdata.h" +#include "gromacs/mdtypes/inputrec.h" +#include "gromacs/mdtypes/md_enums.h" +#include "gromacs/mdtypes/observableshistory.h" +#include "gromacs/mdtypes/state.h" +#include "gromacs/pbcutil/pbc.h" +#include "gromacs/pulling/output.h" +#include "gromacs/pulling/pull.h" +#include "gromacs/pulling/pull_rotation.h" +#include "gromacs/restraint/manager.h" +#include "gromacs/restraint/restraintmdmodule.h" +#include "gromacs/restraint/restraintpotential.h" +#include "gromacs/swap/swapcoords.h" +#include "gromacs/taskassignment/decidegpuusage.h" +#include "gromacs/taskassignment/resourcedivision.h" +#include "gromacs/taskassignment/taskassignment.h" +#include "gromacs/taskassignment/usergpuids.h" +#include "gromacs/timing/wallcycle.h" +#include "gromacs/topology/mtop_util.h" +#include "gromacs/trajectory/trajectoryframe.h" +#include "gromacs/utility/basenetwork.h" +#include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/exceptions.h" +#include 
"gromacs/utility/fatalerror.h" +#include "gromacs/utility/filestream.h" +#include "gromacs/utility/gmxassert.h" +#include "gromacs/utility/gmxmpi.h" +#include "gromacs/utility/logger.h" +#include "gromacs/utility/loggerbuilder.h" +#include "gromacs/utility/physicalnodecommunicator.h" +#include "gromacs/utility/pleasecite.h" +#include "gromacs/utility/programcontext.h" +#include "gromacs/utility/smalloc.h" +#include "gromacs/utility/stringutil.h" + +#include "integrator.h" +#include "replicaexchange.h" + +#if GMX_FAHCORE +#include "corewrap.h" +#endif + +/* PLUMED */ +#include "../../../Plumed.h" +extern int plumedswitch; +extern plumed plumedmain; +/* END PLUMED */ +namespace gmx +{ + +/*! \brief Barrier for safe simultaneous thread access to mdrunner data + * + * Used to ensure that the master thread does not modify mdrunner during copy + * on the spawned threads. */ +static void threadMpiMdrunnerAccessBarrier() +{ +#if GMX_THREAD_MPI + MPI_Barrier(MPI_COMM_WORLD); +#endif +} + +Mdrunner Mdrunner::cloneOnSpawnedThread() const +{ + auto newRunner = Mdrunner(); + + // All runners in the same process share a restraint manager resource because it is + // part of the interface to the client code, which is associated only with the + // original thread. Handles to the same resources can be obtained by copy. + { + newRunner.restraintManager_ = compat::make_unique<RestraintManager>(*restraintManager_); + } + + // Copy original cr pointer before master thread can pass the thread barrier + newRunner.cr = reinitialize_commrec_for_this_thread(cr); + + // Copy members of master runner. + // \todo Replace with builder when Simulation context and/or runner phases are better defined. + // Ref https://redmine.gromacs.org/issues/2587 and https://redmine.gromacs.org/issues/2375 + newRunner.hw_opt = hw_opt; + newRunner.filenames = filenames; + + newRunner.oenv = oenv; + newRunner.mdrunOptions = mdrunOptions; + newRunner.domdecOptions = domdecOptions; + newRunner.nbpu_opt = nbpu_opt; + newRunner.pme_opt = pme_opt; + newRunner.pme_fft_opt = pme_fft_opt; + newRunner.bonded_opt = bonded_opt; + newRunner.nstlist_cmdline = nstlist_cmdline; + newRunner.replExParams = replExParams; + newRunner.pforce = pforce; + newRunner.ms = ms; + newRunner.stopHandlerBuilder_ = compat::make_unique<StopHandlerBuilder>(*stopHandlerBuilder_); + + threadMpiMdrunnerAccessBarrier(); + + GMX_RELEASE_ASSERT(!MASTER(newRunner.cr), "cloneOnSpawnedThread should only be called on spawned threads"); + + return newRunner; +} + +/*! \brief The callback used for running on spawned threads. + * + * Obtains the pointer to the master mdrunner object from the one + * argument permitted to the thread-launch API call, copies it to make + * a new runner for this thread, reinitializes necessary data, and + * proceeds to the simulation. */ +static void mdrunner_start_fn(const void *arg) +{ + try + { + auto masterMdrunner = reinterpret_cast<const gmx::Mdrunner *>(arg); + /* copy the arg list to make sure that it's thread-local. This + doesn't copy pointed-to items, of course; fnm, cr and fplog + are reset in the call below, all others should be const. */ + gmx::Mdrunner mdrunner = masterMdrunner->cloneOnSpawnedThread(); + mdrunner.mdrunner(); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; +} + + +/*! \brief Start thread-MPI threads. + * + * Called by mdrunner() to start a specific number of threads + * (including the main thread) for thread-parallel runs. This in turn + * calls mdrunner() for each thread. All options are the same as for + * mdrunner(). 
*/ +t_commrec *Mdrunner::spawnThreads(int numThreadsToLaunch) const +{ + + /* first check whether we even need to start tMPI */ + if (numThreadsToLaunch < 2) + { + return cr; + } + +#if GMX_THREAD_MPI + /* now spawn new threads that start mdrunner_start_fn(), while + the main thread returns, we set thread affinity later */ + if (tMPI_Init_fn(TRUE, numThreadsToLaunch, TMPI_AFFINITY_NONE, + mdrunner_start_fn, static_cast<const void*>(this)) != TMPI_SUCCESS) + { + GMX_THROW(gmx::InternalError("Failed to spawn thread-MPI threads")); + } + + threadMpiMdrunnerAccessBarrier(); +#else + GMX_UNUSED_VALUE(mdrunner_start_fn); +#endif + + return reinitialize_commrec_for_this_thread(cr); +} + +} // namespace gmx + +/*! \brief Initialize variables for Verlet scheme simulation */ +static void prepare_verlet_scheme(FILE *fplog, + t_commrec *cr, + t_inputrec *ir, + int nstlist_cmdline, + const gmx_mtop_t *mtop, + const matrix box, + bool makeGpuPairList, + const gmx::CpuInfo &cpuinfo) +{ + /* For NVE simulations, we will retain the initial list buffer */ + if (EI_DYNAMICS(ir->eI) && + ir->verletbuf_tol > 0 && + !(EI_MD(ir->eI) && ir->etc == etcNO)) + { + /* Update the Verlet buffer size for the current run setup */ + + /* Here we assume SIMD-enabled kernels are being used. But as currently + * calc_verlet_buffer_size gives the same results for 4x8 and 4x4 + * and 4x2 gives a larger buffer than 4x4, this is ok. + */ + ListSetupType listType = (makeGpuPairList ? ListSetupType::Gpu : ListSetupType::CpuSimdWhenSupported); + VerletbufListSetup listSetup = verletbufGetSafeListSetup(listType); + + real rlist_new; + calc_verlet_buffer_size(mtop, det(box), ir, ir->nstlist, ir->nstlist - 1, -1, &listSetup, nullptr, &rlist_new); + + if (rlist_new != ir->rlist) + { + if (fplog != nullptr) + { + fprintf(fplog, "\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n", + ir->rlist, rlist_new, + listSetup.cluster_size_i, listSetup.cluster_size_j); + } + ir->rlist = rlist_new; + } + } + + if (nstlist_cmdline > 0 && (!EI_DYNAMICS(ir->eI) || ir->verletbuf_tol <= 0)) + { + gmx_fatal(FARGS, "Can not set nstlist without %s", + !EI_DYNAMICS(ir->eI) ? "dynamics" : "verlet-buffer-tolerance"); + } + + if (EI_DYNAMICS(ir->eI)) + { + /* Set or try nstlist values */ + increaseNstlist(fplog, cr, ir, nstlist_cmdline, mtop, box, makeGpuPairList, cpuinfo); + } +} + +/*! \brief Override the nslist value in inputrec + * + * with value passed on the command line (if any) + */ +static void override_nsteps_cmdline(const gmx::MDLogger &mdlog, + int64_t nsteps_cmdline, + t_inputrec *ir) +{ + assert(ir); + + /* override with anything else than the default -2 */ + if (nsteps_cmdline > -2) + { + char sbuf_steps[STEPSTRSIZE]; + char sbuf_msg[STRLEN]; + + ir->nsteps = nsteps_cmdline; + if (EI_DYNAMICS(ir->eI) && nsteps_cmdline != -1) + { + sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps, %.3g ps", + gmx_step_str(nsteps_cmdline, sbuf_steps), + fabs(nsteps_cmdline*ir->delta_t)); + } + else + { + sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps", + gmx_step_str(nsteps_cmdline, sbuf_steps)); + } + + GMX_LOG(mdlog.warning).asParagraph().appendText(sbuf_msg); + } + else if (nsteps_cmdline < -2) + { + gmx_fatal(FARGS, "Invalid nsteps value passed on the command line: %" PRId64, + nsteps_cmdline); + } + /* Do nothing if nsteps_cmdline == -2 */ +} + +namespace gmx +{ + +/*! \brief Return whether GPU acceleration of nonbondeds is supported with the given settings. 
+ * + * If not, and if a warning may be issued, logs a warning about + * falling back to CPU code. With thread-MPI, only the first + * call to this function should have \c issueWarning true. */ +static bool gpuAccelerationOfNonbondedIsUseful(const MDLogger &mdlog, + const t_inputrec *ir, + bool issueWarning) +{ + if (ir->opts.ngener - ir->nwall > 1) + { + /* The GPU code does not support more than one energy group. + * If the user requested GPUs explicitly, a fatal error is given later. + */ + if (issueWarning) + { + GMX_LOG(mdlog.warning).asParagraph() + .appendText("Multiple energy groups is not implemented for GPUs, falling back to the CPU. " + "For better performance, run on the GPU without energy groups and then do " + "gmx mdrun -rerun option on the trajectory with an energy group .tpr file."); + } + return false; + } + return true; +} + +//! Initializes the logger for mdrun. +static gmx::LoggerOwner buildLogger(FILE *fplog, const t_commrec *cr) +{ + gmx::LoggerBuilder builder; + if (fplog != nullptr) + { + builder.addTargetFile(gmx::MDLogger::LogLevel::Info, fplog); + } + if (cr == nullptr || SIMMASTER(cr)) + { + builder.addTargetStream(gmx::MDLogger::LogLevel::Warning, + &gmx::TextOutputFile::standardError()); + } + return builder.build(); +} + +//! Make a TaskTarget from an mdrun argument string. +static TaskTarget findTaskTarget(const char *optionString) +{ + TaskTarget returnValue = TaskTarget::Auto; + + if (strncmp(optionString, "auto", 3) == 0) + { + returnValue = TaskTarget::Auto; + } + else if (strncmp(optionString, "cpu", 3) == 0) + { + returnValue = TaskTarget::Cpu; + } + else if (strncmp(optionString, "gpu", 3) == 0) + { + returnValue = TaskTarget::Gpu; + } + else + { + GMX_ASSERT(false, "Option string should have been checked for sanity already"); + } + + return returnValue; +} + +int Mdrunner::mdrunner() +{ + matrix box; + t_nrnb *nrnb; + t_forcerec *fr = nullptr; + t_fcdata *fcd = nullptr; + real ewaldcoeff_q = 0; + real ewaldcoeff_lj = 0; + int nChargePerturbed = -1, nTypePerturbed = 0; + gmx_wallcycle_t wcycle; + gmx_walltime_accounting_t walltime_accounting = nullptr; + int rc; + int64_t reset_counters; + int nthreads_pme = 1; + gmx_membed_t * membed = nullptr; + gmx_hw_info_t *hwinfo = nullptr; + + /* CAUTION: threads may be started later on in this function, so + cr doesn't reflect the final parallel state right now */ + std::unique_ptr<gmx::MDModules> mdModules(new gmx::MDModules); + t_inputrec inputrecInstance; + t_inputrec *inputrec = &inputrecInstance; + gmx_mtop_t mtop; + + bool doMembed = opt2bSet("-membed", filenames.size(), filenames.data()); + bool doRerun = mdrunOptions.rerun; + + // Handle task-assignment related user options. + EmulateGpuNonbonded emulateGpuNonbonded = (getenv("GMX_EMULATE_GPU") != nullptr ? + EmulateGpuNonbonded::Yes : EmulateGpuNonbonded::No); + std::vector<int> gpuIdsAvailable; + try + { + gpuIdsAvailable = parseUserGpuIds(hw_opt.gpuIdsAvailable); + // TODO We could put the GPU IDs into a std::map to find + // duplicates, but for the small numbers of IDs involved, this + // code is simple and fast. 
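+ // As an illustration, an available-ID list parsed as {0, 0, 1} is rejected by the check
+ // below because device 0 appears twice, whereas {0, 1} passes through unchanged.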
+ for (size_t i = 0; i != gpuIdsAvailable.size(); ++i) + { + for (size_t j = i+1; j != gpuIdsAvailable.size(); ++j) + { + if (gpuIdsAvailable[i] == gpuIdsAvailable[j]) + { + GMX_THROW(InvalidInputError(formatString("The string of available GPU device IDs '%s' may not contain duplicate device IDs", hw_opt.gpuIdsAvailable.c_str()))); + } + } + } + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; + + std::vector<int> userGpuTaskAssignment; + try + { + userGpuTaskAssignment = parseUserGpuIds(hw_opt.userGpuTaskAssignment); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; + auto nonbondedTarget = findTaskTarget(nbpu_opt); + auto pmeTarget = findTaskTarget(pme_opt); + auto pmeFftTarget = findTaskTarget(pme_fft_opt); + auto bondedTarget = findTaskTarget(bonded_opt); + PmeRunMode pmeRunMode = PmeRunMode::None; + + // Here we assume that SIMMASTER(cr) does not change even after the + // threads are started. + + FILE *fplog = nullptr; + // If we are appending, we don't write log output because we need + // to check that the old log file matches what the checkpoint file + // expects. Otherwise, we should start to write log output now if + // there is a file ready for it. + if (logFileHandle != nullptr && !mdrunOptions.continuationOptions.appendFiles) + { + fplog = gmx_fio_getfp(logFileHandle); + } + gmx::LoggerOwner logOwner(buildLogger(fplog, cr)); + gmx::MDLogger mdlog(logOwner.logger()); + + // TODO The thread-MPI master rank makes a working + // PhysicalNodeCommunicator here, but it gets rebuilt by all ranks + // after the threads have been launched. This works because no use + // is made of that communicator until after the execution paths + // have rejoined. But it is likely that we can improve the way + // this is expressed, e.g. by expressly running detection only the + // master rank for thread-MPI, rather than relying on the mutex + // and reference count. + PhysicalNodeCommunicator physicalNodeComm(MPI_COMM_WORLD, gmx_physicalnode_id_hash()); + hwinfo = gmx_detect_hardware(mdlog, physicalNodeComm); + + gmx_print_detected_hardware(fplog, cr, ms, mdlog, hwinfo); + + std::vector<int> gpuIdsToUse; + auto compatibleGpus = getCompatibleGpus(hwinfo->gpu_info); + if (gpuIdsAvailable.empty()) + { + gpuIdsToUse = compatibleGpus; + } + else + { + for (const auto &availableGpuId : gpuIdsAvailable) + { + bool availableGpuIsCompatible = false; + for (const auto &compatibleGpuId : compatibleGpus) + { + if (availableGpuId == compatibleGpuId) + { + availableGpuIsCompatible = true; + break; + } + } + if (!availableGpuIsCompatible) + { + gmx_fatal(FARGS, "You limited the set of compatible GPUs to a set that included ID #%d, but that ID is not for a compatible GPU. 
List only compatible GPUs.", availableGpuId); + } + gpuIdsToUse.push_back(availableGpuId); + } + } + + if (fplog != nullptr) + { + /* Print references after all software/hardware printing */ + please_cite(fplog, "Abraham2015"); + please_cite(fplog, "Pall2015"); + please_cite(fplog, "Pronk2013"); + please_cite(fplog, "Hess2008b"); + please_cite(fplog, "Spoel2005a"); + please_cite(fplog, "Lindahl2001a"); + please_cite(fplog, "Berendsen95a"); + writeSourceDoi(fplog); + } + + std::unique_ptr<t_state> globalState; + + if (SIMMASTER(cr)) + { + /* Only the master rank has the global state */ + globalState = compat::make_unique<t_state>(); + + /* Read (nearly) all data required for the simulation */ + read_tpx_state(ftp2fn(efTPR, filenames.size(), filenames.data()), inputrec, globalState.get(), &mtop); + + /* In rerun, set velocities to zero if present */ + if (doRerun && ((globalState->flags & (1 << estV)) != 0)) + { + // rerun does not use velocities + GMX_LOG(mdlog.info).asParagraph().appendText( + "Rerun trajectory contains velocities. Rerun does only evaluate " + "potential energy and forces. The velocities will be ignored."); + for (int i = 0; i < globalState->natoms; i++) + { + clear_rvec(globalState->v[i]); + } + globalState->flags &= ~(1 << estV); + } + + if (inputrec->cutoff_scheme != ecutsVERLET) + { + if (nstlist_cmdline > 0) + { + gmx_fatal(FARGS, "Can not set nstlist with the group cut-off scheme"); + } + + if (!compatibleGpus.empty()) + { + GMX_LOG(mdlog.warning).asParagraph().appendText( + "NOTE: GPU(s) found, but the current simulation can not use GPUs\n" + " To use a GPU, set the mdp option: cutoff-scheme = Verlet"); + } + } + } + + /* Check and update the hardware options for internal consistency */ + check_and_update_hw_opt_1(mdlog, &hw_opt, cr, domdecOptions.numPmeRanks); + + /* Early check for externally set process affinity. */ + gmx_check_thread_affinity_set(mdlog, cr, + &hw_opt, hwinfo->nthreads_hw_avail, FALSE); + + if (GMX_THREAD_MPI && SIMMASTER(cr)) + { + if (domdecOptions.numPmeRanks > 0 && hw_opt.nthreads_tmpi <= 0) + { + gmx_fatal(FARGS, "You need to explicitly specify the number of MPI threads (-ntmpi) when using separate PME ranks"); + } + + /* Since the master knows the cut-off scheme, update hw_opt for this. + * This is done later for normal MPI and also once more with tMPI + * for all tMPI ranks. + */ + check_and_update_hw_opt_2(&hw_opt, inputrec->cutoff_scheme); + + bool useGpuForNonbonded = false; + bool useGpuForPme = false; + try + { + // If the user specified the number of ranks, then we must + // respect that, but in default mode, we need to allow for + // the number of GPUs to choose the number of ranks. + auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr); + useGpuForNonbonded = decideWhetherToUseGpusForNonbondedWithThreadMpi + (nonbondedTarget, gpuIdsToUse, userGpuTaskAssignment, emulateGpuNonbonded, + canUseGpuForNonbonded, + inputrec->cutoff_scheme == ecutsVERLET, + gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, GMX_THREAD_MPI), + hw_opt.nthreads_tmpi); + useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi + (useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment, + *hwinfo, *inputrec, mtop, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks); + + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; + + /* Determine how many thread-MPI ranks to start. + * + * TODO Over-writing the user-supplied value here does + * prevent any possible subsequent checks from working + * correctly. 
*/ + hw_opt.nthreads_tmpi = get_nthreads_mpi(hwinfo, + &hw_opt, + gpuIdsToUse, + useGpuForNonbonded, + useGpuForPme, + inputrec, &mtop, + mdlog, + doMembed); + + // Now start the threads for thread MPI. + cr = spawnThreads(hw_opt.nthreads_tmpi); + /* The main thread continues here with a new cr. We don't deallocate + the old cr because other threads may still be reading it. */ + // TODO Both master and spawned threads call dup_tfn and + // reinitialize_commrec_for_this_thread. Find a way to express + // this better. + physicalNodeComm = PhysicalNodeCommunicator(MPI_COMM_WORLD, gmx_physicalnode_id_hash()); + } + // END OF CAUTION: cr and physicalNodeComm are now reliable + + if (PAR(cr)) + { + /* now broadcast everything to the non-master nodes/threads: */ + init_parallel(cr, inputrec, &mtop); + } + + // Now each rank knows the inputrec that SIMMASTER read and used, + // and (if applicable) cr->nnodes has been assigned the number of + // thread-MPI ranks that have been chosen. The ranks can now all + // run the task-deciding functions and will agree on the result + // without needing to communicate. + // + // TODO Should we do the communication in debug mode to support + // having an assertion? + // + // Note that these variables describe only their own node. + // + // Note that when bonded interactions run on a GPU they always run + // alongside a nonbonded task, so do not influence task assignment + // even though they affect the force calculation workload. + bool useGpuForNonbonded = false; + bool useGpuForPme = false; + bool useGpuForBonded = false; + try + { + // It's possible that there are different numbers of GPUs on + // different nodes, which is the user's responsibilty to + // handle. If unsuitable, we will notice that during task + // assignment. + bool gpusWereDetected = hwinfo->ngpu_compatible_tot > 0; + bool usingVerletScheme = inputrec->cutoff_scheme == ecutsVERLET; + auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr); + useGpuForNonbonded = decideWhetherToUseGpusForNonbonded(nonbondedTarget, userGpuTaskAssignment, + emulateGpuNonbonded, + canUseGpuForNonbonded, + usingVerletScheme, + gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, !GMX_THREAD_MPI), + gpusWereDetected); + useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment, + *hwinfo, *inputrec, mtop, + cr->nnodes, domdecOptions.numPmeRanks, + gpusWereDetected); + auto canUseGpuForBonded = buildSupportsGpuBondeds(nullptr) && inputSupportsGpuBondeds(*inputrec, mtop, nullptr); + useGpuForBonded = + decideWhetherToUseGpusForBonded(useGpuForNonbonded, useGpuForPme, usingVerletScheme, + bondedTarget, canUseGpuForBonded, + EVDW_PME(inputrec->vdwtype), + EEL_PME_EWALD(inputrec->coulombtype), + domdecOptions.numPmeRanks, gpusWereDetected); + + pmeRunMode = (useGpuForPme ? PmeRunMode::GPU : PmeRunMode::CPU); + if (pmeRunMode == PmeRunMode::GPU) + { + if (pmeFftTarget == TaskTarget::Cpu) + { + pmeRunMode = PmeRunMode::Mixed; + } + } + else if (pmeFftTarget == TaskTarget::Gpu) + { + gmx_fatal(FARGS, "Assigning FFTs to GPU requires PME to be assigned to GPU as well. With PME on CPU you should not be using -pmefft."); + } + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; + + // Build restraints. + // TODO: hide restraint implementation details from Mdrunner. + // There is nothing unique about restraints at this point as far as the + // Mdrunner is concerned. 
The Mdrunner should just be getting a sequence of + // factory functions from the SimulationContext on which to call mdModules->add(). + // TODO: capture all restraints into a single RestraintModule, passed to the runner builder. + for (auto && restraint : restraintManager_->getRestraints()) + { + auto module = RestraintMDModule::create(restraint, + restraint->sites()); + mdModules->add(std::move(module)); + } + + // TODO: Error handling + mdModules->assignOptionsToModules(*inputrec->params, nullptr); + + if (fplog != nullptr) + { + pr_inputrec(fplog, 0, "Input Parameters", inputrec, FALSE); + fprintf(fplog, "\n"); + } + + if (SIMMASTER(cr)) + { + /* now make sure the state is initialized and propagated */ + set_state_entries(globalState.get(), inputrec); + } + + /* NM and TPI parallelize over force/energy calculations, not atoms, + * so we need to initialize and broadcast the global state. + */ + if (inputrec->eI == eiNM || inputrec->eI == eiTPI) + { + if (!MASTER(cr)) + { + globalState = compat::make_unique<t_state>(); + } + broadcastStateWithoutDynamics(cr, globalState.get()); + } + + /* A parallel command line option consistency check that we can + only do after any threads have started. */ + if (!PAR(cr) && (domdecOptions.numCells[XX] > 1 || + domdecOptions.numCells[YY] > 1 || + domdecOptions.numCells[ZZ] > 1 || + domdecOptions.numPmeRanks > 0)) + { + gmx_fatal(FARGS, + "The -dd or -npme option request a parallel simulation, " +#if !GMX_MPI + "but %s was compiled without threads or MPI enabled", output_env_get_program_display_name(oenv)); +#else +#if GMX_THREAD_MPI + "but the number of MPI-threads (option -ntmpi) is not set or is 1"); +#else + "but %s was not started through mpirun/mpiexec or only one rank was requested through mpirun/mpiexec", output_env_get_program_display_name(oenv)); +#endif +#endif + } + + if (doRerun && + (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI)) + { + gmx_fatal(FARGS, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun"); + } + + if (can_use_allvsall(inputrec, TRUE, cr, fplog) && DOMAINDECOMP(cr)) + { + gmx_fatal(FARGS, "All-vs-all loops do not work with domain decomposition, use a single MPI rank"); + } + + if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype))) + { + if (domdecOptions.numPmeRanks > 0) + { + gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr), + "PME-only ranks are requested, but the system does not use PME for electrostatics or LJ"); + } + + domdecOptions.numPmeRanks = 0; + } + + if (useGpuForNonbonded && domdecOptions.numPmeRanks < 0) + { + /* With NB GPUs we don't automatically use PME-only CPU ranks. PME ranks can + * improve performance with many threads per GPU, since our OpenMP + * scaling is bad, but it's difficult to automate the setup. + */ + domdecOptions.numPmeRanks = 0; + } + if (useGpuForPme) + { + if (domdecOptions.numPmeRanks < 0) + { + domdecOptions.numPmeRanks = 0; + // TODO possibly print a note that one can opt-in for a separate PME GPU rank? + } + else + { + GMX_RELEASE_ASSERT(domdecOptions.numPmeRanks <= 1, "PME GPU decomposition is not supported"); + } + } + +#if GMX_FAHCORE + if (MASTER(cr)) + { + fcRegisterSteps(inputrec->nsteps, inputrec->init_step); + } +#endif + + /* NMR restraints must be initialized before load_checkpoint, + * since with time averaging the history is added to t_state. + * For proper consistency check we therefore need to extend + * t_state here. 
+ * So the PME-only nodes (if present) will also initialize + * the distance restraints. + */ + snew(fcd, 1); + + /* This needs to be called before read_checkpoint to extend the state */ + init_disres(fplog, &mtop, inputrec, cr, ms, fcd, globalState.get(), replExParams.exchangeInterval > 0); + + init_orires(fplog, &mtop, inputrec, cr, ms, globalState.get(), &(fcd->orires)); + + auto deform = prepareBoxDeformation(globalState->box, cr, *inputrec); + + ObservablesHistory observablesHistory = {}; + + ContinuationOptions &continuationOptions = mdrunOptions.continuationOptions; + + if (continuationOptions.startedFromCheckpoint) + { + /* Check if checkpoint file exists before doing continuation. + * This way we can use identical input options for the first and subsequent runs... + */ + gmx_bool bReadEkin; + + load_checkpoint(opt2fn_master("-cpi", filenames.size(), filenames.data(), cr), + logFileHandle, + cr, domdecOptions.numCells, + inputrec, globalState.get(), + &bReadEkin, &observablesHistory, + continuationOptions.appendFiles, + continuationOptions.appendFilesOptionSet, + mdrunOptions.reproducible); + + if (bReadEkin) + { + continuationOptions.haveReadEkin = true; + } + + if (continuationOptions.appendFiles && logFileHandle) + { + // Now we can start normal logging to the truncated log file. + fplog = gmx_fio_getfp(logFileHandle); + prepareLogAppending(fplog); + logOwner = buildLogger(fplog, cr); + mdlog = logOwner.logger(); + } + } + + if (mdrunOptions.numStepsCommandline > -2) + { + GMX_LOG(mdlog.info).asParagraph(). + appendText("The -nsteps functionality is deprecated, and may be removed in a future version. " + "Consider using gmx convert-tpr -nsteps or changing the appropriate .mdp file field."); + } + /* override nsteps with value set on the commamdline */ + override_nsteps_cmdline(mdlog, mdrunOptions.numStepsCommandline, inputrec); + + if (SIMMASTER(cr)) + { + copy_mat(globalState->box, box); + } + + if (PAR(cr)) + { + gmx_bcast(sizeof(box), box, cr); + } + + /* Update rlist and nstlist. */ + if (inputrec->cutoff_scheme == ecutsVERLET) + { + prepare_verlet_scheme(fplog, cr, inputrec, nstlist_cmdline, &mtop, box, + useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes), *hwinfo->cpuInfo); + } + + LocalAtomSetManager atomSets; + + if (PAR(cr) && !(EI_TPI(inputrec->eI) || + inputrec->eI == eiNM)) + { + cr->dd = init_domain_decomposition(mdlog, cr, domdecOptions, mdrunOptions, + &mtop, inputrec, + box, positionsFromStatePointer(globalState.get()), + &atomSets); + // Note that local state still does not exist yet. + } + else + { + /* PME, if used, is done on all nodes with 1D decomposition */ + cr->npmenodes = 0; + cr->duty = (DUTY_PP | DUTY_PME); + + if (inputrec->ePBC == epbcSCREW) + { + gmx_fatal(FARGS, + "pbc=screw is only implemented with domain decomposition"); + } + } + + if (PAR(cr)) + { + /* After possible communicator splitting in make_dd_communicators. + * we can set up the intra/inter node communication. + */ + gmx_setup_nodecomm(fplog, cr); + } + +#if GMX_MPI + if (isMultiSim(ms)) + { + GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted( + "This is simulation %d out of %d running as a composite GROMACS\n" + "multi-simulation job. Setup for this simulation:\n", + ms->sim, ms->nsim); + } + GMX_LOG(mdlog.warning).appendTextFormatted( + "Using %d MPI %s\n", + cr->nnodes, +#if GMX_THREAD_MPI + cr->nnodes == 1 ? "thread" : "threads" +#else + cr->nnodes == 1 ? 
"process" : "processes" +#endif + ); + fflush(stderr); +#endif + + /* Check and update hw_opt for the cut-off scheme */ + check_and_update_hw_opt_2(&hw_opt, inputrec->cutoff_scheme); + + /* Check and update the number of OpenMP threads requested */ + checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo, cr, ms, physicalNodeComm.size_, + pmeRunMode, mtop); + + gmx_omp_nthreads_init(mdlog, cr, + hwinfo->nthreads_hw_avail, + physicalNodeComm.size_, + hw_opt.nthreads_omp, + hw_opt.nthreads_omp_pme, + !thisRankHasDuty(cr, DUTY_PP), + inputrec->cutoff_scheme == ecutsVERLET); + + // Enable FP exception detection for the Verlet scheme, but not in + // Release mode and not for compilers with known buggy FP + // exception support (clang with any optimization) or suspected + // buggy FP exception support (gcc 7.* with optimization). +#if !defined NDEBUG && \ + !((defined __clang__ || (defined(__GNUC__) && !defined(__ICC) && __GNUC__ == 7)) \ + && defined __OPTIMIZE__) + const bool bEnableFPE = inputrec->cutoff_scheme == ecutsVERLET; +#else + const bool bEnableFPE = false; +#endif + //FIXME - reconcile with gmx_feenableexcept() call from CommandLineModuleManager::run() + if (bEnableFPE) + { + gmx_feenableexcept(); + } + + // Build a data structure that expresses which kinds of non-bonded + // task are handled by this rank. + // + // TODO Later, this might become a loop over all registered modules + // relevant to the mdp inputs, to find those that have such tasks. + // + // TODO This could move before init_domain_decomposition() as part + // of refactoring that separates the responsibility for duty + // assignment from setup for communication between tasks, and + // setup for tasks handled with a domain (ie including short-ranged + // tasks, bonded tasks, etc.). + // + // Note that in general useGpuForNonbonded, etc. can have a value + // that is inconsistent with the presence of actual GPUs on any + // rank, and that is not known to be a problem until the + // duty of the ranks on a node become known. + // + // TODO Later we might need the concept of computeTasksOnThisRank, + // from which we construct gpuTasksOnThisRank. + // + // Currently the DD code assigns duty to ranks that can + // include PP work that currently can be executed on a single + // GPU, if present and compatible. This has to be coordinated + // across PP ranks on a node, with possible multiple devices + // or sharing devices on a node, either from the user + // selection, or automatically. + auto haveGpus = !gpuIdsToUse.empty(); + std::vector<GpuTask> gpuTasksOnThisRank; + if (thisRankHasDuty(cr, DUTY_PP)) + { + if (useGpuForNonbonded) + { + // Note that any bonded tasks on a GPU always accompany a + // non-bonded task. + if (haveGpus) + { + gpuTasksOnThisRank.push_back(GpuTask::Nonbonded); + } + else if (nonbondedTarget == TaskTarget::Gpu) + { + gmx_fatal(FARGS, "Cannot run short-ranged nonbonded interactions on a GPU because there is none detected."); + } + else if (bondedTarget == TaskTarget::Gpu) + { + gmx_fatal(FARGS, "Cannot run bonded interactions on a GPU because there is none detected."); + } + } + } + // TODO cr->duty & DUTY_PME should imply that a PME algorithm is active, but currently does not. 
+ if (EEL_PME(inputrec->coulombtype) && (thisRankHasDuty(cr, DUTY_PME))) + { + if (useGpuForPme) + { + if (haveGpus) + { + gpuTasksOnThisRank.push_back(GpuTask::Pme); + } + else if (pmeTarget == TaskTarget::Gpu) + { + gmx_fatal(FARGS, "Cannot run PME on a GPU because there is none detected."); + } + } + } + + GpuTaskAssignment gpuTaskAssignment; + try + { + // Produce the task assignment for this rank. + gpuTaskAssignment = runTaskAssignment(gpuIdsToUse, userGpuTaskAssignment, *hwinfo, + mdlog, cr, ms, physicalNodeComm, gpuTasksOnThisRank, + useGpuForBonded, pmeRunMode); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; + + /* Prevent other ranks from continuing after an issue was found + * and reported as a fatal error. + * + * TODO This function implements a barrier so that MPI runtimes + * can organize an orderly shutdown if one of the ranks has had to + * issue a fatal error in various code already run. When we have + * MPI-aware error handling and reporting, this should be + * improved. */ +#if GMX_MPI + if (PAR(cr)) + { + MPI_Barrier(cr->mpi_comm_mysim); + } + if (isMultiSim(ms)) + { + if (SIMMASTER(cr)) + { + MPI_Barrier(ms->mpi_comm_masters); + } + /* We need another barrier to prevent non-master ranks from contiuing + * when an error occured in a different simulation. + */ + MPI_Barrier(cr->mpi_comm_mysim); + } +#endif + + /* Now that we know the setup is consistent, check for efficiency */ + check_resource_division_efficiency(hwinfo, !gpuTaskAssignment.empty(), mdrunOptions.ntompOptionIsSet, + cr, mdlog); + + gmx_device_info_t *nonbondedDeviceInfo = nullptr; + + if (thisRankHasDuty(cr, DUTY_PP)) + { + // This works because only one task of each type is currently permitted. + auto nbGpuTaskMapping = std::find_if(gpuTaskAssignment.begin(), gpuTaskAssignment.end(), + hasTaskType<GpuTask::Nonbonded>); + if (nbGpuTaskMapping != gpuTaskAssignment.end()) + { + int nonbondedDeviceId = nbGpuTaskMapping->deviceId_; + nonbondedDeviceInfo = getDeviceInfo(hwinfo->gpu_info, nonbondedDeviceId); + init_gpu(nonbondedDeviceInfo); + + if (DOMAINDECOMP(cr)) + { + /* When we share GPUs over ranks, we need to know this for the DLB */ + dd_setup_dlb_resource_sharing(cr, nonbondedDeviceId); + } + + } + } + + std::unique_ptr<ClfftInitializer> initializedClfftLibrary; + + gmx_device_info_t *pmeDeviceInfo = nullptr; + // Later, this program could contain kernels that might be later + // re-used as auto-tuning progresses, or subsequent simulations + // are invoked. + PmeGpuProgramStorage pmeGpuProgram; + // This works because only one task of each type is currently permitted. + auto pmeGpuTaskMapping = std::find_if(gpuTaskAssignment.begin(), gpuTaskAssignment.end(), hasTaskType<GpuTask::Pme>); + const bool thisRankHasPmeGpuTask = (pmeGpuTaskMapping != gpuTaskAssignment.end()); + if (thisRankHasPmeGpuTask) + { + pmeDeviceInfo = getDeviceInfo(hwinfo->gpu_info, pmeGpuTaskMapping->deviceId_); + init_gpu(pmeDeviceInfo); + pmeGpuProgram = buildPmeGpuProgram(pmeDeviceInfo); + // TODO It would be nice to move this logic into the factory + // function. See Redmine #2535. 
+ bool isMasterThread = !GMX_THREAD_MPI || MASTER(cr); + if (pmeRunMode == PmeRunMode::GPU && !initializedClfftLibrary && isMasterThread) + { + initializedClfftLibrary = initializeClfftLibrary(); + } + } + + /* getting number of PP/PME threads + PME: env variable should be read only on one node to make sure it is + identical everywhere; + */ + nthreads_pme = gmx_omp_nthreads_get(emntPME); + + int numThreadsOnThisRank; + /* threads on this MPI process or TMPI thread */ + if (thisRankHasDuty(cr, DUTY_PP)) + { + numThreadsOnThisRank = gmx_omp_nthreads_get(emntNonbonded); + } + else + { + numThreadsOnThisRank = nthreads_pme; + } + + checkHardwareOversubscription(numThreadsOnThisRank, cr->nodeid, + *hwinfo->hardwareTopology, + physicalNodeComm, mdlog); + + if (hw_opt.thread_affinity != threadaffOFF) + { + /* Before setting affinity, check whether the affinity has changed + * - which indicates that probably the OpenMP library has changed it + * since we first checked). + */ + gmx_check_thread_affinity_set(mdlog, cr, + &hw_opt, hwinfo->nthreads_hw_avail, TRUE); + + int numThreadsOnThisNode, intraNodeThreadOffset; + analyzeThreadsOnThisNode(physicalNodeComm, numThreadsOnThisRank, &numThreadsOnThisNode, + &intraNodeThreadOffset); + + /* Set the CPU affinity */ + gmx_set_thread_affinity(mdlog, cr, &hw_opt, *hwinfo->hardwareTopology, + numThreadsOnThisRank, numThreadsOnThisNode, + intraNodeThreadOffset, nullptr); + } + + if (mdrunOptions.timingOptions.resetStep > -1) + { + GMX_LOG(mdlog.info).asParagraph(). + appendText("The -resetstep functionality is deprecated, and may be removed in a future version."); + } + wcycle = wallcycle_init(fplog, mdrunOptions.timingOptions.resetStep, cr); + + if (PAR(cr)) + { + /* Master synchronizes its value of reset_counters with all nodes + * including PME only nodes */ + reset_counters = wcycle_get_reset_counters(wcycle); + gmx_bcast_sim(sizeof(reset_counters), &reset_counters, cr); + wcycle_set_reset_counters(wcycle, reset_counters); + } + + // Membrane embedding must be initialized before we call init_forcerec() + if (doMembed) + { + if (MASTER(cr)) + { + fprintf(stderr, "Initializing membed"); + } + /* Note that membed cannot work in parallel because mtop is + * changed here. Fix this if we ever want to make it run with + * multiple ranks. */ + membed = init_membed(fplog, filenames.size(), filenames.data(), &mtop, inputrec, globalState.get(), cr, + &mdrunOptions + .checkpointOptions.period); + } + + std::unique_ptr<MDAtoms> mdAtoms; + std::unique_ptr<gmx_vsite_t> vsite; + + snew(nrnb, 1); + if (thisRankHasDuty(cr, DUTY_PP)) + { + /* Initiate forcerecord */ + fr = mk_forcerec(); + fr->forceProviders = mdModules->initForceProviders(); + init_forcerec(fplog, mdlog, fr, fcd, + inputrec, &mtop, cr, box, + opt2fn("-table", filenames.size(), filenames.data()), + opt2fn("-tablep", filenames.size(), filenames.data()), + opt2fns("-tableb", filenames.size(), filenames.data()), + *hwinfo, nonbondedDeviceInfo, + useGpuForBonded, + FALSE, + pforce); + + /* Initialize the mdAtoms structure. + * mdAtoms is not filled with atom data, + * as this can not be done now with domain decomposition. + */ + mdAtoms = makeMDAtoms(fplog, mtop, *inputrec, thisRankHasPmeGpuTask); + if (globalState && thisRankHasPmeGpuTask) + { + // The pinning of coordinates in the global state object works, because we only use + // PME on GPU without DD or on a separate PME rank, and because the local state pointer + // points to the global state object without DD. 
+ // FIXME: MD and EM separately set up the local state - this should happen in the same function, + // which should also perform the pinning. + changePinningPolicy(&globalState->x, pme_get_pinning_policy()); + } + + /* Initialize the virtual site communication */ + vsite = initVsite(mtop, cr); + + calc_shifts(box, fr->shift_vec); + + /* With periodic molecules the charge groups should be whole at start up + * and the virtual sites should not be far from their proper positions. + */ + if (!inputrec->bContinuation && MASTER(cr) && + !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols)) + { + /* Make molecules whole at start of run */ + if (fr->ePBC != epbcNONE) + { + do_pbc_first_mtop(fplog, inputrec->ePBC, box, &mtop, globalState->x.rvec_array()); + } + if (vsite) + { + /* Correct initial vsite positions are required + * for the initial distribution in the domain decomposition + * and for the initial shell prediction. + */ + constructVsitesGlobal(mtop, globalState->x); + } + } + + if (EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype)) + { + ewaldcoeff_q = fr->ic->ewaldcoeff_q; + ewaldcoeff_lj = fr->ic->ewaldcoeff_lj; + } + } + else + { + /* This is a PME only node */ + + GMX_ASSERT(globalState == nullptr, "We don't need the state on a PME only rank and expect it to be unitialized"); + + ewaldcoeff_q = calc_ewaldcoeff_q(inputrec->rcoulomb, inputrec->ewald_rtol); + ewaldcoeff_lj = calc_ewaldcoeff_lj(inputrec->rvdw, inputrec->ewald_rtol_lj); + } + + gmx_pme_t *sepPmeData = nullptr; + // This reference hides the fact that PME data is owned by runner on PME-only ranks and by forcerec on other ranks + GMX_ASSERT(thisRankHasDuty(cr, DUTY_PP) == (fr != nullptr), "Double-checking that only PME-only ranks have no forcerec"); + gmx_pme_t * &pmedata = fr ? fr->pmedata : sepPmeData; + + /* Initiate PME if necessary, + * either on all nodes or on dedicated PME nodes only. */ + if (EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)) + { + if (mdAtoms && mdAtoms->mdatoms()) + { + nChargePerturbed = mdAtoms->mdatoms()->nChargePerturbed; + if (EVDW_PME(inputrec->vdwtype)) + { + nTypePerturbed = mdAtoms->mdatoms()->nTypePerturbed; + } + } + if (cr->npmenodes > 0) + { + /* The PME only nodes need to know nChargePerturbed(FEP on Q) and nTypePerturbed(FEP on LJ)*/ + gmx_bcast_sim(sizeof(nChargePerturbed), &nChargePerturbed, cr); + gmx_bcast_sim(sizeof(nTypePerturbed), &nTypePerturbed, cr); + } + + if (thisRankHasDuty(cr, DUTY_PME)) + { + try + { + pmedata = gmx_pme_init(cr, + getNumPmeDomains(cr->dd), + inputrec, + mtop.natoms, nChargePerturbed != 0, nTypePerturbed != 0, + mdrunOptions.reproducible, + ewaldcoeff_q, ewaldcoeff_lj, + nthreads_pme, + pmeRunMode, nullptr, + pmeDeviceInfo, pmeGpuProgram.get(), mdlog); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; + } + } + + + if (EI_DYNAMICS(inputrec->eI)) + { + /* Turn on signal handling on all nodes */ + /* + * (A user signal from the PME nodes (if any) + * is communicated to the PP nodes. 
+ */ + signal_handler_install(); + } + + if (thisRankHasDuty(cr, DUTY_PP)) + { + /* Assumes uniform use of the number of OpenMP threads */ + walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault)); + + if (inputrec->bPull) + { + /* Initialize pull code */ + inputrec->pull_work = + init_pull(fplog, inputrec->pull, inputrec, + &mtop, cr, &atomSets, inputrec->fepvals->init_lambda); + if (inputrec->pull->bXOutAverage || inputrec->pull->bFOutAverage) + { + initPullHistory(inputrec->pull_work, &observablesHistory); + } + if (EI_DYNAMICS(inputrec->eI) && MASTER(cr)) + { + init_pull_output_files(inputrec->pull_work, + filenames.size(), filenames.data(), oenv, + continuationOptions); + } + } + + std::unique_ptr<EnforcedRotation> enforcedRotation; + if (inputrec->bRot) + { + /* Initialize enforced rotation code */ + enforcedRotation = init_rot(fplog, + inputrec, + filenames.size(), + filenames.data(), + cr, + &atomSets, + globalState.get(), + &mtop, + oenv, + mdrunOptions); + } + + if (inputrec->eSwapCoords != eswapNO) + { + /* Initialize ion swapping code */ + init_swapcoords(fplog, inputrec, opt2fn_master("-swap", filenames.size(), filenames.data(), cr), + &mtop, globalState.get(), &observablesHistory, + cr, &atomSets, oenv, mdrunOptions); + } + + /* Let makeConstraints know whether we have essential dynamics constraints. + * TODO: inputrec should tell us whether we use an algorithm, not a file option or the checkpoint + */ + bool doEssentialDynamics = (opt2fn_null("-ei", filenames.size(), filenames.data()) != nullptr + || observablesHistory.edsamHistory); + auto constr = makeConstraints(mtop, *inputrec, doEssentialDynamics, + fplog, *mdAtoms->mdatoms(), + cr, ms, nrnb, wcycle, fr->bMolPBC); + + if (DOMAINDECOMP(cr)) + { + GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP"); + /* This call is not included in init_domain_decomposition mainly + * because fr->cginfo_mb is set later. + */ + dd_init_bondeds(fplog, cr->dd, &mtop, vsite.get(), inputrec, + domdecOptions.checkBondedInteractions, + fr->cginfo_mb); + } + + // TODO This is not the right place to manage the lifetime of + // this data structure, but currently it's the easiest way to + // make it work. Later, it should probably be made/updated + // after the workload for the lifetime of a PP domain is + // understood. + PpForceWorkload ppForceWorkload; + + GMX_ASSERT(stopHandlerBuilder_, "Runner must provide StopHandlerBuilder to integrator."); + + /* PLUMED */ + if(plumedswitch){ + /* detect plumed API version */ + int pversion=0; + plumed_cmd(plumedmain,"getApiVersion",&pversion); + if(pversion>5) { + int nth = gmx_omp_nthreads_get(emntDefault); + if(pversion>5) plumed_cmd(plumedmain,"setNumOMPthreads",&nth); + } + } + /* END PLUMED */ + + /* Now do whatever the user wants us to do (how flexible...) */ + Integrator integrator { + fplog, cr, ms, mdlog, static_cast<int>(filenames.size()), filenames.data(), + oenv, + mdrunOptions, + vsite.get(), constr.get(), + enforcedRotation ? 
enforcedRotation->getLegacyEnfrot() : nullptr, + deform.get(), + mdModules->outputProvider(), + inputrec, &mtop, + fcd, + globalState.get(), + &observablesHistory, + mdAtoms.get(), nrnb, wcycle, fr, + &ppForceWorkload, + replExParams, + membed, + walltime_accounting, + std::move(stopHandlerBuilder_) + }; + integrator.run(inputrec->eI, doRerun); + + if (inputrec->bPull) + { + finish_pull(inputrec->pull_work); + } + + } + else + { + GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP"); + /* do PME only */ + walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME)); + gmx_pmeonly(pmedata, cr, nrnb, wcycle, walltime_accounting, inputrec, pmeRunMode); + } + + wallcycle_stop(wcycle, ewcRUN); + + /* Finish up, write some stuff + * if rerunMD, don't write last frame again + */ + finish_run(fplog, mdlog, cr, + inputrec, nrnb, wcycle, walltime_accounting, + fr ? fr->nbv : nullptr, + pmedata, + EI_DYNAMICS(inputrec->eI) && !isMultiSim(ms)); + + // Free PME data + if (pmedata) + { + gmx_pme_destroy(pmedata); + pmedata = nullptr; + } + + // FIXME: this is only here to manually unpin mdAtoms->chargeA_ and state->x, + // before we destroy the GPU context(s) in free_gpu_resources(). + // Pinned buffers are associated with contexts in CUDA. + // As soon as we destroy GPU contexts after mdrunner() exits, these lines should go. + mdAtoms.reset(nullptr); + globalState.reset(nullptr); + mdModules.reset(nullptr); // destruct force providers here as they might also use the GPU + + /* Free GPU memory and set a physical node tMPI barrier (which should eventually go away) */ + free_gpu_resources(fr, physicalNodeComm); + free_gpu(nonbondedDeviceInfo); + free_gpu(pmeDeviceInfo); + done_forcerec(fr, mtop.molblock.size(), mtop.groups.grps[egcENER].nr); + sfree(fcd); + + if (doMembed) + { + free_membed(membed); + } + + gmx_hardware_info_free(); + + /* Does what it says */ + print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime()); + walltime_accounting_destroy(walltime_accounting); + sfree(nrnb); + + /* PLUMED */ + if(plumedswitch){ + plumed_finalize(plumedmain); + } + /* END PLUMED */ + + // Ensure log file content is written + if (logFileHandle) + { + gmx_fio_flush(logFileHandle); + } + + /* Reset FPEs (important for unit tests) by disabling them. Assumes no + * exceptions were enabled before function was called. */ + if (bEnableFPE) + { + gmx_fedisableexcept(); + } + + rc = static_cast<int>(gmx_get_stop_condition()); + +#if GMX_THREAD_MPI + /* we need to join all threads. The sub-threads join when they + exit this function, but the master thread needs to be told to + wait for that. */ + if (PAR(cr) && MASTER(cr)) + { + done_commrec(cr); + tMPI_Finalize(); + } +#endif + + return rc; +} + +Mdrunner::~Mdrunner() +{ + // Clean up of the Manager. + // This will end up getting called on every thread-MPI rank, which is unnecessary, + // but okay as long as threads synchronize some time before adding or accessing + // a new set of restraints. 
+ if (restraintManager_) + { + restraintManager_->clear(); + GMX_ASSERT(restraintManager_->countRestraints() == 0, + "restraints added during runner life time should be cleared at runner destruction."); + } +}; + +void Mdrunner::addPotential(std::shared_ptr<gmx::IRestraintPotential> puller, + std::string name) +{ + GMX_ASSERT(restraintManager_, "Mdrunner must have a restraint manager."); + // Not sure if this should be logged through the md logger or something else, + // but it is helpful to have some sort of INFO level message sent somewhere. + // std::cout << "Registering restraint named " << name << std::endl; + + // When multiple restraints are used, it may be wasteful to register them separately. + // Maybe instead register an entire Restraint Manager as a force provider. + restraintManager_->addToSpec(std::move(puller), + std::move(name)); +} + +Mdrunner::Mdrunner(Mdrunner &&) noexcept = default; + +//NOLINTNEXTLINE(performance-noexcept-move-constructor) working around GCC bug 58265 +Mdrunner &Mdrunner::operator=(Mdrunner && /*handle*/) noexcept(BUGFREE_NOEXCEPT_STRING) = default; + +class Mdrunner::BuilderImplementation +{ + public: + BuilderImplementation() = delete; + explicit BuilderImplementation(SimulationContext* context); + ~BuilderImplementation(); + + BuilderImplementation &setExtraMdrunOptions(const MdrunOptions &options, + real forceWarningThreshold); + + void addDomdec(const DomdecOptions &options); + + void addVerletList(int nstlist); + + void addReplicaExchange(const ReplicaExchangeParameters ¶ms); + + void addMultiSim(gmx_multisim_t* multisim); + + void addNonBonded(const char* nbpu_opt); + + void addPME(const char* pme_opt_, const char* pme_fft_opt_); + + void addBondedTaskAssignment(const char* bonded_opt); + + void addHardwareOptions(const gmx_hw_opt_t &hardwareOptions); + + void addFilenames(ArrayRef <const t_filenm> filenames); + + void addOutputEnvironment(gmx_output_env_t* outputEnvironment); + + void addLogFile(t_fileio *logFileHandle); + + void addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder); + + Mdrunner build(); + + private: + // Default parameters copied from runner.h + // \todo Clarify source(s) of default parameters. + + const char* nbpu_opt_ = nullptr; + const char* pme_opt_ = nullptr; + const char* pme_fft_opt_ = nullptr; + const char *bonded_opt_ = nullptr; + + MdrunOptions mdrunOptions_; + + DomdecOptions domdecOptions_; + + ReplicaExchangeParameters replicaExchangeParameters_; + + //! Command-line override for the duration of a neighbor list with the Verlet scheme. + int nstlist_ = 0; + + //! Non-owning multisim communicator handle. + std::unique_ptr<gmx_multisim_t*> multisim_ = nullptr; + + //! Print a warning if any force is larger than this (in kJ/mol nm). + real forceWarningThreshold_ = -1; + + /*! \brief Non-owning pointer to SimulationContext (owned and managed by client) + * + * \internal + * \todo Establish robust protocol to make sure resources remain valid. + * SimulationContext will likely be separated into multiple layers for + * different levels of access and different phases of execution. Ref + * https://redmine.gromacs.org/issues/2375 + * https://redmine.gromacs.org/issues/2587 + */ + SimulationContext* context_ = nullptr; + + //! \brief Parallelism information. + gmx_hw_opt_t hardwareOptions_; + + //! filename options for simulation. + ArrayRef<const t_filenm> filenames_; + + /*! \brief Handle to output environment. + * + * \todo gmx_output_env_t needs lifetime management. 
+ */ + gmx_output_env_t* outputEnvironment_ = nullptr; + + /*! \brief Non-owning handle to MD log file. + * + * \todo Context should own output facilities for client. + * \todo Improve log file handle management. + * \internal + * Code managing the FILE* relies on the ability to set it to + * nullptr to check whether the filehandle is valid. + */ + t_fileio* logFileHandle_ = nullptr; + + /*! + * \brief Builder for simulation stop signal handler. + */ + std::unique_ptr<StopHandlerBuilder> stopHandlerBuilder_ = nullptr; +}; + +Mdrunner::BuilderImplementation::BuilderImplementation(SimulationContext* context) : + context_(context) +{ + GMX_ASSERT(context_, "Bug found. It should not be possible to construct builder without a valid context."); +} + +Mdrunner::BuilderImplementation::~BuilderImplementation() = default; + +Mdrunner::BuilderImplementation & +Mdrunner::BuilderImplementation::setExtraMdrunOptions(const MdrunOptions &options, + real forceWarningThreshold) +{ + mdrunOptions_ = options; + forceWarningThreshold_ = forceWarningThreshold; + return *this; +} + +void Mdrunner::BuilderImplementation::addDomdec(const DomdecOptions &options) +{ + domdecOptions_ = options; +} + +void Mdrunner::BuilderImplementation::addVerletList(int nstlist) +{ + nstlist_ = nstlist; +} + +void Mdrunner::BuilderImplementation::addReplicaExchange(const ReplicaExchangeParameters ¶ms) +{ + replicaExchangeParameters_ = params; +} + +void Mdrunner::BuilderImplementation::addMultiSim(gmx_multisim_t* multisim) +{ + multisim_ = compat::make_unique<gmx_multisim_t*>(multisim); +} + +Mdrunner Mdrunner::BuilderImplementation::build() +{ + auto newRunner = Mdrunner(); + + GMX_ASSERT(context_, "Bug found. It should not be possible to call build() without a valid context."); + + newRunner.mdrunOptions = mdrunOptions_; + newRunner.domdecOptions = domdecOptions_; + + // \todo determine an invariant to check or confirm that all gmx_hw_opt_t objects are valid + newRunner.hw_opt = hardwareOptions_; + + // No invariant to check. This parameter exists to optionally override other behavior. + newRunner.nstlist_cmdline = nstlist_; + + newRunner.replExParams = replicaExchangeParameters_; + + newRunner.filenames = filenames_; + + GMX_ASSERT(context_->communicationRecord_, "SimulationContext communications not initialized."); + newRunner.cr = context_->communicationRecord_; + + if (multisim_) + { + // nullptr is a valid value for the multisim handle, so we don't check the pointed-to pointer. + newRunner.ms = *multisim_; + } + else + { + GMX_THROW(gmx::APIError("MdrunnerBuilder::addMultiSim() is required before build()")); + } + + // \todo Clarify ownership and lifetime management for gmx_output_env_t + // \todo Update sanity checking when output environment has clearly specified invariants. + // Initialization and default values for oenv are not well specified in the current version. 
+ if (outputEnvironment_) + { + newRunner.oenv = outputEnvironment_; + } + else + { + GMX_THROW(gmx::APIError("MdrunnerBuilder::addOutputEnvironment() is required before build()")); + } + + newRunner.logFileHandle = logFileHandle_; + + if (nbpu_opt_) + { + newRunner.nbpu_opt = nbpu_opt_; + } + else + { + GMX_THROW(gmx::APIError("MdrunnerBuilder::addNonBonded() is required before build()")); + } + + if (pme_opt_ && pme_fft_opt_) + { + newRunner.pme_opt = pme_opt_; + newRunner.pme_fft_opt = pme_fft_opt_; + } + else + { + GMX_THROW(gmx::APIError("MdrunnerBuilder::addElectrostatics() is required before build()")); + } + + if (bonded_opt_) + { + newRunner.bonded_opt = bonded_opt_; + } + else + { + GMX_THROW(gmx::APIError("MdrunnerBuilder::addBondedTaskAssignment() is required before build()")); + } + + newRunner.restraintManager_ = compat::make_unique<gmx::RestraintManager>(); + + if (stopHandlerBuilder_) + { + newRunner.stopHandlerBuilder_ = std::move(stopHandlerBuilder_); + } + else + { + newRunner.stopHandlerBuilder_ = compat::make_unique<StopHandlerBuilder>(); + } + + return newRunner; +} + +void Mdrunner::BuilderImplementation::addNonBonded(const char* nbpu_opt) +{ + nbpu_opt_ = nbpu_opt; +} + +void Mdrunner::BuilderImplementation::addPME(const char* pme_opt, + const char* pme_fft_opt) +{ + pme_opt_ = pme_opt; + pme_fft_opt_ = pme_fft_opt; +} + +void Mdrunner::BuilderImplementation::addBondedTaskAssignment(const char* bonded_opt) +{ + bonded_opt_ = bonded_opt; +} + +void Mdrunner::BuilderImplementation::addHardwareOptions(const gmx_hw_opt_t &hardwareOptions) +{ + hardwareOptions_ = hardwareOptions; +} + +void Mdrunner::BuilderImplementation::addFilenames(ArrayRef<const t_filenm> filenames) +{ + filenames_ = filenames; +} + +void Mdrunner::BuilderImplementation::addOutputEnvironment(gmx_output_env_t* outputEnvironment) +{ + outputEnvironment_ = outputEnvironment; +} + +void Mdrunner::BuilderImplementation::addLogFile(t_fileio *logFileHandle) +{ + logFileHandle_ = logFileHandle; +} + +void Mdrunner::BuilderImplementation::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder) +{ + stopHandlerBuilder_ = std::move(builder); +} + +MdrunnerBuilder::MdrunnerBuilder(compat::not_null<SimulationContext*> context) : + impl_ {gmx::compat::make_unique<Mdrunner::BuilderImplementation>(context)} +{ +} + +MdrunnerBuilder::~MdrunnerBuilder() = default; + +MdrunnerBuilder &MdrunnerBuilder::addSimulationMethod(const MdrunOptions &options, + real forceWarningThreshold) +{ + impl_->setExtraMdrunOptions(options, forceWarningThreshold); + return *this; +} + +MdrunnerBuilder &MdrunnerBuilder::addDomainDecomposition(const DomdecOptions &options) +{ + impl_->addDomdec(options); + return *this; +} + +MdrunnerBuilder &MdrunnerBuilder::addNeighborList(int nstlist) +{ + impl_->addVerletList(nstlist); + return *this; +} + +MdrunnerBuilder &MdrunnerBuilder::addReplicaExchange(const ReplicaExchangeParameters ¶ms) +{ + impl_->addReplicaExchange(params); + return *this; +} + +MdrunnerBuilder &MdrunnerBuilder::addMultiSim(gmx_multisim_t* multisim) +{ + impl_->addMultiSim(multisim); + return *this; +} + +MdrunnerBuilder &MdrunnerBuilder::addNonBonded(const char* nbpu_opt) +{ + impl_->addNonBonded(nbpu_opt); + return *this; +} + +MdrunnerBuilder &MdrunnerBuilder::addElectrostatics(const char* pme_opt, + const char* pme_fft_opt) +{ + // The builder method may become more general in the future, but in this version, + // parameters for PME electrostatics are both required and the only parameters + // available. 
+ if (pme_opt && pme_fft_opt) + { + impl_->addPME(pme_opt, pme_fft_opt); + } + else + { + GMX_THROW(gmx::InvalidInputError("addElectrostatics() arguments must be non-null pointers.")); + } + return *this; +} + +MdrunnerBuilder &MdrunnerBuilder::addBondedTaskAssignment(const char* bonded_opt) +{ + impl_->addBondedTaskAssignment(bonded_opt); + return *this; +} + +Mdrunner MdrunnerBuilder::build() +{ + return impl_->build(); +} + +MdrunnerBuilder &MdrunnerBuilder::addHardwareOptions(const gmx_hw_opt_t &hardwareOptions) +{ + impl_->addHardwareOptions(hardwareOptions); + return *this; +} + +MdrunnerBuilder &MdrunnerBuilder::addFilenames(ArrayRef<const t_filenm> filenames) +{ + impl_->addFilenames(filenames); + return *this; +} + +MdrunnerBuilder &MdrunnerBuilder::addOutputEnvironment(gmx_output_env_t* outputEnvironment) +{ + impl_->addOutputEnvironment(outputEnvironment); + return *this; +} + +MdrunnerBuilder &MdrunnerBuilder::addLogFile(t_fileio *logFileHandle) +{ + impl_->addLogFile(logFileHandle); + return *this; +} + +MdrunnerBuilder &MdrunnerBuilder::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder) +{ + impl_->addStopHandlerBuilder(std::move(builder)); + return *this; +} + +MdrunnerBuilder::MdrunnerBuilder(MdrunnerBuilder &&) noexcept = default; + +MdrunnerBuilder &MdrunnerBuilder::operator=(MdrunnerBuilder &&) noexcept = default; + +} // namespace gmx diff --git a/patches/gromacs-2019.1.diff/src/gromacs/mdrun/runner.cpp.preplumed b/patches/gromacs-2019.1.diff/src/gromacs/mdrun/runner.cpp.preplumed new file mode 100644 index 0000000000000000000000000000000000000000..ae158a4984deefd1d61ca9e2a912143db1c75e05 --- /dev/null +++ b/patches/gromacs-2019.1.diff/src/gromacs/mdrun/runner.cpp.preplumed @@ -0,0 +1,1932 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. 
+ * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \internal \file + * + * \brief Implements the MD runner routine calling all integrators. + * + * \author David van der Spoel <david.vanderspoel@icm.uu.se> + * \ingroup module_mdrun + */ +#include "gmxpre.h" + +#include "runner.h" + +#include "config.h" + +#include <cassert> +#include <cinttypes> +#include <csignal> +#include <cstdlib> +#include <cstring> + +#include <algorithm> + +#include "gromacs/commandline/filenm.h" +#include "gromacs/compat/make_unique.h" +#include "gromacs/domdec/domdec.h" +#include "gromacs/domdec/domdec_struct.h" +#include "gromacs/domdec/localatomsetmanager.h" +#include "gromacs/ewald/ewald-utils.h" +#include "gromacs/ewald/pme.h" +#include "gromacs/ewald/pme-gpu-program.h" +#include "gromacs/fileio/checkpoint.h" +#include "gromacs/fileio/gmxfio.h" +#include "gromacs/fileio/oenv.h" +#include "gromacs/fileio/tpxio.h" +#include "gromacs/gmxlib/network.h" +#include "gromacs/gmxlib/nrnb.h" +#include "gromacs/gpu_utils/clfftinitializer.h" +#include "gromacs/gpu_utils/gpu_utils.h" +#include "gromacs/hardware/cpuinfo.h" +#include "gromacs/hardware/detecthardware.h" +#include "gromacs/hardware/printhardware.h" +#include "gromacs/listed-forces/disre.h" +#include "gromacs/listed-forces/gpubonded.h" +#include "gromacs/listed-forces/orires.h" +#include "gromacs/math/functions.h" +#include "gromacs/math/utilities.h" +#include "gromacs/math/vec.h" +#include "gromacs/mdlib/boxdeformation.h" +#include "gromacs/mdlib/calc_verletbuf.h" +#include "gromacs/mdlib/forcerec.h" +#include "gromacs/mdlib/gmx_omp_nthreads.h" +#include "gromacs/mdlib/makeconstraints.h" +#include "gromacs/mdlib/md_support.h" +#include "gromacs/mdlib/mdatoms.h" +#include "gromacs/mdlib/mdrun.h" +#include "gromacs/mdlib/membed.h" +#include "gromacs/mdlib/nb_verlet.h" +#include "gromacs/mdlib/nbnxn_gpu_data_mgmt.h" +#include "gromacs/mdlib/nbnxn_search.h" +#include "gromacs/mdlib/nbnxn_tuning.h" +#include "gromacs/mdlib/ppforceworkload.h" +#include "gromacs/mdlib/qmmm.h" +#include "gromacs/mdlib/sighandler.h" +#include "gromacs/mdlib/sim_util.h" +#include "gromacs/mdlib/stophandler.h" +#include "gromacs/mdrun/legacymdrunoptions.h" +#include "gromacs/mdrun/logging.h" +#include "gromacs/mdrun/multisim.h" +#include "gromacs/mdrun/simulationcontext.h" +#include "gromacs/mdrunutility/mdmodules.h" +#include "gromacs/mdrunutility/threadaffinity.h" +#include "gromacs/mdtypes/commrec.h" +#include "gromacs/mdtypes/fcdata.h" +#include "gromacs/mdtypes/inputrec.h" +#include "gromacs/mdtypes/md_enums.h" +#include "gromacs/mdtypes/observableshistory.h" +#include "gromacs/mdtypes/state.h" +#include "gromacs/pbcutil/pbc.h" +#include "gromacs/pulling/output.h" +#include "gromacs/pulling/pull.h" +#include "gromacs/pulling/pull_rotation.h" +#include "gromacs/restraint/manager.h" +#include "gromacs/restraint/restraintmdmodule.h" +#include "gromacs/restraint/restraintpotential.h" +#include "gromacs/swap/swapcoords.h" +#include "gromacs/taskassignment/decidegpuusage.h" +#include "gromacs/taskassignment/resourcedivision.h" +#include "gromacs/taskassignment/taskassignment.h" +#include "gromacs/taskassignment/usergpuids.h" +#include "gromacs/timing/wallcycle.h" +#include "gromacs/topology/mtop_util.h" +#include "gromacs/trajectory/trajectoryframe.h" +#include "gromacs/utility/basenetwork.h" +#include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/exceptions.h" +#include 
"gromacs/utility/fatalerror.h" +#include "gromacs/utility/filestream.h" +#include "gromacs/utility/gmxassert.h" +#include "gromacs/utility/gmxmpi.h" +#include "gromacs/utility/logger.h" +#include "gromacs/utility/loggerbuilder.h" +#include "gromacs/utility/physicalnodecommunicator.h" +#include "gromacs/utility/pleasecite.h" +#include "gromacs/utility/programcontext.h" +#include "gromacs/utility/smalloc.h" +#include "gromacs/utility/stringutil.h" + +#include "integrator.h" +#include "replicaexchange.h" + +#if GMX_FAHCORE +#include "corewrap.h" +#endif + +namespace gmx +{ + +/*! \brief Barrier for safe simultaneous thread access to mdrunner data + * + * Used to ensure that the master thread does not modify mdrunner during copy + * on the spawned threads. */ +static void threadMpiMdrunnerAccessBarrier() +{ +#if GMX_THREAD_MPI + MPI_Barrier(MPI_COMM_WORLD); +#endif +} + +Mdrunner Mdrunner::cloneOnSpawnedThread() const +{ + auto newRunner = Mdrunner(); + + // All runners in the same process share a restraint manager resource because it is + // part of the interface to the client code, which is associated only with the + // original thread. Handles to the same resources can be obtained by copy. + { + newRunner.restraintManager_ = compat::make_unique<RestraintManager>(*restraintManager_); + } + + // Copy original cr pointer before master thread can pass the thread barrier + newRunner.cr = reinitialize_commrec_for_this_thread(cr); + + // Copy members of master runner. + // \todo Replace with builder when Simulation context and/or runner phases are better defined. + // Ref https://redmine.gromacs.org/issues/2587 and https://redmine.gromacs.org/issues/2375 + newRunner.hw_opt = hw_opt; + newRunner.filenames = filenames; + + newRunner.oenv = oenv; + newRunner.mdrunOptions = mdrunOptions; + newRunner.domdecOptions = domdecOptions; + newRunner.nbpu_opt = nbpu_opt; + newRunner.pme_opt = pme_opt; + newRunner.pme_fft_opt = pme_fft_opt; + newRunner.bonded_opt = bonded_opt; + newRunner.nstlist_cmdline = nstlist_cmdline; + newRunner.replExParams = replExParams; + newRunner.pforce = pforce; + newRunner.ms = ms; + newRunner.stopHandlerBuilder_ = compat::make_unique<StopHandlerBuilder>(*stopHandlerBuilder_); + + threadMpiMdrunnerAccessBarrier(); + + GMX_RELEASE_ASSERT(!MASTER(newRunner.cr), "cloneOnSpawnedThread should only be called on spawned threads"); + + return newRunner; +} + +/*! \brief The callback used for running on spawned threads. + * + * Obtains the pointer to the master mdrunner object from the one + * argument permitted to the thread-launch API call, copies it to make + * a new runner for this thread, reinitializes necessary data, and + * proceeds to the simulation. */ +static void mdrunner_start_fn(const void *arg) +{ + try + { + auto masterMdrunner = reinterpret_cast<const gmx::Mdrunner *>(arg); + /* copy the arg list to make sure that it's thread-local. This + doesn't copy pointed-to items, of course; fnm, cr and fplog + are reset in the call below, all others should be const. */ + gmx::Mdrunner mdrunner = masterMdrunner->cloneOnSpawnedThread(); + mdrunner.mdrunner(); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; +} + + +/*! \brief Start thread-MPI threads. + * + * Called by mdrunner() to start a specific number of threads + * (including the main thread) for thread-parallel runs. This in turn + * calls mdrunner() for each thread. All options are the same as for + * mdrunner(). 
*/ +t_commrec *Mdrunner::spawnThreads(int numThreadsToLaunch) const +{ + + /* first check whether we even need to start tMPI */ + if (numThreadsToLaunch < 2) + { + return cr; + } + +#if GMX_THREAD_MPI + /* now spawn new threads that start mdrunner_start_fn(), while + the main thread returns, we set thread affinity later */ + if (tMPI_Init_fn(TRUE, numThreadsToLaunch, TMPI_AFFINITY_NONE, + mdrunner_start_fn, static_cast<const void*>(this)) != TMPI_SUCCESS) + { + GMX_THROW(gmx::InternalError("Failed to spawn thread-MPI threads")); + } + + threadMpiMdrunnerAccessBarrier(); +#else + GMX_UNUSED_VALUE(mdrunner_start_fn); +#endif + + return reinitialize_commrec_for_this_thread(cr); +} + +} // namespace gmx + +/*! \brief Initialize variables for Verlet scheme simulation */ +static void prepare_verlet_scheme(FILE *fplog, + t_commrec *cr, + t_inputrec *ir, + int nstlist_cmdline, + const gmx_mtop_t *mtop, + const matrix box, + bool makeGpuPairList, + const gmx::CpuInfo &cpuinfo) +{ + /* For NVE simulations, we will retain the initial list buffer */ + if (EI_DYNAMICS(ir->eI) && + ir->verletbuf_tol > 0 && + !(EI_MD(ir->eI) && ir->etc == etcNO)) + { + /* Update the Verlet buffer size for the current run setup */ + + /* Here we assume SIMD-enabled kernels are being used. But as currently + * calc_verlet_buffer_size gives the same results for 4x8 and 4x4 + * and 4x2 gives a larger buffer than 4x4, this is ok. + */ + ListSetupType listType = (makeGpuPairList ? ListSetupType::Gpu : ListSetupType::CpuSimdWhenSupported); + VerletbufListSetup listSetup = verletbufGetSafeListSetup(listType); + + real rlist_new; + calc_verlet_buffer_size(mtop, det(box), ir, ir->nstlist, ir->nstlist - 1, -1, &listSetup, nullptr, &rlist_new); + + if (rlist_new != ir->rlist) + { + if (fplog != nullptr) + { + fprintf(fplog, "\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n", + ir->rlist, rlist_new, + listSetup.cluster_size_i, listSetup.cluster_size_j); + } + ir->rlist = rlist_new; + } + } + + if (nstlist_cmdline > 0 && (!EI_DYNAMICS(ir->eI) || ir->verletbuf_tol <= 0)) + { + gmx_fatal(FARGS, "Can not set nstlist without %s", + !EI_DYNAMICS(ir->eI) ? "dynamics" : "verlet-buffer-tolerance"); + } + + if (EI_DYNAMICS(ir->eI)) + { + /* Set or try nstlist values */ + increaseNstlist(fplog, cr, ir, nstlist_cmdline, mtop, box, makeGpuPairList, cpuinfo); + } +} + +/*! \brief Override the nslist value in inputrec + * + * with value passed on the command line (if any) + */ +static void override_nsteps_cmdline(const gmx::MDLogger &mdlog, + int64_t nsteps_cmdline, + t_inputrec *ir) +{ + assert(ir); + + /* override with anything else than the default -2 */ + if (nsteps_cmdline > -2) + { + char sbuf_steps[STEPSTRSIZE]; + char sbuf_msg[STRLEN]; + + ir->nsteps = nsteps_cmdline; + if (EI_DYNAMICS(ir->eI) && nsteps_cmdline != -1) + { + sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps, %.3g ps", + gmx_step_str(nsteps_cmdline, sbuf_steps), + fabs(nsteps_cmdline*ir->delta_t)); + } + else + { + sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps", + gmx_step_str(nsteps_cmdline, sbuf_steps)); + } + + GMX_LOG(mdlog.warning).asParagraph().appendText(sbuf_msg); + } + else if (nsteps_cmdline < -2) + { + gmx_fatal(FARGS, "Invalid nsteps value passed on the command line: %" PRId64, + nsteps_cmdline); + } + /* Do nothing if nsteps_cmdline == -2 */ +} + +namespace gmx +{ + +/*! \brief Return whether GPU acceleration of nonbondeds is supported with the given settings. 
+ * + * If not, and if a warning may be issued, logs a warning about + * falling back to CPU code. With thread-MPI, only the first + * call to this function should have \c issueWarning true. */ +static bool gpuAccelerationOfNonbondedIsUseful(const MDLogger &mdlog, + const t_inputrec *ir, + bool issueWarning) +{ + if (ir->opts.ngener - ir->nwall > 1) + { + /* The GPU code does not support more than one energy group. + * If the user requested GPUs explicitly, a fatal error is given later. + */ + if (issueWarning) + { + GMX_LOG(mdlog.warning).asParagraph() + .appendText("Multiple energy groups is not implemented for GPUs, falling back to the CPU. " + "For better performance, run on the GPU without energy groups and then do " + "gmx mdrun -rerun option on the trajectory with an energy group .tpr file."); + } + return false; + } + return true; +} + +//! Initializes the logger for mdrun. +static gmx::LoggerOwner buildLogger(FILE *fplog, const t_commrec *cr) +{ + gmx::LoggerBuilder builder; + if (fplog != nullptr) + { + builder.addTargetFile(gmx::MDLogger::LogLevel::Info, fplog); + } + if (cr == nullptr || SIMMASTER(cr)) + { + builder.addTargetStream(gmx::MDLogger::LogLevel::Warning, + &gmx::TextOutputFile::standardError()); + } + return builder.build(); +} + +//! Make a TaskTarget from an mdrun argument string. +static TaskTarget findTaskTarget(const char *optionString) +{ + TaskTarget returnValue = TaskTarget::Auto; + + if (strncmp(optionString, "auto", 3) == 0) + { + returnValue = TaskTarget::Auto; + } + else if (strncmp(optionString, "cpu", 3) == 0) + { + returnValue = TaskTarget::Cpu; + } + else if (strncmp(optionString, "gpu", 3) == 0) + { + returnValue = TaskTarget::Gpu; + } + else + { + GMX_ASSERT(false, "Option string should have been checked for sanity already"); + } + + return returnValue; +} + +int Mdrunner::mdrunner() +{ + matrix box; + t_nrnb *nrnb; + t_forcerec *fr = nullptr; + t_fcdata *fcd = nullptr; + real ewaldcoeff_q = 0; + real ewaldcoeff_lj = 0; + int nChargePerturbed = -1, nTypePerturbed = 0; + gmx_wallcycle_t wcycle; + gmx_walltime_accounting_t walltime_accounting = nullptr; + int rc; + int64_t reset_counters; + int nthreads_pme = 1; + gmx_membed_t * membed = nullptr; + gmx_hw_info_t *hwinfo = nullptr; + + /* CAUTION: threads may be started later on in this function, so + cr doesn't reflect the final parallel state right now */ + std::unique_ptr<gmx::MDModules> mdModules(new gmx::MDModules); + t_inputrec inputrecInstance; + t_inputrec *inputrec = &inputrecInstance; + gmx_mtop_t mtop; + + bool doMembed = opt2bSet("-membed", filenames.size(), filenames.data()); + bool doRerun = mdrunOptions.rerun; + + // Handle task-assignment related user options. + EmulateGpuNonbonded emulateGpuNonbonded = (getenv("GMX_EMULATE_GPU") != nullptr ? + EmulateGpuNonbonded::Yes : EmulateGpuNonbonded::No); + std::vector<int> gpuIdsAvailable; + try + { + gpuIdsAvailable = parseUserGpuIds(hw_opt.gpuIdsAvailable); + // TODO We could put the GPU IDs into a std::map to find + // duplicates, but for the small numbers of IDs involved, this + // code is simple and fast. 
+ for (size_t i = 0; i != gpuIdsAvailable.size(); ++i) + { + for (size_t j = i+1; j != gpuIdsAvailable.size(); ++j) + { + if (gpuIdsAvailable[i] == gpuIdsAvailable[j]) + { + GMX_THROW(InvalidInputError(formatString("The string of available GPU device IDs '%s' may not contain duplicate device IDs", hw_opt.gpuIdsAvailable.c_str()))); + } + } + } + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; + + std::vector<int> userGpuTaskAssignment; + try + { + userGpuTaskAssignment = parseUserGpuIds(hw_opt.userGpuTaskAssignment); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; + auto nonbondedTarget = findTaskTarget(nbpu_opt); + auto pmeTarget = findTaskTarget(pme_opt); + auto pmeFftTarget = findTaskTarget(pme_fft_opt); + auto bondedTarget = findTaskTarget(bonded_opt); + PmeRunMode pmeRunMode = PmeRunMode::None; + + // Here we assume that SIMMASTER(cr) does not change even after the + // threads are started. + + FILE *fplog = nullptr; + // If we are appending, we don't write log output because we need + // to check that the old log file matches what the checkpoint file + // expects. Otherwise, we should start to write log output now if + // there is a file ready for it. + if (logFileHandle != nullptr && !mdrunOptions.continuationOptions.appendFiles) + { + fplog = gmx_fio_getfp(logFileHandle); + } + gmx::LoggerOwner logOwner(buildLogger(fplog, cr)); + gmx::MDLogger mdlog(logOwner.logger()); + + // TODO The thread-MPI master rank makes a working + // PhysicalNodeCommunicator here, but it gets rebuilt by all ranks + // after the threads have been launched. This works because no use + // is made of that communicator until after the execution paths + // have rejoined. But it is likely that we can improve the way + // this is expressed, e.g. by expressly running detection only the + // master rank for thread-MPI, rather than relying on the mutex + // and reference count. + PhysicalNodeCommunicator physicalNodeComm(MPI_COMM_WORLD, gmx_physicalnode_id_hash()); + hwinfo = gmx_detect_hardware(mdlog, physicalNodeComm); + + gmx_print_detected_hardware(fplog, cr, ms, mdlog, hwinfo); + + std::vector<int> gpuIdsToUse; + auto compatibleGpus = getCompatibleGpus(hwinfo->gpu_info); + if (gpuIdsAvailable.empty()) + { + gpuIdsToUse = compatibleGpus; + } + else + { + for (const auto &availableGpuId : gpuIdsAvailable) + { + bool availableGpuIsCompatible = false; + for (const auto &compatibleGpuId : compatibleGpus) + { + if (availableGpuId == compatibleGpuId) + { + availableGpuIsCompatible = true; + break; + } + } + if (!availableGpuIsCompatible) + { + gmx_fatal(FARGS, "You limited the set of compatible GPUs to a set that included ID #%d, but that ID is not for a compatible GPU. 
List only compatible GPUs.", availableGpuId); + } + gpuIdsToUse.push_back(availableGpuId); + } + } + + if (fplog != nullptr) + { + /* Print references after all software/hardware printing */ + please_cite(fplog, "Abraham2015"); + please_cite(fplog, "Pall2015"); + please_cite(fplog, "Pronk2013"); + please_cite(fplog, "Hess2008b"); + please_cite(fplog, "Spoel2005a"); + please_cite(fplog, "Lindahl2001a"); + please_cite(fplog, "Berendsen95a"); + writeSourceDoi(fplog); + } + + std::unique_ptr<t_state> globalState; + + if (SIMMASTER(cr)) + { + /* Only the master rank has the global state */ + globalState = compat::make_unique<t_state>(); + + /* Read (nearly) all data required for the simulation */ + read_tpx_state(ftp2fn(efTPR, filenames.size(), filenames.data()), inputrec, globalState.get(), &mtop); + + /* In rerun, set velocities to zero if present */ + if (doRerun && ((globalState->flags & (1 << estV)) != 0)) + { + // rerun does not use velocities + GMX_LOG(mdlog.info).asParagraph().appendText( + "Rerun trajectory contains velocities. Rerun does only evaluate " + "potential energy and forces. The velocities will be ignored."); + for (int i = 0; i < globalState->natoms; i++) + { + clear_rvec(globalState->v[i]); + } + globalState->flags &= ~(1 << estV); + } + + if (inputrec->cutoff_scheme != ecutsVERLET) + { + if (nstlist_cmdline > 0) + { + gmx_fatal(FARGS, "Can not set nstlist with the group cut-off scheme"); + } + + if (!compatibleGpus.empty()) + { + GMX_LOG(mdlog.warning).asParagraph().appendText( + "NOTE: GPU(s) found, but the current simulation can not use GPUs\n" + " To use a GPU, set the mdp option: cutoff-scheme = Verlet"); + } + } + } + + /* Check and update the hardware options for internal consistency */ + check_and_update_hw_opt_1(mdlog, &hw_opt, cr, domdecOptions.numPmeRanks); + + /* Early check for externally set process affinity. */ + gmx_check_thread_affinity_set(mdlog, cr, + &hw_opt, hwinfo->nthreads_hw_avail, FALSE); + + if (GMX_THREAD_MPI && SIMMASTER(cr)) + { + if (domdecOptions.numPmeRanks > 0 && hw_opt.nthreads_tmpi <= 0) + { + gmx_fatal(FARGS, "You need to explicitly specify the number of MPI threads (-ntmpi) when using separate PME ranks"); + } + + /* Since the master knows the cut-off scheme, update hw_opt for this. + * This is done later for normal MPI and also once more with tMPI + * for all tMPI ranks. + */ + check_and_update_hw_opt_2(&hw_opt, inputrec->cutoff_scheme); + + bool useGpuForNonbonded = false; + bool useGpuForPme = false; + try + { + // If the user specified the number of ranks, then we must + // respect that, but in default mode, we need to allow for + // the number of GPUs to choose the number of ranks. + auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr); + useGpuForNonbonded = decideWhetherToUseGpusForNonbondedWithThreadMpi + (nonbondedTarget, gpuIdsToUse, userGpuTaskAssignment, emulateGpuNonbonded, + canUseGpuForNonbonded, + inputrec->cutoff_scheme == ecutsVERLET, + gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, GMX_THREAD_MPI), + hw_opt.nthreads_tmpi); + useGpuForPme = decideWhetherToUseGpusForPmeWithThreadMpi + (useGpuForNonbonded, pmeTarget, gpuIdsToUse, userGpuTaskAssignment, + *hwinfo, *inputrec, mtop, hw_opt.nthreads_tmpi, domdecOptions.numPmeRanks); + + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; + + /* Determine how many thread-MPI ranks to start. + * + * TODO Over-writing the user-supplied value here does + * prevent any possible subsequent checks from working + * correctly. 
*/ + hw_opt.nthreads_tmpi = get_nthreads_mpi(hwinfo, + &hw_opt, + gpuIdsToUse, + useGpuForNonbonded, + useGpuForPme, + inputrec, &mtop, + mdlog, + doMembed); + + // Now start the threads for thread MPI. + cr = spawnThreads(hw_opt.nthreads_tmpi); + /* The main thread continues here with a new cr. We don't deallocate + the old cr because other threads may still be reading it. */ + // TODO Both master and spawned threads call dup_tfn and + // reinitialize_commrec_for_this_thread. Find a way to express + // this better. + physicalNodeComm = PhysicalNodeCommunicator(MPI_COMM_WORLD, gmx_physicalnode_id_hash()); + } + // END OF CAUTION: cr and physicalNodeComm are now reliable + + if (PAR(cr)) + { + /* now broadcast everything to the non-master nodes/threads: */ + init_parallel(cr, inputrec, &mtop); + } + + // Now each rank knows the inputrec that SIMMASTER read and used, + // and (if applicable) cr->nnodes has been assigned the number of + // thread-MPI ranks that have been chosen. The ranks can now all + // run the task-deciding functions and will agree on the result + // without needing to communicate. + // + // TODO Should we do the communication in debug mode to support + // having an assertion? + // + // Note that these variables describe only their own node. + // + // Note that when bonded interactions run on a GPU they always run + // alongside a nonbonded task, so do not influence task assignment + // even though they affect the force calculation workload. + bool useGpuForNonbonded = false; + bool useGpuForPme = false; + bool useGpuForBonded = false; + try + { + // It's possible that there are different numbers of GPUs on + // different nodes, which is the user's responsibilty to + // handle. If unsuitable, we will notice that during task + // assignment. + bool gpusWereDetected = hwinfo->ngpu_compatible_tot > 0; + bool usingVerletScheme = inputrec->cutoff_scheme == ecutsVERLET; + auto canUseGpuForNonbonded = buildSupportsNonbondedOnGpu(nullptr); + useGpuForNonbonded = decideWhetherToUseGpusForNonbonded(nonbondedTarget, userGpuTaskAssignment, + emulateGpuNonbonded, + canUseGpuForNonbonded, + usingVerletScheme, + gpuAccelerationOfNonbondedIsUseful(mdlog, inputrec, !GMX_THREAD_MPI), + gpusWereDetected); + useGpuForPme = decideWhetherToUseGpusForPme(useGpuForNonbonded, pmeTarget, userGpuTaskAssignment, + *hwinfo, *inputrec, mtop, + cr->nnodes, domdecOptions.numPmeRanks, + gpusWereDetected); + auto canUseGpuForBonded = buildSupportsGpuBondeds(nullptr) && inputSupportsGpuBondeds(*inputrec, mtop, nullptr); + useGpuForBonded = + decideWhetherToUseGpusForBonded(useGpuForNonbonded, useGpuForPme, usingVerletScheme, + bondedTarget, canUseGpuForBonded, + EVDW_PME(inputrec->vdwtype), + EEL_PME_EWALD(inputrec->coulombtype), + domdecOptions.numPmeRanks, gpusWereDetected); + + pmeRunMode = (useGpuForPme ? PmeRunMode::GPU : PmeRunMode::CPU); + if (pmeRunMode == PmeRunMode::GPU) + { + if (pmeFftTarget == TaskTarget::Cpu) + { + pmeRunMode = PmeRunMode::Mixed; + } + } + else if (pmeFftTarget == TaskTarget::Gpu) + { + gmx_fatal(FARGS, "Assigning FFTs to GPU requires PME to be assigned to GPU as well. With PME on CPU you should not be using -pmefft."); + } + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; + + // Build restraints. + // TODO: hide restraint implementation details from Mdrunner. + // There is nothing unique about restraints at this point as far as the + // Mdrunner is concerned. 
The Mdrunner should just be getting a sequence of + // factory functions from the SimulationContext on which to call mdModules->add(). + // TODO: capture all restraints into a single RestraintModule, passed to the runner builder. + for (auto && restraint : restraintManager_->getRestraints()) + { + auto module = RestraintMDModule::create(restraint, + restraint->sites()); + mdModules->add(std::move(module)); + } + + // TODO: Error handling + mdModules->assignOptionsToModules(*inputrec->params, nullptr); + + if (fplog != nullptr) + { + pr_inputrec(fplog, 0, "Input Parameters", inputrec, FALSE); + fprintf(fplog, "\n"); + } + + if (SIMMASTER(cr)) + { + /* now make sure the state is initialized and propagated */ + set_state_entries(globalState.get(), inputrec); + } + + /* NM and TPI parallelize over force/energy calculations, not atoms, + * so we need to initialize and broadcast the global state. + */ + if (inputrec->eI == eiNM || inputrec->eI == eiTPI) + { + if (!MASTER(cr)) + { + globalState = compat::make_unique<t_state>(); + } + broadcastStateWithoutDynamics(cr, globalState.get()); + } + + /* A parallel command line option consistency check that we can + only do after any threads have started. */ + if (!PAR(cr) && (domdecOptions.numCells[XX] > 1 || + domdecOptions.numCells[YY] > 1 || + domdecOptions.numCells[ZZ] > 1 || + domdecOptions.numPmeRanks > 0)) + { + gmx_fatal(FARGS, + "The -dd or -npme option request a parallel simulation, " +#if !GMX_MPI + "but %s was compiled without threads or MPI enabled", output_env_get_program_display_name(oenv)); +#else +#if GMX_THREAD_MPI + "but the number of MPI-threads (option -ntmpi) is not set or is 1"); +#else + "but %s was not started through mpirun/mpiexec or only one rank was requested through mpirun/mpiexec", output_env_get_program_display_name(oenv)); +#endif +#endif + } + + if (doRerun && + (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI)) + { + gmx_fatal(FARGS, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun"); + } + + if (can_use_allvsall(inputrec, TRUE, cr, fplog) && DOMAINDECOMP(cr)) + { + gmx_fatal(FARGS, "All-vs-all loops do not work with domain decomposition, use a single MPI rank"); + } + + if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype))) + { + if (domdecOptions.numPmeRanks > 0) + { + gmx_fatal_collective(FARGS, cr->mpi_comm_mysim, MASTER(cr), + "PME-only ranks are requested, but the system does not use PME for electrostatics or LJ"); + } + + domdecOptions.numPmeRanks = 0; + } + + if (useGpuForNonbonded && domdecOptions.numPmeRanks < 0) + { + /* With NB GPUs we don't automatically use PME-only CPU ranks. PME ranks can + * improve performance with many threads per GPU, since our OpenMP + * scaling is bad, but it's difficult to automate the setup. + */ + domdecOptions.numPmeRanks = 0; + } + if (useGpuForPme) + { + if (domdecOptions.numPmeRanks < 0) + { + domdecOptions.numPmeRanks = 0; + // TODO possibly print a note that one can opt-in for a separate PME GPU rank? + } + else + { + GMX_RELEASE_ASSERT(domdecOptions.numPmeRanks <= 1, "PME GPU decomposition is not supported"); + } + } + +#if GMX_FAHCORE + if (MASTER(cr)) + { + fcRegisterSteps(inputrec->nsteps, inputrec->init_step); + } +#endif + + /* NMR restraints must be initialized before load_checkpoint, + * since with time averaging the history is added to t_state. + * For proper consistency check we therefore need to extend + * t_state here. 
+ * So the PME-only nodes (if present) will also initialize + * the distance restraints. + */ + snew(fcd, 1); + + /* This needs to be called before read_checkpoint to extend the state */ + init_disres(fplog, &mtop, inputrec, cr, ms, fcd, globalState.get(), replExParams.exchangeInterval > 0); + + init_orires(fplog, &mtop, inputrec, cr, ms, globalState.get(), &(fcd->orires)); + + auto deform = prepareBoxDeformation(globalState->box, cr, *inputrec); + + ObservablesHistory observablesHistory = {}; + + ContinuationOptions &continuationOptions = mdrunOptions.continuationOptions; + + if (continuationOptions.startedFromCheckpoint) + { + /* Check if checkpoint file exists before doing continuation. + * This way we can use identical input options for the first and subsequent runs... + */ + gmx_bool bReadEkin; + + load_checkpoint(opt2fn_master("-cpi", filenames.size(), filenames.data(), cr), + logFileHandle, + cr, domdecOptions.numCells, + inputrec, globalState.get(), + &bReadEkin, &observablesHistory, + continuationOptions.appendFiles, + continuationOptions.appendFilesOptionSet, + mdrunOptions.reproducible); + + if (bReadEkin) + { + continuationOptions.haveReadEkin = true; + } + + if (continuationOptions.appendFiles && logFileHandle) + { + // Now we can start normal logging to the truncated log file. + fplog = gmx_fio_getfp(logFileHandle); + prepareLogAppending(fplog); + logOwner = buildLogger(fplog, cr); + mdlog = logOwner.logger(); + } + } + + if (mdrunOptions.numStepsCommandline > -2) + { + GMX_LOG(mdlog.info).asParagraph(). + appendText("The -nsteps functionality is deprecated, and may be removed in a future version. " + "Consider using gmx convert-tpr -nsteps or changing the appropriate .mdp file field."); + } + /* override nsteps with value set on the commamdline */ + override_nsteps_cmdline(mdlog, mdrunOptions.numStepsCommandline, inputrec); + + if (SIMMASTER(cr)) + { + copy_mat(globalState->box, box); + } + + if (PAR(cr)) + { + gmx_bcast(sizeof(box), box, cr); + } + + /* Update rlist and nstlist. */ + if (inputrec->cutoff_scheme == ecutsVERLET) + { + prepare_verlet_scheme(fplog, cr, inputrec, nstlist_cmdline, &mtop, box, + useGpuForNonbonded || (emulateGpuNonbonded == EmulateGpuNonbonded::Yes), *hwinfo->cpuInfo); + } + + LocalAtomSetManager atomSets; + + if (PAR(cr) && !(EI_TPI(inputrec->eI) || + inputrec->eI == eiNM)) + { + cr->dd = init_domain_decomposition(mdlog, cr, domdecOptions, mdrunOptions, + &mtop, inputrec, + box, positionsFromStatePointer(globalState.get()), + &atomSets); + // Note that local state still does not exist yet. + } + else + { + /* PME, if used, is done on all nodes with 1D decomposition */ + cr->npmenodes = 0; + cr->duty = (DUTY_PP | DUTY_PME); + + if (inputrec->ePBC == epbcSCREW) + { + gmx_fatal(FARGS, + "pbc=screw is only implemented with domain decomposition"); + } + } + + if (PAR(cr)) + { + /* After possible communicator splitting in make_dd_communicators. + * we can set up the intra/inter node communication. + */ + gmx_setup_nodecomm(fplog, cr); + } + +#if GMX_MPI + if (isMultiSim(ms)) + { + GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted( + "This is simulation %d out of %d running as a composite GROMACS\n" + "multi-simulation job. Setup for this simulation:\n", + ms->sim, ms->nsim); + } + GMX_LOG(mdlog.warning).appendTextFormatted( + "Using %d MPI %s\n", + cr->nnodes, +#if GMX_THREAD_MPI + cr->nnodes == 1 ? "thread" : "threads" +#else + cr->nnodes == 1 ? 
"process" : "processes" +#endif + ); + fflush(stderr); +#endif + + /* Check and update hw_opt for the cut-off scheme */ + check_and_update_hw_opt_2(&hw_opt, inputrec->cutoff_scheme); + + /* Check and update the number of OpenMP threads requested */ + checkAndUpdateRequestedNumOpenmpThreads(&hw_opt, *hwinfo, cr, ms, physicalNodeComm.size_, + pmeRunMode, mtop); + + gmx_omp_nthreads_init(mdlog, cr, + hwinfo->nthreads_hw_avail, + physicalNodeComm.size_, + hw_opt.nthreads_omp, + hw_opt.nthreads_omp_pme, + !thisRankHasDuty(cr, DUTY_PP), + inputrec->cutoff_scheme == ecutsVERLET); + + // Enable FP exception detection for the Verlet scheme, but not in + // Release mode and not for compilers with known buggy FP + // exception support (clang with any optimization) or suspected + // buggy FP exception support (gcc 7.* with optimization). +#if !defined NDEBUG && \ + !((defined __clang__ || (defined(__GNUC__) && !defined(__ICC) && __GNUC__ == 7)) \ + && defined __OPTIMIZE__) + const bool bEnableFPE = inputrec->cutoff_scheme == ecutsVERLET; +#else + const bool bEnableFPE = false; +#endif + //FIXME - reconcile with gmx_feenableexcept() call from CommandLineModuleManager::run() + if (bEnableFPE) + { + gmx_feenableexcept(); + } + + // Build a data structure that expresses which kinds of non-bonded + // task are handled by this rank. + // + // TODO Later, this might become a loop over all registered modules + // relevant to the mdp inputs, to find those that have such tasks. + // + // TODO This could move before init_domain_decomposition() as part + // of refactoring that separates the responsibility for duty + // assignment from setup for communication between tasks, and + // setup for tasks handled with a domain (ie including short-ranged + // tasks, bonded tasks, etc.). + // + // Note that in general useGpuForNonbonded, etc. can have a value + // that is inconsistent with the presence of actual GPUs on any + // rank, and that is not known to be a problem until the + // duty of the ranks on a node become known. + // + // TODO Later we might need the concept of computeTasksOnThisRank, + // from which we construct gpuTasksOnThisRank. + // + // Currently the DD code assigns duty to ranks that can + // include PP work that currently can be executed on a single + // GPU, if present and compatible. This has to be coordinated + // across PP ranks on a node, with possible multiple devices + // or sharing devices on a node, either from the user + // selection, or automatically. + auto haveGpus = !gpuIdsToUse.empty(); + std::vector<GpuTask> gpuTasksOnThisRank; + if (thisRankHasDuty(cr, DUTY_PP)) + { + if (useGpuForNonbonded) + { + // Note that any bonded tasks on a GPU always accompany a + // non-bonded task. + if (haveGpus) + { + gpuTasksOnThisRank.push_back(GpuTask::Nonbonded); + } + else if (nonbondedTarget == TaskTarget::Gpu) + { + gmx_fatal(FARGS, "Cannot run short-ranged nonbonded interactions on a GPU because there is none detected."); + } + else if (bondedTarget == TaskTarget::Gpu) + { + gmx_fatal(FARGS, "Cannot run bonded interactions on a GPU because there is none detected."); + } + } + } + // TODO cr->duty & DUTY_PME should imply that a PME algorithm is active, but currently does not. 
+    if (EEL_PME(inputrec->coulombtype) && (thisRankHasDuty(cr, DUTY_PME)))
+    {
+        if (useGpuForPme)
+        {
+            if (haveGpus)
+            {
+                gpuTasksOnThisRank.push_back(GpuTask::Pme);
+            }
+            else if (pmeTarget == TaskTarget::Gpu)
+            {
+                gmx_fatal(FARGS, "Cannot run PME on a GPU because there is none detected.");
+            }
+        }
+    }
+
+    GpuTaskAssignment gpuTaskAssignment;
+    try
+    {
+        // Produce the task assignment for this rank.
+        gpuTaskAssignment = runTaskAssignment(gpuIdsToUse, userGpuTaskAssignment, *hwinfo,
+                                              mdlog, cr, ms, physicalNodeComm, gpuTasksOnThisRank,
+                                              useGpuForBonded, pmeRunMode);
+    }
+    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
+
+    /* Prevent other ranks from continuing after an issue was found
+     * and reported as a fatal error.
+     *
+     * TODO This function implements a barrier so that MPI runtimes
+     * can organize an orderly shutdown if one of the ranks has had to
+     * issue a fatal error in various code already run. When we have
+     * MPI-aware error handling and reporting, this should be
+     * improved. */
+#if GMX_MPI
+    if (PAR(cr))
+    {
+        MPI_Barrier(cr->mpi_comm_mysim);
+    }
+    if (isMultiSim(ms))
+    {
+        if (SIMMASTER(cr))
+        {
+            MPI_Barrier(ms->mpi_comm_masters);
+        }
+        /* We need another barrier to prevent non-master ranks from continuing
+         * when an error occurred in a different simulation.
+         */
+        MPI_Barrier(cr->mpi_comm_mysim);
+    }
+#endif
+
+    /* Now that we know the setup is consistent, check for efficiency */
+    check_resource_division_efficiency(hwinfo, !gpuTaskAssignment.empty(), mdrunOptions.ntompOptionIsSet,
+                                       cr, mdlog);
+
+    gmx_device_info_t *nonbondedDeviceInfo = nullptr;
+
+    if (thisRankHasDuty(cr, DUTY_PP))
+    {
+        // This works because only one task of each type is currently permitted.
+        auto nbGpuTaskMapping = std::find_if(gpuTaskAssignment.begin(), gpuTaskAssignment.end(),
+                                             hasTaskType<GpuTask::Nonbonded>);
+        if (nbGpuTaskMapping != gpuTaskAssignment.end())
+        {
+            int nonbondedDeviceId = nbGpuTaskMapping->deviceId_;
+            nonbondedDeviceInfo = getDeviceInfo(hwinfo->gpu_info, nonbondedDeviceId);
+            init_gpu(nonbondedDeviceInfo);
+
+            if (DOMAINDECOMP(cr))
+            {
+                /* When we share GPUs over ranks, we need to know this for the DLB */
+                dd_setup_dlb_resource_sharing(cr, nonbondedDeviceId);
+            }
+
+        }
+    }
+
+    std::unique_ptr<ClfftInitializer> initializedClfftLibrary;
+
+    gmx_device_info_t *pmeDeviceInfo = nullptr;
+    // Later, this program could contain kernels that might later be
+    // re-used as auto-tuning progresses, or subsequent simulations
+    // are invoked.
+    PmeGpuProgramStorage pmeGpuProgram;
+    // This works because only one task of each type is currently permitted.
+    auto pmeGpuTaskMapping = std::find_if(gpuTaskAssignment.begin(), gpuTaskAssignment.end(), hasTaskType<GpuTask::Pme>);
+    const bool thisRankHasPmeGpuTask = (pmeGpuTaskMapping != gpuTaskAssignment.end());
+    if (thisRankHasPmeGpuTask)
+    {
+        pmeDeviceInfo = getDeviceInfo(hwinfo->gpu_info, pmeGpuTaskMapping->deviceId_);
+        init_gpu(pmeDeviceInfo);
+        pmeGpuProgram = buildPmeGpuProgram(pmeDeviceInfo);
+        // TODO It would be nice to move this logic into the factory
+        // function. See Redmine #2535.
+ bool isMasterThread = !GMX_THREAD_MPI || MASTER(cr); + if (pmeRunMode == PmeRunMode::GPU && !initializedClfftLibrary && isMasterThread) + { + initializedClfftLibrary = initializeClfftLibrary(); + } + } + + /* getting number of PP/PME threads + PME: env variable should be read only on one node to make sure it is + identical everywhere; + */ + nthreads_pme = gmx_omp_nthreads_get(emntPME); + + int numThreadsOnThisRank; + /* threads on this MPI process or TMPI thread */ + if (thisRankHasDuty(cr, DUTY_PP)) + { + numThreadsOnThisRank = gmx_omp_nthreads_get(emntNonbonded); + } + else + { + numThreadsOnThisRank = nthreads_pme; + } + + checkHardwareOversubscription(numThreadsOnThisRank, cr->nodeid, + *hwinfo->hardwareTopology, + physicalNodeComm, mdlog); + + if (hw_opt.thread_affinity != threadaffOFF) + { + /* Before setting affinity, check whether the affinity has changed + * - which indicates that probably the OpenMP library has changed it + * since we first checked). + */ + gmx_check_thread_affinity_set(mdlog, cr, + &hw_opt, hwinfo->nthreads_hw_avail, TRUE); + + int numThreadsOnThisNode, intraNodeThreadOffset; + analyzeThreadsOnThisNode(physicalNodeComm, numThreadsOnThisRank, &numThreadsOnThisNode, + &intraNodeThreadOffset); + + /* Set the CPU affinity */ + gmx_set_thread_affinity(mdlog, cr, &hw_opt, *hwinfo->hardwareTopology, + numThreadsOnThisRank, numThreadsOnThisNode, + intraNodeThreadOffset, nullptr); + } + + if (mdrunOptions.timingOptions.resetStep > -1) + { + GMX_LOG(mdlog.info).asParagraph(). + appendText("The -resetstep functionality is deprecated, and may be removed in a future version."); + } + wcycle = wallcycle_init(fplog, mdrunOptions.timingOptions.resetStep, cr); + + if (PAR(cr)) + { + /* Master synchronizes its value of reset_counters with all nodes + * including PME only nodes */ + reset_counters = wcycle_get_reset_counters(wcycle); + gmx_bcast_sim(sizeof(reset_counters), &reset_counters, cr); + wcycle_set_reset_counters(wcycle, reset_counters); + } + + // Membrane embedding must be initialized before we call init_forcerec() + if (doMembed) + { + if (MASTER(cr)) + { + fprintf(stderr, "Initializing membed"); + } + /* Note that membed cannot work in parallel because mtop is + * changed here. Fix this if we ever want to make it run with + * multiple ranks. */ + membed = init_membed(fplog, filenames.size(), filenames.data(), &mtop, inputrec, globalState.get(), cr, + &mdrunOptions + .checkpointOptions.period); + } + + std::unique_ptr<MDAtoms> mdAtoms; + std::unique_ptr<gmx_vsite_t> vsite; + + snew(nrnb, 1); + if (thisRankHasDuty(cr, DUTY_PP)) + { + /* Initiate forcerecord */ + fr = mk_forcerec(); + fr->forceProviders = mdModules->initForceProviders(); + init_forcerec(fplog, mdlog, fr, fcd, + inputrec, &mtop, cr, box, + opt2fn("-table", filenames.size(), filenames.data()), + opt2fn("-tablep", filenames.size(), filenames.data()), + opt2fns("-tableb", filenames.size(), filenames.data()), + *hwinfo, nonbondedDeviceInfo, + useGpuForBonded, + FALSE, + pforce); + + /* Initialize the mdAtoms structure. + * mdAtoms is not filled with atom data, + * as this can not be done now with domain decomposition. + */ + mdAtoms = makeMDAtoms(fplog, mtop, *inputrec, thisRankHasPmeGpuTask); + if (globalState && thisRankHasPmeGpuTask) + { + // The pinning of coordinates in the global state object works, because we only use + // PME on GPU without DD or on a separate PME rank, and because the local state pointer + // points to the global state object without DD. 
+            // FIXME: MD and EM separately set up the local state - this should happen in the same function,
+            // which should also perform the pinning.
+            changePinningPolicy(&globalState->x, pme_get_pinning_policy());
+        }
+
+        /* Initialize the virtual site communication */
+        vsite = initVsite(mtop, cr);
+
+        calc_shifts(box, fr->shift_vec);
+
+        /* With periodic molecules the charge groups should be whole at start up
+         * and the virtual sites should not be far from their proper positions.
+         */
+        if (!inputrec->bContinuation && MASTER(cr) &&
+            !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
+        {
+            /* Make molecules whole at start of run */
+            if (fr->ePBC != epbcNONE)
+            {
+                do_pbc_first_mtop(fplog, inputrec->ePBC, box, &mtop, globalState->x.rvec_array());
+            }
+            if (vsite)
+            {
+                /* Correct initial vsite positions are required
+                 * for the initial distribution in the domain decomposition
+                 * and for the initial shell prediction.
+                 */
+                constructVsitesGlobal(mtop, globalState->x);
+            }
+        }
+
+        if (EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
+        {
+            ewaldcoeff_q = fr->ic->ewaldcoeff_q;
+            ewaldcoeff_lj = fr->ic->ewaldcoeff_lj;
+        }
+    }
+    else
+    {
+        /* This is a PME-only node */
+
+        GMX_ASSERT(globalState == nullptr, "We don't need the state on a PME-only rank and expect it to be uninitialized");
+
+        ewaldcoeff_q = calc_ewaldcoeff_q(inputrec->rcoulomb, inputrec->ewald_rtol);
+        ewaldcoeff_lj = calc_ewaldcoeff_lj(inputrec->rvdw, inputrec->ewald_rtol_lj);
+    }
+
+    gmx_pme_t *sepPmeData = nullptr;
+    // This reference hides the fact that PME data is owned by runner on PME-only ranks and by forcerec on other ranks
+    GMX_ASSERT(thisRankHasDuty(cr, DUTY_PP) == (fr != nullptr), "Double-checking that only PME-only ranks have no forcerec");
+    gmx_pme_t * &pmedata = fr ? fr->pmedata : sepPmeData;
+
+    /* Initiate PME if necessary,
+     * either on all nodes or on dedicated PME nodes only. */
+    if (EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype))
+    {
+        if (mdAtoms && mdAtoms->mdatoms())
+        {
+            nChargePerturbed = mdAtoms->mdatoms()->nChargePerturbed;
+            if (EVDW_PME(inputrec->vdwtype))
+            {
+                nTypePerturbed = mdAtoms->mdatoms()->nTypePerturbed;
+            }
+        }
+        if (cr->npmenodes > 0)
+        {
+            /* The PME-only nodes need to know nChargePerturbed (FEP on Q) and nTypePerturbed (FEP on LJ) */
+            gmx_bcast_sim(sizeof(nChargePerturbed), &nChargePerturbed, cr);
+            gmx_bcast_sim(sizeof(nTypePerturbed), &nTypePerturbed, cr);
+        }
+
+        if (thisRankHasDuty(cr, DUTY_PME))
+        {
+            try
+            {
+                pmedata = gmx_pme_init(cr,
+                                       getNumPmeDomains(cr->dd),
+                                       inputrec,
+                                       mtop.natoms, nChargePerturbed != 0, nTypePerturbed != 0,
+                                       mdrunOptions.reproducible,
+                                       ewaldcoeff_q, ewaldcoeff_lj,
+                                       nthreads_pme,
+                                       pmeRunMode, nullptr,
+                                       pmeDeviceInfo, pmeGpuProgram.get(), mdlog);
+            }
+            GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
+        }
+    }
+
+
+    if (EI_DYNAMICS(inputrec->eI))
+    {
+        /* Turn on signal handling on all nodes */
+        /*
+         * A user signal from the PME nodes (if any)
+         * is communicated to the PP nodes.
+ */ + signal_handler_install(); + } + + if (thisRankHasDuty(cr, DUTY_PP)) + { + /* Assumes uniform use of the number of OpenMP threads */ + walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault)); + + if (inputrec->bPull) + { + /* Initialize pull code */ + inputrec->pull_work = + init_pull(fplog, inputrec->pull, inputrec, + &mtop, cr, &atomSets, inputrec->fepvals->init_lambda); + if (inputrec->pull->bXOutAverage || inputrec->pull->bFOutAverage) + { + initPullHistory(inputrec->pull_work, &observablesHistory); + } + if (EI_DYNAMICS(inputrec->eI) && MASTER(cr)) + { + init_pull_output_files(inputrec->pull_work, + filenames.size(), filenames.data(), oenv, + continuationOptions); + } + } + + std::unique_ptr<EnforcedRotation> enforcedRotation; + if (inputrec->bRot) + { + /* Initialize enforced rotation code */ + enforcedRotation = init_rot(fplog, + inputrec, + filenames.size(), + filenames.data(), + cr, + &atomSets, + globalState.get(), + &mtop, + oenv, + mdrunOptions); + } + + if (inputrec->eSwapCoords != eswapNO) + { + /* Initialize ion swapping code */ + init_swapcoords(fplog, inputrec, opt2fn_master("-swap", filenames.size(), filenames.data(), cr), + &mtop, globalState.get(), &observablesHistory, + cr, &atomSets, oenv, mdrunOptions); + } + + /* Let makeConstraints know whether we have essential dynamics constraints. + * TODO: inputrec should tell us whether we use an algorithm, not a file option or the checkpoint + */ + bool doEssentialDynamics = (opt2fn_null("-ei", filenames.size(), filenames.data()) != nullptr + || observablesHistory.edsamHistory); + auto constr = makeConstraints(mtop, *inputrec, doEssentialDynamics, + fplog, *mdAtoms->mdatoms(), + cr, ms, nrnb, wcycle, fr->bMolPBC); + + if (DOMAINDECOMP(cr)) + { + GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP"); + /* This call is not included in init_domain_decomposition mainly + * because fr->cginfo_mb is set later. + */ + dd_init_bondeds(fplog, cr->dd, &mtop, vsite.get(), inputrec, + domdecOptions.checkBondedInteractions, + fr->cginfo_mb); + } + + // TODO This is not the right place to manage the lifetime of + // this data structure, but currently it's the easiest way to + // make it work. Later, it should probably be made/updated + // after the workload for the lifetime of a PP domain is + // understood. + PpForceWorkload ppForceWorkload; + + GMX_ASSERT(stopHandlerBuilder_, "Runner must provide StopHandlerBuilder to integrator."); + /* Now do whatever the user wants us to do (how flexible...) */ + Integrator integrator { + fplog, cr, ms, mdlog, static_cast<int>(filenames.size()), filenames.data(), + oenv, + mdrunOptions, + vsite.get(), constr.get(), + enforcedRotation ? 
enforcedRotation->getLegacyEnfrot() : nullptr, + deform.get(), + mdModules->outputProvider(), + inputrec, &mtop, + fcd, + globalState.get(), + &observablesHistory, + mdAtoms.get(), nrnb, wcycle, fr, + &ppForceWorkload, + replExParams, + membed, + walltime_accounting, + std::move(stopHandlerBuilder_) + }; + integrator.run(inputrec->eI, doRerun); + + if (inputrec->bPull) + { + finish_pull(inputrec->pull_work); + } + + } + else + { + GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP"); + /* do PME only */ + walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME)); + gmx_pmeonly(pmedata, cr, nrnb, wcycle, walltime_accounting, inputrec, pmeRunMode); + } + + wallcycle_stop(wcycle, ewcRUN); + + /* Finish up, write some stuff + * if rerunMD, don't write last frame again + */ + finish_run(fplog, mdlog, cr, + inputrec, nrnb, wcycle, walltime_accounting, + fr ? fr->nbv : nullptr, + pmedata, + EI_DYNAMICS(inputrec->eI) && !isMultiSim(ms)); + + // Free PME data + if (pmedata) + { + gmx_pme_destroy(pmedata); + pmedata = nullptr; + } + + // FIXME: this is only here to manually unpin mdAtoms->chargeA_ and state->x, + // before we destroy the GPU context(s) in free_gpu_resources(). + // Pinned buffers are associated with contexts in CUDA. + // As soon as we destroy GPU contexts after mdrunner() exits, these lines should go. + mdAtoms.reset(nullptr); + globalState.reset(nullptr); + mdModules.reset(nullptr); // destruct force providers here as they might also use the GPU + + /* Free GPU memory and set a physical node tMPI barrier (which should eventually go away) */ + free_gpu_resources(fr, physicalNodeComm); + free_gpu(nonbondedDeviceInfo); + free_gpu(pmeDeviceInfo); + done_forcerec(fr, mtop.molblock.size(), mtop.groups.grps[egcENER].nr); + sfree(fcd); + + if (doMembed) + { + free_membed(membed); + } + + gmx_hardware_info_free(); + + /* Does what it says */ + print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime()); + walltime_accounting_destroy(walltime_accounting); + sfree(nrnb); + + // Ensure log file content is written + if (logFileHandle) + { + gmx_fio_flush(logFileHandle); + } + + /* Reset FPEs (important for unit tests) by disabling them. Assumes no + * exceptions were enabled before function was called. */ + if (bEnableFPE) + { + gmx_fedisableexcept(); + } + + rc = static_cast<int>(gmx_get_stop_condition()); + +#if GMX_THREAD_MPI + /* we need to join all threads. The sub-threads join when they + exit this function, but the master thread needs to be told to + wait for that. */ + if (PAR(cr) && MASTER(cr)) + { + done_commrec(cr); + tMPI_Finalize(); + } +#endif + + return rc; +} + +Mdrunner::~Mdrunner() +{ + // Clean up of the Manager. + // This will end up getting called on every thread-MPI rank, which is unnecessary, + // but okay as long as threads synchronize some time before adding or accessing + // a new set of restraints. + if (restraintManager_) + { + restraintManager_->clear(); + GMX_ASSERT(restraintManager_->countRestraints() == 0, + "restraints added during runner life time should be cleared at runner destruction."); + } +}; + +void Mdrunner::addPotential(std::shared_ptr<gmx::IRestraintPotential> puller, + std::string name) +{ + GMX_ASSERT(restraintManager_, "Mdrunner must have a restraint manager."); + // Not sure if this should be logged through the md logger or something else, + // but it is helpful to have some sort of INFO level message sent somewhere. 
+ // std::cout << "Registering restraint named " << name << std::endl; + + // When multiple restraints are used, it may be wasteful to register them separately. + // Maybe instead register an entire Restraint Manager as a force provider. + restraintManager_->addToSpec(std::move(puller), + std::move(name)); +} + +Mdrunner::Mdrunner(Mdrunner &&) noexcept = default; + +//NOLINTNEXTLINE(performance-noexcept-move-constructor) working around GCC bug 58265 +Mdrunner &Mdrunner::operator=(Mdrunner && /*handle*/) noexcept(BUGFREE_NOEXCEPT_STRING) = default; + +class Mdrunner::BuilderImplementation +{ + public: + BuilderImplementation() = delete; + explicit BuilderImplementation(SimulationContext* context); + ~BuilderImplementation(); + + BuilderImplementation &setExtraMdrunOptions(const MdrunOptions &options, + real forceWarningThreshold); + + void addDomdec(const DomdecOptions &options); + + void addVerletList(int nstlist); + + void addReplicaExchange(const ReplicaExchangeParameters ¶ms); + + void addMultiSim(gmx_multisim_t* multisim); + + void addNonBonded(const char* nbpu_opt); + + void addPME(const char* pme_opt_, const char* pme_fft_opt_); + + void addBondedTaskAssignment(const char* bonded_opt); + + void addHardwareOptions(const gmx_hw_opt_t &hardwareOptions); + + void addFilenames(ArrayRef <const t_filenm> filenames); + + void addOutputEnvironment(gmx_output_env_t* outputEnvironment); + + void addLogFile(t_fileio *logFileHandle); + + void addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder); + + Mdrunner build(); + + private: + // Default parameters copied from runner.h + // \todo Clarify source(s) of default parameters. + + const char* nbpu_opt_ = nullptr; + const char* pme_opt_ = nullptr; + const char* pme_fft_opt_ = nullptr; + const char *bonded_opt_ = nullptr; + + MdrunOptions mdrunOptions_; + + DomdecOptions domdecOptions_; + + ReplicaExchangeParameters replicaExchangeParameters_; + + //! Command-line override for the duration of a neighbor list with the Verlet scheme. + int nstlist_ = 0; + + //! Non-owning multisim communicator handle. + std::unique_ptr<gmx_multisim_t*> multisim_ = nullptr; + + //! Print a warning if any force is larger than this (in kJ/mol nm). + real forceWarningThreshold_ = -1; + + /*! \brief Non-owning pointer to SimulationContext (owned and managed by client) + * + * \internal + * \todo Establish robust protocol to make sure resources remain valid. + * SimulationContext will likely be separated into multiple layers for + * different levels of access and different phases of execution. Ref + * https://redmine.gromacs.org/issues/2375 + * https://redmine.gromacs.org/issues/2587 + */ + SimulationContext* context_ = nullptr; + + //! \brief Parallelism information. + gmx_hw_opt_t hardwareOptions_; + + //! filename options for simulation. + ArrayRef<const t_filenm> filenames_; + + /*! \brief Handle to output environment. + * + * \todo gmx_output_env_t needs lifetime management. + */ + gmx_output_env_t* outputEnvironment_ = nullptr; + + /*! \brief Non-owning handle to MD log file. + * + * \todo Context should own output facilities for client. + * \todo Improve log file handle management. + * \internal + * Code managing the FILE* relies on the ability to set it to + * nullptr to check whether the filehandle is valid. + */ + t_fileio* logFileHandle_ = nullptr; + + /*! + * \brief Builder for simulation stop signal handler. 
+ */ + std::unique_ptr<StopHandlerBuilder> stopHandlerBuilder_ = nullptr; +}; + +Mdrunner::BuilderImplementation::BuilderImplementation(SimulationContext* context) : + context_(context) +{ + GMX_ASSERT(context_, "Bug found. It should not be possible to construct builder without a valid context."); +} + +Mdrunner::BuilderImplementation::~BuilderImplementation() = default; + +Mdrunner::BuilderImplementation & +Mdrunner::BuilderImplementation::setExtraMdrunOptions(const MdrunOptions &options, + real forceWarningThreshold) +{ + mdrunOptions_ = options; + forceWarningThreshold_ = forceWarningThreshold; + return *this; +} + +void Mdrunner::BuilderImplementation::addDomdec(const DomdecOptions &options) +{ + domdecOptions_ = options; +} + +void Mdrunner::BuilderImplementation::addVerletList(int nstlist) +{ + nstlist_ = nstlist; +} + +void Mdrunner::BuilderImplementation::addReplicaExchange(const ReplicaExchangeParameters ¶ms) +{ + replicaExchangeParameters_ = params; +} + +void Mdrunner::BuilderImplementation::addMultiSim(gmx_multisim_t* multisim) +{ + multisim_ = compat::make_unique<gmx_multisim_t*>(multisim); +} + +Mdrunner Mdrunner::BuilderImplementation::build() +{ + auto newRunner = Mdrunner(); + + GMX_ASSERT(context_, "Bug found. It should not be possible to call build() without a valid context."); + + newRunner.mdrunOptions = mdrunOptions_; + newRunner.domdecOptions = domdecOptions_; + + // \todo determine an invariant to check or confirm that all gmx_hw_opt_t objects are valid + newRunner.hw_opt = hardwareOptions_; + + // No invariant to check. This parameter exists to optionally override other behavior. + newRunner.nstlist_cmdline = nstlist_; + + newRunner.replExParams = replicaExchangeParameters_; + + newRunner.filenames = filenames_; + + GMX_ASSERT(context_->communicationRecord_, "SimulationContext communications not initialized."); + newRunner.cr = context_->communicationRecord_; + + if (multisim_) + { + // nullptr is a valid value for the multisim handle, so we don't check the pointed-to pointer. + newRunner.ms = *multisim_; + } + else + { + GMX_THROW(gmx::APIError("MdrunnerBuilder::addMultiSim() is required before build()")); + } + + // \todo Clarify ownership and lifetime management for gmx_output_env_t + // \todo Update sanity checking when output environment has clearly specified invariants. + // Initialization and default values for oenv are not well specified in the current version. 
+ if (outputEnvironment_) + { + newRunner.oenv = outputEnvironment_; + } + else + { + GMX_THROW(gmx::APIError("MdrunnerBuilder::addOutputEnvironment() is required before build()")); + } + + newRunner.logFileHandle = logFileHandle_; + + if (nbpu_opt_) + { + newRunner.nbpu_opt = nbpu_opt_; + } + else + { + GMX_THROW(gmx::APIError("MdrunnerBuilder::addNonBonded() is required before build()")); + } + + if (pme_opt_ && pme_fft_opt_) + { + newRunner.pme_opt = pme_opt_; + newRunner.pme_fft_opt = pme_fft_opt_; + } + else + { + GMX_THROW(gmx::APIError("MdrunnerBuilder::addElectrostatics() is required before build()")); + } + + if (bonded_opt_) + { + newRunner.bonded_opt = bonded_opt_; + } + else + { + GMX_THROW(gmx::APIError("MdrunnerBuilder::addBondedTaskAssignment() is required before build()")); + } + + newRunner.restraintManager_ = compat::make_unique<gmx::RestraintManager>(); + + if (stopHandlerBuilder_) + { + newRunner.stopHandlerBuilder_ = std::move(stopHandlerBuilder_); + } + else + { + newRunner.stopHandlerBuilder_ = compat::make_unique<StopHandlerBuilder>(); + } + + return newRunner; +} + +void Mdrunner::BuilderImplementation::addNonBonded(const char* nbpu_opt) +{ + nbpu_opt_ = nbpu_opt; +} + +void Mdrunner::BuilderImplementation::addPME(const char* pme_opt, + const char* pme_fft_opt) +{ + pme_opt_ = pme_opt; + pme_fft_opt_ = pme_fft_opt; +} + +void Mdrunner::BuilderImplementation::addBondedTaskAssignment(const char* bonded_opt) +{ + bonded_opt_ = bonded_opt; +} + +void Mdrunner::BuilderImplementation::addHardwareOptions(const gmx_hw_opt_t &hardwareOptions) +{ + hardwareOptions_ = hardwareOptions; +} + +void Mdrunner::BuilderImplementation::addFilenames(ArrayRef<const t_filenm> filenames) +{ + filenames_ = filenames; +} + +void Mdrunner::BuilderImplementation::addOutputEnvironment(gmx_output_env_t* outputEnvironment) +{ + outputEnvironment_ = outputEnvironment; +} + +void Mdrunner::BuilderImplementation::addLogFile(t_fileio *logFileHandle) +{ + logFileHandle_ = logFileHandle; +} + +void Mdrunner::BuilderImplementation::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder) +{ + stopHandlerBuilder_ = std::move(builder); +} + +MdrunnerBuilder::MdrunnerBuilder(compat::not_null<SimulationContext*> context) : + impl_ {gmx::compat::make_unique<Mdrunner::BuilderImplementation>(context)} +{ +} + +MdrunnerBuilder::~MdrunnerBuilder() = default; + +MdrunnerBuilder &MdrunnerBuilder::addSimulationMethod(const MdrunOptions &options, + real forceWarningThreshold) +{ + impl_->setExtraMdrunOptions(options, forceWarningThreshold); + return *this; +} + +MdrunnerBuilder &MdrunnerBuilder::addDomainDecomposition(const DomdecOptions &options) +{ + impl_->addDomdec(options); + return *this; +} + +MdrunnerBuilder &MdrunnerBuilder::addNeighborList(int nstlist) +{ + impl_->addVerletList(nstlist); + return *this; +} + +MdrunnerBuilder &MdrunnerBuilder::addReplicaExchange(const ReplicaExchangeParameters ¶ms) +{ + impl_->addReplicaExchange(params); + return *this; +} + +MdrunnerBuilder &MdrunnerBuilder::addMultiSim(gmx_multisim_t* multisim) +{ + impl_->addMultiSim(multisim); + return *this; +} + +MdrunnerBuilder &MdrunnerBuilder::addNonBonded(const char* nbpu_opt) +{ + impl_->addNonBonded(nbpu_opt); + return *this; +} + +MdrunnerBuilder &MdrunnerBuilder::addElectrostatics(const char* pme_opt, + const char* pme_fft_opt) +{ + // The builder method may become more general in the future, but in this version, + // parameters for PME electrostatics are both required and the only parameters + // available. 
+ if (pme_opt && pme_fft_opt) + { + impl_->addPME(pme_opt, pme_fft_opt); + } + else + { + GMX_THROW(gmx::InvalidInputError("addElectrostatics() arguments must be non-null pointers.")); + } + return *this; +} + +MdrunnerBuilder &MdrunnerBuilder::addBondedTaskAssignment(const char* bonded_opt) +{ + impl_->addBondedTaskAssignment(bonded_opt); + return *this; +} + +Mdrunner MdrunnerBuilder::build() +{ + return impl_->build(); +} + +MdrunnerBuilder &MdrunnerBuilder::addHardwareOptions(const gmx_hw_opt_t &hardwareOptions) +{ + impl_->addHardwareOptions(hardwareOptions); + return *this; +} + +MdrunnerBuilder &MdrunnerBuilder::addFilenames(ArrayRef<const t_filenm> filenames) +{ + impl_->addFilenames(filenames); + return *this; +} + +MdrunnerBuilder &MdrunnerBuilder::addOutputEnvironment(gmx_output_env_t* outputEnvironment) +{ + impl_->addOutputEnvironment(outputEnvironment); + return *this; +} + +MdrunnerBuilder &MdrunnerBuilder::addLogFile(t_fileio *logFileHandle) +{ + impl_->addLogFile(logFileHandle); + return *this; +} + +MdrunnerBuilder &MdrunnerBuilder::addStopHandlerBuilder(std::unique_ptr<StopHandlerBuilder> builder) +{ + impl_->addStopHandlerBuilder(std::move(builder)); + return *this; +} + +MdrunnerBuilder::MdrunnerBuilder(MdrunnerBuilder &&) noexcept = default; + +MdrunnerBuilder &MdrunnerBuilder::operator=(MdrunnerBuilder &&) noexcept = default; + +} // namespace gmx