diff --git a/CHANGES/v2.5.md b/CHANGES/v2.5.md
index f54c573032374302c21d6978cf0a8a036a59e508..109bbfd188737639e7d5c563956285031792ee54 100644
--- a/CHANGES/v2.5.md
+++ b/CHANGES/v2.5.md
@@ -35,6 +35,7 @@ Changes from version 2.4 which are relevant for users:
     by matheval (easily writable as a function of the available ones).
   - Implemented bash autocompletion, see \ref BashAutocompletion.
   - \ref MOLINFO now allows selecting atoms from chains with a numeric ID (see \issue{320}).
+  - Removed the patch for GMX 5.1.4.
 
 Changes from version 2.4 which are relevant for developers:
 - Code has been cleanup up replacing a number of pointers with `std::unique_ptr`.
diff --git a/patches/gromacs-5.1.4.config b/patches/gromacs-5.1.4.config
deleted file mode 100644
index ae65d400e7de4984db03a0a74349b1a50704db20..0000000000000000000000000000000000000000
--- a/patches/gromacs-5.1.4.config
+++ /dev/null
@@ -1,35 +0,0 @@
-
-
-function plumed_preliminary_test(){
-# check if the README contains the word GROMACS and if gromacs has been already configured
-  grep -q GROMACS README 1>/dev/null 2>/dev/null
-}
-
-function plumed_patch_info(){
-cat << EOF
-PLUMED can be incorporated into gromacs using the standard patching procedure.
-Patching must be done in the gromacs root directory  _before_ the cmake command is invoked.
-
-On clusters you may want to patch gromacs using the static version of plumed, in this case
-building gromacs can result in multiple errors. One possible solution is to configure gromacs
-with these additional options:
-
-cmake -DBUILD_SHARED_LIBS=OFF -DGMX_PREFER_STATIC_LIBS=ON
-
-To enable PLUMED in a gromacs simulation one should use
-mdrun with an extra -plumed flag. The flag can be used to
-specify the name of the PLUMED input file, e.g.:
-
-gmx mdrun -plumed plumed.dat
-
-This patch also implements the -hrex keyword for gromacs. See \ref hrex
-
-For more information on gromacs you should visit http://www.gromacs.org
-
-EOF
-}
-
-plumed_before_patch(){
-  plumed_patch_info
-}
-
diff --git a/patches/gromacs-5.1.4.diff/src/gromacs/CMakeLists.txt b/patches/gromacs-5.1.4.diff/src/gromacs/CMakeLists.txt
deleted file mode 100644
index 2af9fdf862dea3a884a4aaf07865bb85b43cc9ba..0000000000000000000000000000000000000000
--- a/patches/gromacs-5.1.4.diff/src/gromacs/CMakeLists.txt
+++ /dev/null
@@ -1,257 +0,0 @@
-#
-# This file is part of the GROMACS molecular simulation package.
-#
-# Copyright (c) 2010,2011,2012,2013,2014,2015, by the GROMACS development team, led by
-# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
-# and including many others, as listed in the AUTHORS file in the
-# top-level source directory and at http://www.gromacs.org.
-#
-# GROMACS is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public License
-# as published by the Free Software Foundation; either version 2.1
-# of the License, or (at your option) any later version.
-#
-# GROMACS is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with GROMACS; if not, see
-# http://www.gnu.org/licenses, or write to the Free Software Foundation,
-# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
-#
-# If you want to redistribute modifications to GROMACS, please
-# consider that scientific software is very special. Version
-# control is crucial - bugs must be traceable. We will be happy to
-# consider code for inclusion in the official distribution, but
-# derived work must not be called official GROMACS. Details are found
-# in the README & COPYING files - if they are missing, get the
-# official version at http://www.gromacs.org.
-#
-# To help us fund GROMACS development, we humbly ask that you cite
-# the research papers on the package. Check out http://www.gromacs.org.
-
-include(${CMAKE_SOURCE_DIR}/Plumed.cmake)
-
-set(LIBGROMACS_SOURCES)
-
-set_property(GLOBAL PROPERTY GMX_INSTALLED_HEADERS)
-
-function (gmx_install_headers)
-    if (NOT GMX_BUILD_MDRUN_ONLY)
-        file(RELATIVE_PATH _dest ${PROJECT_SOURCE_DIR}/src ${CMAKE_CURRENT_LIST_DIR})
-        install(FILES       ${ARGN}
-                DESTINATION "${INCL_INSTALL_DIR}/${_dest}"
-                COMPONENT   development)
-    endif()
-    foreach (_file ${ARGN})
-        if (IS_ABSOLUTE "${_file}")
-            set_property(GLOBAL APPEND PROPERTY GMX_INSTALLED_HEADERS ${_file})
-        else()
-            set_property(GLOBAL APPEND PROPERTY GMX_INSTALLED_HEADERS
-                         ${CMAKE_CURRENT_LIST_DIR}/${_file})
-        endif()
-    endforeach()
-endfunction ()
-
-function (gmx_write_installed_header_list)
-    get_property(_list GLOBAL PROPERTY GMX_INSTALLED_HEADERS)
-    string(REPLACE ";" "\n" _list "${_list}")
-    # TODO: Make this only update the file timestamp if the contents actually change.
-    file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/installed-headers.txt "${_list}")
-endfunction()
-
-if(GMX_USE_TNG)
-    option(GMX_EXTERNAL_TNG "Use external TNG instead of compiling the version shipped with GROMACS."
-           OFF)
-    # Detect TNG if GMX_EXTERNAL_TNG is explicitly ON
-    if(GMX_EXTERNAL_TNG)
-        find_package(TNG_IO 1.6.0)
-        if(NOT TNG_IO_FOUND)
-            message(FATAL_ERROR
-                "TNG >= 1.6.0 not found. "
-                "You can set GMX_EXTERNAL_TNG=OFF to compile TNG.")
-        endif()
-        include_directories(SYSTEM ${TNG_IO_INCLUDE_DIRS})
-    endif()
-    if(NOT GMX_EXTERNAL_TNG)
-        include(${CMAKE_SOURCE_DIR}/src/external/tng_io/BuildTNG.cmake)
-        tng_get_source_list(TNG_SOURCES TNG_IO_DEFINITIONS)
-        list(APPEND LIBGROMACS_SOURCES ${TNG_SOURCES})
-        tng_set_source_properties(WITH_ZLIB ${HAVE_ZLIB})
-
-        if (HAVE_ZLIB)
-            list(APPEND GMX_EXTRA_LIBRARIES ${ZLIB_LIBRARIES})
-            include_directories(SYSTEM ${ZLIB_INCLUDE_DIRS})
-        endif()
-    endif()
-else()
-    # We still need to get tng/tng_io_fwd.h from somewhere!
-    include_directories(BEFORE ${CMAKE_SOURCE_DIR}/src/external/tng_io/include)
-endif()
-
-add_subdirectory(gmxlib)
-add_subdirectory(mdlib)
-add_subdirectory(listed-forces)
-add_subdirectory(commandline)
-add_subdirectory(domdec)
-add_subdirectory(ewald)
-add_subdirectory(fft)
-add_subdirectory(linearalgebra)
-add_subdirectory(math)
-add_subdirectory(mdrunutility)
-add_subdirectory(random)
-add_subdirectory(onlinehelp)
-add_subdirectory(options)
-add_subdirectory(pbcutil)
-add_subdirectory(timing)
-add_subdirectory(topology)
-add_subdirectory(utility)
-add_subdirectory(fileio)
-add_subdirectory(swap)
-add_subdirectory(essentialdynamics)
-add_subdirectory(pulling)
-add_subdirectory(simd)
-add_subdirectory(imd)
-if (NOT GMX_BUILD_MDRUN_ONLY)
-    add_subdirectory(legacyheaders)
-    add_subdirectory(gmxana)
-    add_subdirectory(gmxpreprocess)
-    add_subdirectory(correlationfunctions)
-    add_subdirectory(statistics)
-    add_subdirectory(analysisdata)
-    add_subdirectory(selection)
-    add_subdirectory(trajectoryanalysis)
-    add_subdirectory(tools)
-endif()
-
-list(APPEND LIBGROMACS_SOURCES ${GMXLIB_SOURCES} ${MDLIB_SOURCES})
-
-# This would be the standard way to include thread_mpi, but
-# we want libgromacs to link the functions directly
-#if(GMX_THREAD_MPI)
-#    add_subdirectory(thread_mpi)
-#endif()
-#target_link_libraries(gmx ${GMX_EXTRA_LIBRARIES} ${THREAD_MPI_LIB})
-
-tmpi_get_source_list(THREAD_MPI_SOURCES ${CMAKE_SOURCE_DIR}/src/external/thread_mpi/src)
-list(APPEND LIBGROMACS_SOURCES ${THREAD_MPI_SOURCES})
-
-configure_file(version.h.cmakein version.h)
-gmx_install_headers(
-    analysisdata.h
-    commandline.h
-    options.h
-    selection.h
-    trajectoryanalysis.h
-    utility.h
-    ${CMAKE_CURRENT_BINARY_DIR}/version.h
-    )
-
-# This code is here instead of utility/CMakeLists.txt, because CMake
-# custom commands and source file properties can only be set in the directory
-# that contains the target that uses them.
-# TODO: Generate a header instead that can be included from baseversion.c.
-# That probably simplifies things somewhat.
-set(GENERATED_VERSION_FILE utility/baseversion-gen.c)
-gmx_configure_version_file(
-    utility/baseversion-gen.c.cmakein ${GENERATED_VERSION_FILE}
-    REMOTE_HASH SOURCE_FILE)
-list(APPEND LIBGROMACS_SOURCES ${GENERATED_VERSION_FILE})
-
-# apply gcc 4.4.x bug workaround
-if(GMX_USE_GCC44_BUG_WORKAROUND)
-   include(gmxGCC44O3BugWorkaround)
-   gmx_apply_gcc44_bug_workaround("listed-forces/bonded.cpp")
-   gmx_apply_gcc44_bug_workaround("mdlib/force.c")
-   gmx_apply_gcc44_bug_workaround("mdlib/constr.c")
-endif()
-
-if (GMX_GPU AND NOT GMX_USE_OPENCL)
-    cuda_add_library(libgromacs ${LIBGROMACS_SOURCES}
-            OPTIONS
-            RELWITHDEBINFO -g
-            DEBUG -g -D_DEBUG_=1)
-else()
-    add_library(libgromacs ${LIBGROMACS_SOURCES})
-endif()
-
-# Recent versions of gcc and clang give warnings on scanner.cpp, which
-# is a generated source file. These are awkward to suppress inline, so
-# we do it in the compilation command (after testing that the compiler
-# supports the suppressions). Setting the properties only works after
-# the related target has been created, e.g. after when the file is
-# used with add_library().
-include(CheckCXXCompilerFlag)
-check_cxx_compiler_flag(-Wno-unused-parameter HAS_NO_UNUSED_PARAMETER)
-if (HAS_NO_UNUSED_PARAMETER)
-    set(_scanner_cpp_compiler_flags "${_scanner_cpp_compiler_flags} -Wno-unused-parameter")
-endif()
-check_cxx_compiler_flag(-Wno-deprecated-register HAS_NO_DEPRECATED_REGISTER)
-if (HAS_NO_DEPRECATED_REGISTER)
-    set(_scanner_cpp_compiler_flags "${_scanner_cpp_compiler_flags} -Wno-deprecated-register")
-else()
-    check_cxx_compiler_flag(-Wno-deprecated HAS_NO_DEPRECATED)
-    if (HAS_NO_DEPRECATED)
-        set(_scanner_cpp_compiler_flags "${_scanner_cpp_compiler_flags} -Wno-deprecated")
-    endif()
-endif()
-set_source_files_properties(selection/scanner.cpp PROPERTIES COMPILE_FLAGS "${_scanner_cpp_compiler_flags}")
-
-target_link_libraries(libgromacs ${PLUMED_LOAD})
-
-target_link_libraries(libgromacs
-                      ${EXTRAE_LIBRARIES}
-                      ${GMX_EXTRA_LIBRARIES}
-                      ${TNG_IO_LIBRARIES}
-                      ${FFT_LIBRARIES} ${LINEAR_ALGEBRA_LIBRARIES}
-                      ${XML_LIBRARIES}
-                      ${THREAD_LIB} ${GMX_SHARED_LINKER_FLAGS} ${OPENCL_LIBRARIES})
-
-
-set_target_properties(libgromacs PROPERTIES
-                      OUTPUT_NAME "gromacs${GMX_LIBS_SUFFIX}"
-                      SOVERSION ${LIBRARY_SOVERSION_MAJOR}
-                      VERSION ${LIBRARY_VERSION}
-                      COMPILE_FLAGS "${OpenMP_C_FLAGS}")
-
-gmx_write_installed_header_list()
-
-# Only install the library in mdrun-only mode if it is actually necessary
-# for the binary
-if (NOT GMX_BUILD_MDRUN_ONLY OR BUILD_SHARED_LIBS)
-    install(TARGETS libgromacs
-            EXPORT libgromacs
-            LIBRARY DESTINATION ${LIB_INSTALL_DIR}
-            RUNTIME DESTINATION ${BIN_INSTALL_DIR}
-            ARCHIVE DESTINATION ${LIB_INSTALL_DIR}
-            COMPONENT libraries)
-endif()
-
-if (NOT GMX_BUILD_MDRUN_ONLY)
-    include(InstallLibInfo.cmake)
-endif()
-
-if (INSTALL_CUDART_LIB) #can be set manual by user
-    if (GMX_GPU AND NOT GMX_USE_OPENCL)
-        foreach(CUDA_LIB ${CUDA_LIBRARIES})
-            string(REGEX MATCH "cudart" IS_CUDART ${CUDA_LIB})
-            if(IS_CUDART) #libcuda should not be installed
-                #install also name-links (linker uses those)
-                file(GLOB CUDA_LIBS ${CUDA_LIB}*)
-                install(FILES ${CUDA_LIBS} DESTINATION
-                    ${LIB_INSTALL_DIR} COMPONENT libraries)
-            endif()
-        endforeach()
-    else()
-        message(WARNING "INSTALL_CUDART_LIB only makes sense with GMX_GPU")
-    endif()
-endif()
-
-if(GMX_GPU AND GMX_USE_OPENCL)
-    set(OPENCL_KERNELS ${MDLIB_OPENCL_KERNELS})
-
-    install(FILES ${OPENCL_KERNELS} DESTINATION
-        ${OCL_INSTALL_DIR} COMPONENT libraries)
-endif()
diff --git a/patches/gromacs-5.1.4.diff/src/gromacs/CMakeLists.txt.preplumed b/patches/gromacs-5.1.4.diff/src/gromacs/CMakeLists.txt.preplumed
deleted file mode 100644
index 467b0be4c1abb18c0a53a96ed052353a608d4a0f..0000000000000000000000000000000000000000
--- a/patches/gromacs-5.1.4.diff/src/gromacs/CMakeLists.txt.preplumed
+++ /dev/null
@@ -1,251 +0,0 @@
-#
-# This file is part of the GROMACS molecular simulation package.
-#
-# Copyright (c) 2010,2011,2012,2013,2014,2015, by the GROMACS development team, led by
-# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
-# and including many others, as listed in the AUTHORS file in the
-# top-level source directory and at http://www.gromacs.org.
-#
-# GROMACS is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public License
-# as published by the Free Software Foundation; either version 2.1
-# of the License, or (at your option) any later version.
-#
-# GROMACS is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with GROMACS; if not, see
-# http://www.gnu.org/licenses, or write to the Free Software Foundation,
-# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
-#
-# If you want to redistribute modifications to GROMACS, please
-# consider that scientific software is very special. Version
-# control is crucial - bugs must be traceable. We will be happy to
-# consider code for inclusion in the official distribution, but
-# derived work must not be called official GROMACS. Details are found
-# in the README & COPYING files - if they are missing, get the
-# official version at http://www.gromacs.org.
-#
-# To help us fund GROMACS development, we humbly ask that you cite
-# the research papers on the package. Check out http://www.gromacs.org.
-
-set(LIBGROMACS_SOURCES)
-
-set_property(GLOBAL PROPERTY GMX_INSTALLED_HEADERS)
-
-function (gmx_install_headers)
-    if (NOT GMX_BUILD_MDRUN_ONLY)
-        file(RELATIVE_PATH _dest ${PROJECT_SOURCE_DIR}/src ${CMAKE_CURRENT_LIST_DIR})
-        install(FILES       ${ARGN}
-                DESTINATION "${INCL_INSTALL_DIR}/${_dest}"
-                COMPONENT   development)
-    endif()
-    foreach (_file ${ARGN})
-        if (IS_ABSOLUTE "${_file}")
-            set_property(GLOBAL APPEND PROPERTY GMX_INSTALLED_HEADERS ${_file})
-        else()
-            set_property(GLOBAL APPEND PROPERTY GMX_INSTALLED_HEADERS
-                         ${CMAKE_CURRENT_LIST_DIR}/${_file})
-        endif()
-    endforeach()
-endfunction ()
-
-function (gmx_write_installed_header_list)
-    get_property(_list GLOBAL PROPERTY GMX_INSTALLED_HEADERS)
-    string(REPLACE ";" "\n" _list "${_list}")
-    # TODO: Make this only update the file timestamp if the contents actually change.
-    file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/installed-headers.txt "${_list}")
-endfunction()
-
-if(GMX_USE_TNG)
-    option(GMX_EXTERNAL_TNG "Use external TNG instead of compiling the version shipped with GROMACS."
-           OFF)
-    # Detect TNG if GMX_EXTERNAL_TNG is explicitly ON
-    if(GMX_EXTERNAL_TNG)
-        find_package(TNG_IO 1.6.0)
-        if(NOT TNG_IO_FOUND)
-            message(FATAL_ERROR
-                "TNG >= 1.6.0 not found. "
-                "You can set GMX_EXTERNAL_TNG=OFF to compile TNG.")
-        endif()
-        include_directories(SYSTEM ${TNG_IO_INCLUDE_DIRS})
-    endif()
-    if(NOT GMX_EXTERNAL_TNG)
-        include(${CMAKE_SOURCE_DIR}/src/external/tng_io/BuildTNG.cmake)
-        tng_get_source_list(TNG_SOURCES TNG_IO_DEFINITIONS)
-        list(APPEND LIBGROMACS_SOURCES ${TNG_SOURCES})
-        tng_set_source_properties(WITH_ZLIB ${HAVE_ZLIB})
-
-        if (HAVE_ZLIB)
-            list(APPEND GMX_EXTRA_LIBRARIES ${ZLIB_LIBRARIES})
-            include_directories(SYSTEM ${ZLIB_INCLUDE_DIRS})
-        endif()
-    endif()
-else()
-    # We still need to get tng/tng_io_fwd.h from somewhere!
-    include_directories(BEFORE ${CMAKE_SOURCE_DIR}/src/external/tng_io/include)
-endif()
-
-add_subdirectory(gmxlib)
-add_subdirectory(mdlib)
-add_subdirectory(listed-forces)
-add_subdirectory(commandline)
-add_subdirectory(domdec)
-add_subdirectory(ewald)
-add_subdirectory(fft)
-add_subdirectory(linearalgebra)
-add_subdirectory(math)
-add_subdirectory(mdrunutility)
-add_subdirectory(random)
-add_subdirectory(onlinehelp)
-add_subdirectory(options)
-add_subdirectory(pbcutil)
-add_subdirectory(timing)
-add_subdirectory(topology)
-add_subdirectory(utility)
-add_subdirectory(fileio)
-add_subdirectory(swap)
-add_subdirectory(essentialdynamics)
-add_subdirectory(pulling)
-add_subdirectory(simd)
-add_subdirectory(imd)
-if (NOT GMX_BUILD_MDRUN_ONLY)
-    add_subdirectory(legacyheaders)
-    add_subdirectory(gmxana)
-    add_subdirectory(gmxpreprocess)
-    add_subdirectory(correlationfunctions)
-    add_subdirectory(statistics)
-    add_subdirectory(analysisdata)
-    add_subdirectory(selection)
-    add_subdirectory(trajectoryanalysis)
-    add_subdirectory(tools)
-endif()
-
-list(APPEND LIBGROMACS_SOURCES ${GMXLIB_SOURCES} ${MDLIB_SOURCES})
-
-# This would be the standard way to include thread_mpi, but
-# we want libgromacs to link the functions directly
-#if(GMX_THREAD_MPI)
-#    add_subdirectory(thread_mpi)
-#endif()
-#target_link_libraries(gmx ${GMX_EXTRA_LIBRARIES} ${THREAD_MPI_LIB})
-
-tmpi_get_source_list(THREAD_MPI_SOURCES ${CMAKE_SOURCE_DIR}/src/external/thread_mpi/src)
-list(APPEND LIBGROMACS_SOURCES ${THREAD_MPI_SOURCES})
-
-configure_file(version.h.cmakein version.h)
-gmx_install_headers(
-    analysisdata.h
-    commandline.h
-    options.h
-    selection.h
-    trajectoryanalysis.h
-    utility.h
-    ${CMAKE_CURRENT_BINARY_DIR}/version.h
-    )
-
-# This code is here instead of utility/CMakeLists.txt, because CMake
-# custom commands and source file properties can only be set in the directory
-# that contains the target that uses them.
-# TODO: Generate a header instead that can be included from baseversion.c.
-# That probably simplifies things somewhat.
-set(GENERATED_VERSION_FILE utility/baseversion-gen.c)
-gmx_configure_version_file(
-    utility/baseversion-gen.c.cmakein ${GENERATED_VERSION_FILE}
-    REMOTE_HASH SOURCE_FILE)
-list(APPEND LIBGROMACS_SOURCES ${GENERATED_VERSION_FILE})
-
-# apply gcc 4.4.x bug workaround
-if(GMX_USE_GCC44_BUG_WORKAROUND)
-   include(gmxGCC44O3BugWorkaround)
-   gmx_apply_gcc44_bug_workaround("listed-forces/bonded.cpp")
-   gmx_apply_gcc44_bug_workaround("mdlib/force.c")
-   gmx_apply_gcc44_bug_workaround("mdlib/constr.c")
-endif()
-
-if (GMX_GPU AND NOT GMX_USE_OPENCL)
-    cuda_add_library(libgromacs ${LIBGROMACS_SOURCES}
-            OPTIONS
-            RELWITHDEBINFO -g
-            DEBUG -g -D_DEBUG_=1)
-else()
-    add_library(libgromacs ${LIBGROMACS_SOURCES})
-endif()
-
-# Recent versions of gcc and clang give warnings on scanner.cpp, which
-# is a generated source file. These are awkward to suppress inline, so
-# we do it in the compilation command (after testing that the compiler
-# supports the suppressions). Setting the properties only works after
-# the related target has been created, e.g. after when the file is
-# used with add_library().
-include(CheckCXXCompilerFlag)
-check_cxx_compiler_flag(-Wno-unused-parameter HAS_NO_UNUSED_PARAMETER)
-if (HAS_NO_UNUSED_PARAMETER)
-    set(_scanner_cpp_compiler_flags "${_scanner_cpp_compiler_flags} -Wno-unused-parameter")
-endif()
-check_cxx_compiler_flag(-Wno-deprecated-register HAS_NO_DEPRECATED_REGISTER)
-if (HAS_NO_DEPRECATED_REGISTER)
-    set(_scanner_cpp_compiler_flags "${_scanner_cpp_compiler_flags} -Wno-deprecated-register")
-else()
-    check_cxx_compiler_flag(-Wno-deprecated HAS_NO_DEPRECATED)
-    if (HAS_NO_DEPRECATED)
-        set(_scanner_cpp_compiler_flags "${_scanner_cpp_compiler_flags} -Wno-deprecated")
-    endif()
-endif()
-set_source_files_properties(selection/scanner.cpp PROPERTIES COMPILE_FLAGS "${_scanner_cpp_compiler_flags}")
-
-target_link_libraries(libgromacs
-                      ${EXTRAE_LIBRARIES}
-                      ${GMX_EXTRA_LIBRARIES}
-                      ${TNG_IO_LIBRARIES}
-                      ${FFT_LIBRARIES} ${LINEAR_ALGEBRA_LIBRARIES}
-                      ${XML_LIBRARIES}
-                      ${THREAD_LIB} ${GMX_SHARED_LINKER_FLAGS} ${OPENCL_LIBRARIES})
-set_target_properties(libgromacs PROPERTIES
-                      OUTPUT_NAME "gromacs${GMX_LIBS_SUFFIX}"
-                      SOVERSION ${LIBRARY_SOVERSION_MAJOR}
-                      VERSION ${LIBRARY_VERSION}
-                      COMPILE_FLAGS "${OpenMP_C_FLAGS}")
-
-gmx_write_installed_header_list()
-
-# Only install the library in mdrun-only mode if it is actually necessary
-# for the binary
-if (NOT GMX_BUILD_MDRUN_ONLY OR BUILD_SHARED_LIBS)
-    install(TARGETS libgromacs
-            EXPORT libgromacs
-            LIBRARY DESTINATION ${LIB_INSTALL_DIR}
-            RUNTIME DESTINATION ${BIN_INSTALL_DIR}
-            ARCHIVE DESTINATION ${LIB_INSTALL_DIR}
-            COMPONENT libraries)
-endif()
-
-if (NOT GMX_BUILD_MDRUN_ONLY)
-    include(InstallLibInfo.cmake)
-endif()
-
-if (INSTALL_CUDART_LIB) #can be set manual by user
-    if (GMX_GPU AND NOT GMX_USE_OPENCL)
-        foreach(CUDA_LIB ${CUDA_LIBRARIES})
-            string(REGEX MATCH "cudart" IS_CUDART ${CUDA_LIB})
-            if(IS_CUDART) #libcuda should not be installed
-                #install also name-links (linker uses those)
-                file(GLOB CUDA_LIBS ${CUDA_LIB}*)
-                install(FILES ${CUDA_LIBS} DESTINATION
-                    ${LIB_INSTALL_DIR} COMPONENT libraries)
-            endif()
-        endforeach()
-    else()
-        message(WARNING "INSTALL_CUDART_LIB only makes sense with GMX_GPU")
-    endif()
-endif()
-
-if(GMX_GPU AND GMX_USE_OPENCL)
-    set(OPENCL_KERNELS ${MDLIB_OPENCL_KERNELS})
-
-    install(FILES ${OPENCL_KERNELS} DESTINATION
-        ${OCL_INSTALL_DIR} COMPONENT libraries)
-endif()
diff --git a/patches/gromacs-5.1.4.diff/src/gromacs/mdlib/force.cpp b/patches/gromacs-5.1.4.diff/src/gromacs/mdlib/force.cpp
deleted file mode 100644
index d06924cfa07c514f9a54e001be6a5b5fca2cc71f..0000000000000000000000000000000000000000
--- a/patches/gromacs-5.1.4.diff/src/gromacs/mdlib/force.cpp
+++ /dev/null
@@ -1,962 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#include "gmxpre.h"
-
-#include "gromacs/legacyheaders/force.h"
-
-#include "config.h"
-
-#include <assert.h>
-#include <math.h>
-#include <string.h>
-
-#include "gromacs/domdec/domdec.h"
-#include "gromacs/ewald/ewald.h"
-#include "gromacs/ewald/long-range-correction.h"
-#include "gromacs/ewald/pme.h"
-#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
-#include "gromacs/legacyheaders/macros.h"
-#include "gromacs/legacyheaders/mdrun.h"
-#include "gromacs/legacyheaders/names.h"
-#include "gromacs/legacyheaders/network.h"
-#include "gromacs/legacyheaders/nonbonded.h"
-#include "gromacs/legacyheaders/nrnb.h"
-#include "gromacs/legacyheaders/ns.h"
-#include "gromacs/legacyheaders/qmmm.h"
-#include "gromacs/legacyheaders/txtdump.h"
-#include "gromacs/legacyheaders/typedefs.h"
-#include "gromacs/legacyheaders/types/commrec.h"
-#include "gromacs/listed-forces/listed-forces.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/mdlib/forcerec-threading.h"
-#include "gromacs/pbcutil/ishift.h"
-#include "gromacs/pbcutil/mshift.h"
-#include "gromacs/pbcutil/pbc.h"
-#include "gromacs/timing/wallcycle.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/smalloc.h"
-/* PLUMED */
-#include "../../../Plumed.h"
-int    plumedswitch=0;
-plumed plumedmain;
-void(*plumedcmd)(plumed,const char*,const void*)=NULL;
-/* END PLUMED */
-
-
-void ns(FILE              *fp,
-        t_forcerec        *fr,
-        matrix             box,
-        gmx_groups_t      *groups,
-        gmx_localtop_t    *top,
-        t_mdatoms         *md,
-        t_commrec         *cr,
-        t_nrnb            *nrnb,
-        gmx_bool           bFillGrid,
-        gmx_bool           bDoLongRangeNS)
-{
-    int     nsearch;
-
-
-    if (!fr->ns.nblist_initialized)
-    {
-        init_neighbor_list(fp, fr, md->homenr);
-    }
-
-    if (fr->bTwinRange)
-    {
-        fr->nlr = 0;
-    }
-
-    nsearch = search_neighbours(fp, fr, box, top, groups, cr, nrnb, md,
-                                bFillGrid, bDoLongRangeNS);
-    if (debug)
-    {
-        fprintf(debug, "nsearch = %d\n", nsearch);
-    }
-
-    /* Check whether we have to do dynamic load balancing */
-    /*if ((nsb->nstDlb > 0) && (mod(step,nsb->nstDlb) == 0))
-       count_nb(cr,nsb,&(top->blocks[ebCGS]),nns,fr->nlr,
-       &(top->idef),opts->ngener);
-     */
-    if (fr->ns.dump_nl > 0)
-    {
-        dump_nblist(fp, cr, fr, fr->ns.dump_nl);
-    }
-}
-
-static void reduce_thread_forces(int n, rvec *f,
-                                 tensor vir_q, tensor vir_lj,
-                                 real *Vcorr_q, real *Vcorr_lj,
-                                 real *dvdl_q, real *dvdl_lj,
-                                 int nthreads, f_thread_t *f_t)
-{
-    int t, i;
-    int nthreads_loop gmx_unused;
-
-    // cppcheck-suppress unreadVariable
-    nthreads_loop = gmx_omp_nthreads_get(emntBonded);
-    /* This reduction can run over any number of threads */
-#pragma omp parallel for num_threads(nthreads_loop) private(t) schedule(static)
-    for (i = 0; i < n; i++)
-    {
-        for (t = 1; t < nthreads; t++)
-        {
-            rvec_inc(f[i], f_t[t].f[i]);
-        }
-    }
-    for (t = 1; t < nthreads; t++)
-    {
-        *Vcorr_q  += f_t[t].Vcorr_q;
-        *Vcorr_lj += f_t[t].Vcorr_lj;
-        *dvdl_q   += f_t[t].dvdl[efptCOUL];
-        *dvdl_lj  += f_t[t].dvdl[efptVDW];
-        m_add(vir_q, f_t[t].vir_q, vir_q);
-        m_add(vir_lj, f_t[t].vir_lj, vir_lj);
-    }
-}
-
-void do_force_lowlevel(t_forcerec *fr,      t_inputrec *ir,
-                       t_idef     *idef,    t_commrec  *cr,
-                       t_nrnb     *nrnb,    gmx_wallcycle_t wcycle,
-                       t_mdatoms  *md,
-                       rvec       x[],      history_t  *hist,
-                       rvec       f[],
-                       rvec       f_longrange[],
-                       gmx_enerdata_t *enerd,
-                       t_fcdata   *fcd,
-                       gmx_localtop_t *top,
-                       gmx_genborn_t *born,
-                       gmx_bool       bBornRadii,
-                       matrix     box,
-                       t_lambda   *fepvals,
-                       real       *lambda,
-                       t_graph    *graph,
-                       t_blocka   *excl,
-                       rvec       mu_tot[],
-                       int        flags,
-                       float      *cycles_pme)
-{
-    int         i, j;
-    int         donb_flags;
-    gmx_bool    bSB;
-    int         pme_flags;
-    matrix      boxs;
-    rvec        box_size;
-    t_pbc       pbc;
-    real        dvdl_dum[efptNR], dvdl_nb[efptNR];
-
-#ifdef GMX_MPI
-    double  t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */
-#endif
-
-    set_pbc(&pbc, fr->ePBC, box);
-
-    /* reset free energy components */
-    for (i = 0; i < efptNR; i++)
-    {
-        dvdl_nb[i]  = 0;
-        dvdl_dum[i] = 0;
-    }
-
-    /* Reset box */
-    for (i = 0; (i < DIM); i++)
-    {
-        box_size[i] = box[i][i];
-    }
-
-    debug_gmx();
-
-    /* do QMMM first if requested */
-    if (fr->bQMMM)
-    {
-        enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr);
-    }
-
-    /* Call the short range functions all in one go. */
-
-#ifdef GMX_MPI
-    /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/
-#define TAKETIME FALSE
-    if (TAKETIME)
-    {
-        MPI_Barrier(cr->mpi_comm_mygroup);
-        t0 = MPI_Wtime();
-    }
-#endif
-
-    if (ir->nwall)
-    {
-        /* foreign lambda component for walls */
-        real dvdl_walls = do_walls(ir, fr, box, md, x, f, lambda[efptVDW],
-                                   enerd->grpp.ener[egLJSR], nrnb);
-        enerd->dvdl_lin[efptVDW] += dvdl_walls;
-    }
-
-    /* If doing GB, reset dvda and calculate the Born radii */
-    if (ir->implicit_solvent)
-    {
-        wallcycle_sub_start(wcycle, ewcsNONBONDED);
-
-        for (i = 0; i < born->nr; i++)
-        {
-            fr->dvda[i] = 0;
-        }
-
-        if (bBornRadii)
-        {
-            calc_gb_rad(cr, fr, ir, top, x, &(fr->gblist), born, md, nrnb);
-        }
-
-        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
-    }
-
-    where();
-    /* We only do non-bonded calculation with group scheme here, the verlet
-     * calls are done from do_force_cutsVERLET(). */
-    if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED))
-    {
-        donb_flags = 0;
-        /* Add short-range interactions */
-        donb_flags |= GMX_NONBONDED_DO_SR;
-
-        /* Currently all group scheme kernels always calculate (shift-)forces */
-        if (flags & GMX_FORCE_FORCES)
-        {
-            donb_flags |= GMX_NONBONDED_DO_FORCE;
-        }
-        if (flags & GMX_FORCE_VIRIAL)
-        {
-            donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE;
-        }
-        if (flags & GMX_FORCE_ENERGY)
-        {
-            donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
-        }
-        if (flags & GMX_FORCE_DO_LR)
-        {
-            donb_flags |= GMX_NONBONDED_DO_LR;
-        }
-
-        wallcycle_sub_start(wcycle, ewcsNONBONDED);
-        do_nonbonded(fr, x, f, f_longrange, md, excl,
-                     &enerd->grpp, nrnb,
-                     lambda, dvdl_nb, -1, -1, donb_flags);
-
-        /* If we do foreign lambda and we have soft-core interactions
-         * we have to recalculate the (non-linear) energies contributions.
-         */
-        if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
-        {
-            for (i = 0; i < enerd->n_lambda; i++)
-            {
-                real lam_i[efptNR];
-
-                for (j = 0; j < efptNR; j++)
-                {
-                    lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
-                }
-                reset_foreign_enerdata(enerd);
-                do_nonbonded(fr, x, f, f_longrange, md, excl,
-                             &(enerd->foreign_grpp), nrnb,
-                             lam_i, dvdl_dum, -1, -1,
-                             (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA);
-                sum_epot(&(enerd->foreign_grpp), enerd->foreign_term);
-                enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
-            }
-        }
-        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
-        where();
-    }
-
-    /* If we are doing GB, calculate bonded forces and apply corrections
-     * to the solvation forces */
-    /* MRS: Eventually, many need to include free energy contribution here! */
-    if (ir->implicit_solvent)
-    {
-        wallcycle_sub_start(wcycle, ewcsLISTED);
-        calc_gb_forces(cr, md, born, top, x, f, fr, idef,
-                       ir->gb_algorithm, ir->sa_algorithm, nrnb, &pbc, graph, enerd);
-        wallcycle_sub_stop(wcycle, ewcsLISTED);
-    }
-
-#ifdef GMX_MPI
-    if (TAKETIME)
-    {
-        t1          = MPI_Wtime();
-        fr->t_fnbf += t1-t0;
-    }
-#endif
-
-    if (fepvals->sc_alpha != 0)
-    {
-        enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
-    }
-    else
-    {
-        enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
-    }
-
-    if (fepvals->sc_alpha != 0)
-
-    /* even though coulomb part is linear, we already added it, beacuse we
-       need to go through the vdw calculation anyway */
-    {
-        enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
-    }
-    else
-    {
-        enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
-    }
-
-    debug_gmx();
-
-
-    if (debug)
-    {
-        pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS);
-    }
-
-    /* Shift the coordinates. Must be done before listed forces and PPPM,
-     * but is also necessary for SHAKE and update, therefore it can NOT
-     * go when no listed forces have to be evaluated.
-     *
-     * The shifting and PBC code is deliberately not timed, since with
-     * the Verlet scheme it only takes non-zero time with triclinic
-     * boxes, and even then the time is around a factor of 100 less
-     * than the next smallest counter.
-     */
-
-
-    /* Here sometimes we would not need to shift with NBFonly,
-     * but we do so anyhow for consistency of the returned coordinates.
-     */
-    if (graph)
-    {
-        shift_self(graph, box, x);
-        if (TRICLINIC(box))
-        {
-            inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes);
-        }
-        else
-        {
-            inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes);
-        }
-    }
-    /* Check whether we need to do listed interactions or correct for exclusions */
-    if (fr->bMolPBC &&
-        ((flags & GMX_FORCE_LISTED)
-         || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype)))
-    {
-        /* TODO There are no electrostatics methods that require this
-           transformation, when using the Verlet scheme, so update the
-           above conditional. */
-        /* Since all atoms are in the rectangular or triclinic unit-cell,
-         * only single box vector shifts (2 in x) are required.
-         */
-        set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box);
-    }
-    debug_gmx();
-
-    do_force_listed(wcycle, box, ir->fepvals, cr->ms,
-                    idef, (const rvec *) x, hist, f, fr,
-                    &pbc, graph, enerd, nrnb, lambda, md, fcd,
-                    DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL,
-                    flags);
-
-    where();
-
-    *cycles_pme = 0;
-    clear_mat(fr->vir_el_recip);
-    clear_mat(fr->vir_lj_recip);
-
-    /* Do long-range electrostatics and/or LJ-PME, including related short-range
-     * corrections.
-     */
-    if (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype))
-    {
-        int  status            = 0;
-        real Vlr_q             = 0, Vlr_lj = 0, Vcorr_q = 0, Vcorr_lj = 0;
-        real dvdl_long_range_q = 0, dvdl_long_range_lj = 0;
-
-        bSB = (ir->nwall == 2);
-        if (bSB)
-        {
-            copy_mat(box, boxs);
-            svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]);
-            box_size[ZZ] *= ir->wall_ewald_zfac;
-        }
-
-        if (EEL_PME_EWALD(fr->eeltype) || EVDW_PME(fr->vdwtype))
-        {
-            real dvdl_long_range_correction_q   = 0;
-            real dvdl_long_range_correction_lj  = 0;
-            /* With the Verlet scheme exclusion forces are calculated
-             * in the non-bonded kernel.
-             */
-            /* The TPI molecule does not have exclusions with the rest
-             * of the system and no intra-molecular PME grid
-             * contributions will be calculated in
-             * gmx_pme_calc_energy.
-             */
-            if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) ||
-                ir->ewald_geometry != eewg3D ||
-                ir->epsilon_surface != 0)
-            {
-                int nthreads, t;
-
-                wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);
-
-                if (fr->n_tpi > 0)
-                {
-                    gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
-                }
-
-                nthreads = gmx_omp_nthreads_get(emntBonded);
-#pragma omp parallel for num_threads(nthreads) schedule(static)
-                for (t = 0; t < nthreads; t++)
-                {
-                    int     i;
-                    rvec   *fnv;
-                    tensor *vir_q, *vir_lj;
-                    real   *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj;
-                    if (t == 0)
-                    {
-                        fnv       = fr->f_novirsum;
-                        vir_q     = &fr->vir_el_recip;
-                        vir_lj    = &fr->vir_lj_recip;
-                        Vcorrt_q  = &Vcorr_q;
-                        Vcorrt_lj = &Vcorr_lj;
-                        dvdlt_q   = &dvdl_long_range_correction_q;
-                        dvdlt_lj  = &dvdl_long_range_correction_lj;
-                    }
-                    else
-                    {
-                        fnv       = fr->f_t[t].f;
-                        vir_q     = &fr->f_t[t].vir_q;
-                        vir_lj    = &fr->f_t[t].vir_lj;
-                        Vcorrt_q  = &fr->f_t[t].Vcorr_q;
-                        Vcorrt_lj = &fr->f_t[t].Vcorr_lj;
-                        dvdlt_q   = &fr->f_t[t].dvdl[efptCOUL];
-                        dvdlt_lj  = &fr->f_t[t].dvdl[efptVDW];
-                        for (i = 0; i < fr->natoms_force; i++)
-                        {
-                            clear_rvec(fnv[i]);
-                        }
-                        clear_mat(*vir_q);
-                        clear_mat(*vir_lj);
-                    }
-                    *dvdlt_q  = 0;
-                    *dvdlt_lj = 0;
-
-                    ewald_LRcorrection(md->homenr, cr, nthreads, t, fr,
-                                       md->chargeA, md->chargeB,
-                                       md->sqrt_c6A, md->sqrt_c6B,
-                                       md->sigmaA, md->sigmaB,
-                                       md->sigma3A, md->sigma3B,
-                                       md->nChargePerturbed || md->nTypePerturbed,
-                                       ir->cutoff_scheme != ecutsVERLET,
-                                       excl, x, bSB ? boxs : box, mu_tot,
-                                       ir->ewald_geometry,
-                                       ir->epsilon_surface,
-                                       fnv, *vir_q, *vir_lj,
-                                       Vcorrt_q, Vcorrt_lj,
-                                       lambda[efptCOUL], lambda[efptVDW],
-                                       dvdlt_q, dvdlt_lj);
-                }
-                if (nthreads > 1)
-                {
-                    reduce_thread_forces(fr->natoms_force, fr->f_novirsum,
-                                         fr->vir_el_recip, fr->vir_lj_recip,
-                                         &Vcorr_q, &Vcorr_lj,
-                                         &dvdl_long_range_correction_q,
-                                         &dvdl_long_range_correction_lj,
-                                         nthreads, fr->f_t);
-                }
-                wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
-            }
-
-            if (EEL_PME_EWALD(fr->eeltype) && fr->n_tpi == 0)
-            {
-                /* This is not in a subcounter because it takes a
-                   negligible and constant-sized amount of time */
-                Vcorr_q += ewald_charge_correction(cr, fr, lambda[efptCOUL], box,
-                                                   &dvdl_long_range_correction_q,
-                                                   fr->vir_el_recip);
-            }
-
-            enerd->dvdl_lin[efptCOUL] += dvdl_long_range_correction_q;
-            enerd->dvdl_lin[efptVDW]  += dvdl_long_range_correction_lj;
-
-            if ((EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype)) && (cr->duty & DUTY_PME))
-            {
-                /* Do reciprocal PME for Coulomb and/or LJ. */
-                assert(fr->n_tpi >= 0);
-                if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED))
-                {
-                    pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE;
-                    if (EEL_PME(fr->eeltype))
-                    {
-                        pme_flags     |= GMX_PME_DO_COULOMB;
-                    }
-                    if (EVDW_PME(fr->vdwtype))
-                    {
-                        pme_flags |= GMX_PME_DO_LJ;
-                    }
-                    if (flags & GMX_FORCE_FORCES)
-                    {
-                        pme_flags |= GMX_PME_CALC_F;
-                    }
-                    if (flags & GMX_FORCE_VIRIAL)
-                    {
-                        pme_flags |= GMX_PME_CALC_ENER_VIR;
-                    }
-                    if (fr->n_tpi > 0)
-                    {
-                        /* We don't calculate f, but we do want the potential */
-                        pme_flags |= GMX_PME_CALC_POT;
-                    }
-                    wallcycle_start(wcycle, ewcPMEMESH);
-                    status = gmx_pme_do(fr->pmedata,
-                                        0, md->homenr - fr->n_tpi,
-                                        x, fr->f_novirsum,
-                                        md->chargeA, md->chargeB,
-                                        md->sqrt_c6A, md->sqrt_c6B,
-                                        md->sigmaA, md->sigmaB,
-                                        bSB ? boxs : box, cr,
-                                        DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
-                                        DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0,
-                                        nrnb, wcycle,
-                                        fr->vir_el_recip, fr->ewaldcoeff_q,
-                                        fr->vir_lj_recip, fr->ewaldcoeff_lj,
-                                        &Vlr_q, &Vlr_lj,
-                                        lambda[efptCOUL], lambda[efptVDW],
-                                        &dvdl_long_range_q, &dvdl_long_range_lj, pme_flags);
-                    *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH);
-                    if (status != 0)
-                    {
-                        gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status);
-                    }
-                    /* We should try to do as little computation after
-                     * this as possible, because parallel PME synchronizes
-                     * the nodes, so we want all load imbalance of the
-                     * rest of the force calculation to be before the PME
-                     * call.  DD load balancing is done on the whole time
-                     * of the force call (without PME).
-                     */
-                }
-                if (fr->n_tpi > 0)
-                {
-                    if (EVDW_PME(ir->vdwtype))
-                    {
-
-                        gmx_fatal(FARGS, "Test particle insertion not implemented with LJ-PME");
-                    }
-                    /* Determine the PME grid energy of the test molecule
-                     * with the PME grid potential of the other charges.
-                     */
-                    gmx_pme_calc_energy(fr->pmedata, fr->n_tpi,
-                                        x + md->homenr - fr->n_tpi,
-                                        md->chargeA + md->homenr - fr->n_tpi,
-                                        &Vlr_q);
-                }
-            }
-        }
-
-        if (!EEL_PME(fr->eeltype) && EEL_PME_EWALD(fr->eeltype))
-        {
-            Vlr_q = do_ewald(ir, x, fr->f_novirsum,
-                             md->chargeA, md->chargeB,
-                             box_size, cr, md->homenr,
-                             fr->vir_el_recip, fr->ewaldcoeff_q,
-                             lambda[efptCOUL], &dvdl_long_range_q, fr->ewald_table);
-        }
-
-        /* Note that with separate PME nodes we get the real energies later */
-        enerd->dvdl_lin[efptCOUL] += dvdl_long_range_q;
-        enerd->dvdl_lin[efptVDW]  += dvdl_long_range_lj;
-        enerd->term[F_COUL_RECIP]  = Vlr_q + Vcorr_q;
-        enerd->term[F_LJ_RECIP]    = Vlr_lj + Vcorr_lj;
-        if (debug)
-        {
-            fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n",
-                    Vlr_q, Vcorr_q, enerd->term[F_COUL_RECIP]);
-            pr_rvecs(debug, 0, "vir_el_recip after corr", fr->vir_el_recip, DIM);
-            pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS);
-            fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n",
-                    Vlr_lj, Vcorr_lj, enerd->term[F_LJ_RECIP]);
-            pr_rvecs(debug, 0, "vir_lj_recip after corr", fr->vir_lj_recip, DIM);
-        }
-    }
-    else
-    {
-        /* Is there a reaction-field exclusion correction needed? */
-        if (EEL_RF(fr->eeltype) && eelRF_NEC != fr->eeltype)
-        {
-            /* With the Verlet scheme, exclusion forces are calculated
-             * in the non-bonded kernel.
-             */
-            if (ir->cutoff_scheme != ecutsVERLET)
-            {
-                real dvdl_rf_excl      = 0;
-                enerd->term[F_RF_EXCL] =
-                    RF_excl_correction(fr, graph, md, excl, x, f,
-                                       fr->fshift, &pbc, lambda[efptCOUL], &dvdl_rf_excl);
-
-                enerd->dvdl_lin[efptCOUL] += dvdl_rf_excl;
-            }
-        }
-    }
-    where();
-    debug_gmx();
-
-    if (debug)
-    {
-        print_nrnb(debug, nrnb);
-    }
-    debug_gmx();
-
-#ifdef GMX_MPI
-    if (TAKETIME)
-    {
-        t2 = MPI_Wtime();
-        MPI_Barrier(cr->mpi_comm_mygroup);
-        t3          = MPI_Wtime();
-        fr->t_wait += t3-t2;
-        if (fr->timesteps == 11)
-        {
-            char buf[22];
-            fprintf(stderr, "* PP load balancing info: rank %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n",
-                    cr->nodeid, gmx_step_str(fr->timesteps, buf),
-                    100*fr->t_wait/(fr->t_wait+fr->t_fnbf),
-                    (fr->t_fnbf+fr->t_wait)/fr->t_fnbf);
-        }
-        fr->timesteps++;
-    }
-#endif
-
-    if (debug)
-    {
-        pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS);
-    }
-
-    /* PLUMED */
-    if(plumedswitch){
-      int plumedNeedsEnergy;
-      (*plumedcmd)(plumedmain,"isEnergyNeeded",&plumedNeedsEnergy);
-      if(!plumedNeedsEnergy) (*plumedcmd)(plumedmain,"performCalc",NULL);
-    }
-    /* END PLUMED */
-}
-
-void init_enerdata(int ngener, int n_lambda, gmx_enerdata_t *enerd)
-{
-    int i, n2;
-
-    for (i = 0; i < F_NRE; i++)
-    {
-        enerd->term[i]         = 0;
-        enerd->foreign_term[i] = 0;
-    }
-
-
-    for (i = 0; i < efptNR; i++)
-    {
-        enerd->dvdl_lin[i]     = 0;
-        enerd->dvdl_nonlin[i]  = 0;
-    }
-
-    n2 = ngener*ngener;
-    if (debug)
-    {
-        fprintf(debug, "Creating %d sized group matrix for energies\n", n2);
-    }
-    enerd->grpp.nener         = n2;
-    enerd->foreign_grpp.nener = n2;
-    for (i = 0; (i < egNR); i++)
-    {
-        snew(enerd->grpp.ener[i], n2);
-        snew(enerd->foreign_grpp.ener[i], n2);
-    }
-
-    if (n_lambda)
-    {
-        enerd->n_lambda = 1 + n_lambda;
-        snew(enerd->enerpart_lambda, enerd->n_lambda);
-    }
-    else
-    {
-        enerd->n_lambda = 0;
-    }
-}
-
-void destroy_enerdata(gmx_enerdata_t *enerd)
-{
-    int i;
-
-    for (i = 0; (i < egNR); i++)
-    {
-        sfree(enerd->grpp.ener[i]);
-    }
-
-    for (i = 0; (i < egNR); i++)
-    {
-        sfree(enerd->foreign_grpp.ener[i]);
-    }
-
-    if (enerd->n_lambda)
-    {
-        sfree(enerd->enerpart_lambda);
-    }
-}
-
-static real sum_v(int n, real v[])
-{
-    real t;
-    int  i;
-
-    t = 0.0;
-    for (i = 0; (i < n); i++)
-    {
-        t = t + v[i];
-    }
-
-    return t;
-}
-
-void sum_epot(gmx_grppairener_t *grpp, real *epot)
-{
-    int i;
-
-    /* Accumulate energies */
-    epot[F_COUL_SR]  = sum_v(grpp->nener, grpp->ener[egCOULSR]);
-    epot[F_LJ]       = sum_v(grpp->nener, grpp->ener[egLJSR]);
-    epot[F_LJ14]     = sum_v(grpp->nener, grpp->ener[egLJ14]);
-    epot[F_COUL14]   = sum_v(grpp->nener, grpp->ener[egCOUL14]);
-    epot[F_COUL_LR]  = sum_v(grpp->nener, grpp->ener[egCOULLR]);
-    epot[F_LJ_LR]    = sum_v(grpp->nener, grpp->ener[egLJLR]);
-    /* We have already added 1-2,1-3, and 1-4 terms to F_GBPOL */
-    epot[F_GBPOL]   += sum_v(grpp->nener, grpp->ener[egGB]);
-
-/* lattice part of LR doesnt belong to any group
- * and has been added earlier
- */
-    epot[F_BHAM]     = sum_v(grpp->nener, grpp->ener[egBHAMSR]);
-    epot[F_BHAM_LR]  = sum_v(grpp->nener, grpp->ener[egBHAMLR]);
-
-    epot[F_EPOT] = 0;
-    for (i = 0; (i < F_EPOT); i++)
-    {
-        if (i != F_DISRESVIOL && i != F_ORIRESDEV)
-        {
-            epot[F_EPOT] += epot[i];
-        }
-    }
-}
-
-void sum_dhdl(gmx_enerdata_t *enerd, real *lambda, t_lambda *fepvals)
-{
-    int    i, j, index;
-    double dlam;
-
-    enerd->dvdl_lin[efptVDW] += enerd->term[F_DVDL_VDW];  /* include dispersion correction */
-    enerd->term[F_DVDL]       = 0.0;
-    for (i = 0; i < efptNR; i++)
-    {
-        if (fepvals->separate_dvdl[i])
-        {
-            /* could this be done more readably/compactly? */
-            switch (i)
-            {
-                case (efptMASS):
-                    index = F_DKDL;
-                    break;
-                case (efptCOUL):
-                    index = F_DVDL_COUL;
-                    break;
-                case (efptVDW):
-                    index = F_DVDL_VDW;
-                    break;
-                case (efptBONDED):
-                    index = F_DVDL_BONDED;
-                    break;
-                case (efptRESTRAINT):
-                    index = F_DVDL_RESTRAINT;
-                    break;
-                default:
-                    index = F_DVDL;
-                    break;
-            }
-            enerd->term[index] = enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
-            if (debug)
-            {
-                fprintf(debug, "dvdl-%s[%2d]: %f: non-linear %f + linear %f\n",
-                        efpt_names[i], i, enerd->term[index], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]);
-            }
-        }
-        else
-        {
-            enerd->term[F_DVDL] += enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
-            if (debug)
-            {
-                fprintf(debug, "dvd-%sl[%2d]: %f: non-linear %f + linear %f\n",
-                        efpt_names[0], i, enerd->term[F_DVDL], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]);
-            }
-        }
-    }
-
-    /* Notes on the foreign lambda free energy difference evaluation:
-     * Adding the potential and ekin terms that depend linearly on lambda
-     * as delta lam * dvdl to the energy differences is exact.
-     * For the constraints this is not exact, but we have no other option
-     * without literally changing the lengths and reevaluating the energies at each step.
-     * (try to remedy this post 4.6 - MRS)
-     * For the non-bonded LR term we assume that the soft-core (if present)
-     * no longer affects the energy beyond the short-range cut-off,
-     * which is a very good approximation (except for exotic settings).
-     * (investigate how to overcome this post 4.6 - MRS)
-     */
-    if (fepvals->separate_dvdl[efptBONDED])
-    {
-        enerd->term[F_DVDL_BONDED] += enerd->term[F_DVDL_CONSTR];
-    }
-    else
-    {
-        enerd->term[F_DVDL] += enerd->term[F_DVDL_CONSTR];
-    }
-    enerd->term[F_DVDL_CONSTR] = 0;
-
-    for (i = 0; i < fepvals->n_lambda; i++)
-    {
-        /* note we are iterating over fepvals here!
-           For the current lam, dlam = 0 automatically,
-           so we don't need to add anything to the
-           enerd->enerpart_lambda[0] */
-
-        /* we don't need to worry about dvdl_lin contributions to dE at
-           current lambda, because the contributions to the current
-           lambda are automatically zeroed */
-
-        for (j = 0; j < efptNR; j++)
-        {
-            /* Note that this loop is over all dhdl components, not just the separated ones */
-            dlam = (fepvals->all_lambda[j][i]-lambda[j]);
-            enerd->enerpart_lambda[i+1] += dlam*enerd->dvdl_lin[j];
-            if (debug)
-            {
-                fprintf(debug, "enerdiff lam %g: (%15s), non-linear %f linear %f*%f\n",
-                        fepvals->all_lambda[j][i], efpt_names[j],
-                        (enerd->enerpart_lambda[i+1] - enerd->enerpart_lambda[0]),
-                        dlam, enerd->dvdl_lin[j]);
-            }
-        }
-    }
-}
-
-
-void reset_foreign_enerdata(gmx_enerdata_t *enerd)
-{
-    int  i, j;
-
-    /* First reset all foreign energy components.  Foreign energies always called on
-       neighbor search steps */
-    for (i = 0; (i < egNR); i++)
-    {
-        for (j = 0; (j < enerd->grpp.nener); j++)
-        {
-            enerd->foreign_grpp.ener[i][j] = 0.0;
-        }
-    }
-
-    /* potential energy components */
-    for (i = 0; (i <= F_EPOT); i++)
-    {
-        enerd->foreign_term[i] = 0.0;
-    }
-}
-
-void reset_enerdata(t_forcerec *fr, gmx_bool bNS,
-                    gmx_enerdata_t *enerd,
-                    gmx_bool bMaster)
-{
-    gmx_bool bKeepLR;
-    int      i, j;
-
-    /* First reset all energy components, except for the long range terms
-     * on the master at non neighbor search steps, since the long range
-     * terms have already been summed at the last neighbor search step.
-     */
-    bKeepLR = (fr->bTwinRange && !bNS);
-    for (i = 0; (i < egNR); i++)
-    {
-        if (!(bKeepLR && bMaster && (i == egCOULLR || i == egLJLR)))
-        {
-            for (j = 0; (j < enerd->grpp.nener); j++)
-            {
-                enerd->grpp.ener[i][j] = 0.0;
-            }
-        }
-    }
-    for (i = 0; i < efptNR; i++)
-    {
-        enerd->dvdl_lin[i]    = 0.0;
-        enerd->dvdl_nonlin[i] = 0.0;
-    }
-
-    /* Normal potential energy components */
-    for (i = 0; (i <= F_EPOT); i++)
-    {
-        enerd->term[i] = 0.0;
-    }
-    /* Initialize the dVdlambda term with the long range contribution */
-    /* Initialize the dvdl term with the long range contribution */
-    enerd->term[F_DVDL]            = 0.0;
-    enerd->term[F_DVDL_COUL]       = 0.0;
-    enerd->term[F_DVDL_VDW]        = 0.0;
-    enerd->term[F_DVDL_BONDED]     = 0.0;
-    enerd->term[F_DVDL_RESTRAINT]  = 0.0;
-    enerd->term[F_DKDL]            = 0.0;
-    if (enerd->n_lambda > 0)
-    {
-        for (i = 0; i < enerd->n_lambda; i++)
-        {
-            enerd->enerpart_lambda[i] = 0.0;
-        }
-    }
-    /* reset foreign energy data - separate function since we also call it elsewhere */
-    reset_foreign_enerdata(enerd);
-}
diff --git a/patches/gromacs-5.1.4.diff/src/gromacs/mdlib/force.cpp.preplumed b/patches/gromacs-5.1.4.diff/src/gromacs/mdlib/force.cpp.preplumed
deleted file mode 100644
index c6d18a862ee109a32130b14b50c08caa66b9dcee..0000000000000000000000000000000000000000
--- a/patches/gromacs-5.1.4.diff/src/gromacs/mdlib/force.cpp.preplumed
+++ /dev/null
@@ -1,948 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#include "gmxpre.h"
-
-#include "gromacs/legacyheaders/force.h"
-
-#include "config.h"
-
-#include <assert.h>
-#include <math.h>
-#include <string.h>
-
-#include "gromacs/domdec/domdec.h"
-#include "gromacs/ewald/ewald.h"
-#include "gromacs/ewald/long-range-correction.h"
-#include "gromacs/ewald/pme.h"
-#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
-#include "gromacs/legacyheaders/macros.h"
-#include "gromacs/legacyheaders/mdrun.h"
-#include "gromacs/legacyheaders/names.h"
-#include "gromacs/legacyheaders/network.h"
-#include "gromacs/legacyheaders/nonbonded.h"
-#include "gromacs/legacyheaders/nrnb.h"
-#include "gromacs/legacyheaders/ns.h"
-#include "gromacs/legacyheaders/qmmm.h"
-#include "gromacs/legacyheaders/txtdump.h"
-#include "gromacs/legacyheaders/typedefs.h"
-#include "gromacs/legacyheaders/types/commrec.h"
-#include "gromacs/listed-forces/listed-forces.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/mdlib/forcerec-threading.h"
-#include "gromacs/pbcutil/ishift.h"
-#include "gromacs/pbcutil/mshift.h"
-#include "gromacs/pbcutil/pbc.h"
-#include "gromacs/timing/wallcycle.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/smalloc.h"
-
-void ns(FILE              *fp,
-        t_forcerec        *fr,
-        matrix             box,
-        gmx_groups_t      *groups,
-        gmx_localtop_t    *top,
-        t_mdatoms         *md,
-        t_commrec         *cr,
-        t_nrnb            *nrnb,
-        gmx_bool           bFillGrid,
-        gmx_bool           bDoLongRangeNS)
-{
-    int     nsearch;
-
-
-    if (!fr->ns.nblist_initialized)
-    {
-        init_neighbor_list(fp, fr, md->homenr);
-    }
-
-    if (fr->bTwinRange)
-    {
-        fr->nlr = 0;
-    }
-
-    nsearch = search_neighbours(fp, fr, box, top, groups, cr, nrnb, md,
-                                bFillGrid, bDoLongRangeNS);
-    if (debug)
-    {
-        fprintf(debug, "nsearch = %d\n", nsearch);
-    }
-
-    /* Check whether we have to do dynamic load balancing */
-    /*if ((nsb->nstDlb > 0) && (mod(step,nsb->nstDlb) == 0))
-       count_nb(cr,nsb,&(top->blocks[ebCGS]),nns,fr->nlr,
-       &(top->idef),opts->ngener);
-     */
-    if (fr->ns.dump_nl > 0)
-    {
-        dump_nblist(fp, cr, fr, fr->ns.dump_nl);
-    }
-}
-
-static void reduce_thread_forces(int n, rvec *f,
-                                 tensor vir_q, tensor vir_lj,
-                                 real *Vcorr_q, real *Vcorr_lj,
-                                 real *dvdl_q, real *dvdl_lj,
-                                 int nthreads, f_thread_t *f_t)
-{
-    int t, i;
-    int nthreads_loop gmx_unused;
-
-    // cppcheck-suppress unreadVariable
-    nthreads_loop = gmx_omp_nthreads_get(emntBonded);
-    /* This reduction can run over any number of threads */
-#pragma omp parallel for num_threads(nthreads_loop) private(t) schedule(static)
-    for (i = 0; i < n; i++)
-    {
-        for (t = 1; t < nthreads; t++)
-        {
-            rvec_inc(f[i], f_t[t].f[i]);
-        }
-    }
-    for (t = 1; t < nthreads; t++)
-    {
-        *Vcorr_q  += f_t[t].Vcorr_q;
-        *Vcorr_lj += f_t[t].Vcorr_lj;
-        *dvdl_q   += f_t[t].dvdl[efptCOUL];
-        *dvdl_lj  += f_t[t].dvdl[efptVDW];
-        m_add(vir_q, f_t[t].vir_q, vir_q);
-        m_add(vir_lj, f_t[t].vir_lj, vir_lj);
-    }
-}
-
-void do_force_lowlevel(t_forcerec *fr,      t_inputrec *ir,
-                       t_idef     *idef,    t_commrec  *cr,
-                       t_nrnb     *nrnb,    gmx_wallcycle_t wcycle,
-                       t_mdatoms  *md,
-                       rvec       x[],      history_t  *hist,
-                       rvec       f[],
-                       rvec       f_longrange[],
-                       gmx_enerdata_t *enerd,
-                       t_fcdata   *fcd,
-                       gmx_localtop_t *top,
-                       gmx_genborn_t *born,
-                       gmx_bool       bBornRadii,
-                       matrix     box,
-                       t_lambda   *fepvals,
-                       real       *lambda,
-                       t_graph    *graph,
-                       t_blocka   *excl,
-                       rvec       mu_tot[],
-                       int        flags,
-                       float      *cycles_pme)
-{
-    int         i, j;
-    int         donb_flags;
-    gmx_bool    bSB;
-    int         pme_flags;
-    matrix      boxs;
-    rvec        box_size;
-    t_pbc       pbc;
-    real        dvdl_dum[efptNR], dvdl_nb[efptNR];
-
-#ifdef GMX_MPI
-    double  t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */
-#endif
-
-    set_pbc(&pbc, fr->ePBC, box);
-
-    /* reset free energy components */
-    for (i = 0; i < efptNR; i++)
-    {
-        dvdl_nb[i]  = 0;
-        dvdl_dum[i] = 0;
-    }
-
-    /* Reset box */
-    for (i = 0; (i < DIM); i++)
-    {
-        box_size[i] = box[i][i];
-    }
-
-    debug_gmx();
-
-    /* do QMMM first if requested */
-    if (fr->bQMMM)
-    {
-        enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr);
-    }
-
-    /* Call the short range functions all in one go. */
-
-#ifdef GMX_MPI
-    /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/
-#define TAKETIME FALSE
-    if (TAKETIME)
-    {
-        MPI_Barrier(cr->mpi_comm_mygroup);
-        t0 = MPI_Wtime();
-    }
-#endif
-
-    if (ir->nwall)
-    {
-        /* foreign lambda component for walls */
-        real dvdl_walls = do_walls(ir, fr, box, md, x, f, lambda[efptVDW],
-                                   enerd->grpp.ener[egLJSR], nrnb);
-        enerd->dvdl_lin[efptVDW] += dvdl_walls;
-    }
-
-    /* If doing GB, reset dvda and calculate the Born radii */
-    if (ir->implicit_solvent)
-    {
-        wallcycle_sub_start(wcycle, ewcsNONBONDED);
-
-        for (i = 0; i < born->nr; i++)
-        {
-            fr->dvda[i] = 0;
-        }
-
-        if (bBornRadii)
-        {
-            calc_gb_rad(cr, fr, ir, top, x, &(fr->gblist), born, md, nrnb);
-        }
-
-        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
-    }
-
-    where();
-    /* We only do non-bonded calculation with group scheme here, the verlet
-     * calls are done from do_force_cutsVERLET(). */
-    if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED))
-    {
-        donb_flags = 0;
-        /* Add short-range interactions */
-        donb_flags |= GMX_NONBONDED_DO_SR;
-
-        /* Currently all group scheme kernels always calculate (shift-)forces */
-        if (flags & GMX_FORCE_FORCES)
-        {
-            donb_flags |= GMX_NONBONDED_DO_FORCE;
-        }
-        if (flags & GMX_FORCE_VIRIAL)
-        {
-            donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE;
-        }
-        if (flags & GMX_FORCE_ENERGY)
-        {
-            donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
-        }
-        if (flags & GMX_FORCE_DO_LR)
-        {
-            donb_flags |= GMX_NONBONDED_DO_LR;
-        }
-
-        wallcycle_sub_start(wcycle, ewcsNONBONDED);
-        do_nonbonded(fr, x, f, f_longrange, md, excl,
-                     &enerd->grpp, nrnb,
-                     lambda, dvdl_nb, -1, -1, donb_flags);
-
-        /* If we do foreign lambda and we have soft-core interactions
-         * we have to recalculate the (non-linear) energies contributions.
-         */
-        if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
-        {
-            for (i = 0; i < enerd->n_lambda; i++)
-            {
-                real lam_i[efptNR];
-
-                for (j = 0; j < efptNR; j++)
-                {
-                    lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
-                }
-                reset_foreign_enerdata(enerd);
-                do_nonbonded(fr, x, f, f_longrange, md, excl,
-                             &(enerd->foreign_grpp), nrnb,
-                             lam_i, dvdl_dum, -1, -1,
-                             (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA);
-                sum_epot(&(enerd->foreign_grpp), enerd->foreign_term);
-                enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
-            }
-        }
-        wallcycle_sub_stop(wcycle, ewcsNONBONDED);
-        where();
-    }
-
-    /* If we are doing GB, calculate bonded forces and apply corrections
-     * to the solvation forces */
-    /* MRS: Eventually, many need to include free energy contribution here! */
-    if (ir->implicit_solvent)
-    {
-        wallcycle_sub_start(wcycle, ewcsLISTED);
-        calc_gb_forces(cr, md, born, top, x, f, fr, idef,
-                       ir->gb_algorithm, ir->sa_algorithm, nrnb, &pbc, graph, enerd);
-        wallcycle_sub_stop(wcycle, ewcsLISTED);
-    }
-
-#ifdef GMX_MPI
-    if (TAKETIME)
-    {
-        t1          = MPI_Wtime();
-        fr->t_fnbf += t1-t0;
-    }
-#endif
-
-    if (fepvals->sc_alpha != 0)
-    {
-        enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
-    }
-    else
-    {
-        enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
-    }
-
-    if (fepvals->sc_alpha != 0)
-
-    /* even though coulomb part is linear, we already added it, beacuse we
-       need to go through the vdw calculation anyway */
-    {
-        enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
-    }
-    else
-    {
-        enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
-    }
-
-    debug_gmx();
-
-
-    if (debug)
-    {
-        pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS);
-    }
-
-    /* Shift the coordinates. Must be done before listed forces and PPPM,
-     * but is also necessary for SHAKE and update, therefore it can NOT
-     * go when no listed forces have to be evaluated.
-     *
-     * The shifting and PBC code is deliberately not timed, since with
-     * the Verlet scheme it only takes non-zero time with triclinic
-     * boxes, and even then the time is around a factor of 100 less
-     * than the next smallest counter.
-     */
-
-
-    /* Here sometimes we would not need to shift with NBFonly,
-     * but we do so anyhow for consistency of the returned coordinates.
-     */
-    if (graph)
-    {
-        shift_self(graph, box, x);
-        if (TRICLINIC(box))
-        {
-            inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes);
-        }
-        else
-        {
-            inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes);
-        }
-    }
-    /* Check whether we need to do listed interactions or correct for exclusions */
-    if (fr->bMolPBC &&
-        ((flags & GMX_FORCE_LISTED)
-         || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype)))
-    {
-        /* TODO There are no electrostatics methods that require this
-           transformation, when using the Verlet scheme, so update the
-           above conditional. */
-        /* Since all atoms are in the rectangular or triclinic unit-cell,
-         * only single box vector shifts (2 in x) are required.
-         */
-        set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box);
-    }
-    debug_gmx();
-
-    do_force_listed(wcycle, box, ir->fepvals, cr->ms,
-                    idef, (const rvec *) x, hist, f, fr,
-                    &pbc, graph, enerd, nrnb, lambda, md, fcd,
-                    DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL,
-                    flags);
-
-    where();
-
-    *cycles_pme = 0;
-    clear_mat(fr->vir_el_recip);
-    clear_mat(fr->vir_lj_recip);
-
-    /* Do long-range electrostatics and/or LJ-PME, including related short-range
-     * corrections.
-     */
-    if (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype))
-    {
-        int  status            = 0;
-        real Vlr_q             = 0, Vlr_lj = 0, Vcorr_q = 0, Vcorr_lj = 0;
-        real dvdl_long_range_q = 0, dvdl_long_range_lj = 0;
-
-        bSB = (ir->nwall == 2);
-        if (bSB)
-        {
-            copy_mat(box, boxs);
-            svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]);
-            box_size[ZZ] *= ir->wall_ewald_zfac;
-        }
-
-        if (EEL_PME_EWALD(fr->eeltype) || EVDW_PME(fr->vdwtype))
-        {
-            real dvdl_long_range_correction_q   = 0;
-            real dvdl_long_range_correction_lj  = 0;
-            /* With the Verlet scheme exclusion forces are calculated
-             * in the non-bonded kernel.
-             */
-            /* The TPI molecule does not have exclusions with the rest
-             * of the system and no intra-molecular PME grid
-             * contributions will be calculated in
-             * gmx_pme_calc_energy.
-             */
-            if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) ||
-                ir->ewald_geometry != eewg3D ||
-                ir->epsilon_surface != 0)
-            {
-                int nthreads, t;
-
-                wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);
-
-                if (fr->n_tpi > 0)
-                {
-                    gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
-                }
-
-                nthreads = gmx_omp_nthreads_get(emntBonded);
-#pragma omp parallel for num_threads(nthreads) schedule(static)
-                for (t = 0; t < nthreads; t++)
-                {
-                    int     i;
-                    rvec   *fnv;
-                    tensor *vir_q, *vir_lj;
-                    real   *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj;
-                    if (t == 0)
-                    {
-                        fnv       = fr->f_novirsum;
-                        vir_q     = &fr->vir_el_recip;
-                        vir_lj    = &fr->vir_lj_recip;
-                        Vcorrt_q  = &Vcorr_q;
-                        Vcorrt_lj = &Vcorr_lj;
-                        dvdlt_q   = &dvdl_long_range_correction_q;
-                        dvdlt_lj  = &dvdl_long_range_correction_lj;
-                    }
-                    else
-                    {
-                        fnv       = fr->f_t[t].f;
-                        vir_q     = &fr->f_t[t].vir_q;
-                        vir_lj    = &fr->f_t[t].vir_lj;
-                        Vcorrt_q  = &fr->f_t[t].Vcorr_q;
-                        Vcorrt_lj = &fr->f_t[t].Vcorr_lj;
-                        dvdlt_q   = &fr->f_t[t].dvdl[efptCOUL];
-                        dvdlt_lj  = &fr->f_t[t].dvdl[efptVDW];
-                        for (i = 0; i < fr->natoms_force; i++)
-                        {
-                            clear_rvec(fnv[i]);
-                        }
-                        clear_mat(*vir_q);
-                        clear_mat(*vir_lj);
-                    }
-                    *dvdlt_q  = 0;
-                    *dvdlt_lj = 0;
-
-                    ewald_LRcorrection(md->homenr, cr, nthreads, t, fr,
-                                       md->chargeA, md->chargeB,
-                                       md->sqrt_c6A, md->sqrt_c6B,
-                                       md->sigmaA, md->sigmaB,
-                                       md->sigma3A, md->sigma3B,
-                                       md->nChargePerturbed || md->nTypePerturbed,
-                                       ir->cutoff_scheme != ecutsVERLET,
-                                       excl, x, bSB ? boxs : box, mu_tot,
-                                       ir->ewald_geometry,
-                                       ir->epsilon_surface,
-                                       fnv, *vir_q, *vir_lj,
-                                       Vcorrt_q, Vcorrt_lj,
-                                       lambda[efptCOUL], lambda[efptVDW],
-                                       dvdlt_q, dvdlt_lj);
-                }
-                if (nthreads > 1)
-                {
-                    reduce_thread_forces(fr->natoms_force, fr->f_novirsum,
-                                         fr->vir_el_recip, fr->vir_lj_recip,
-                                         &Vcorr_q, &Vcorr_lj,
-                                         &dvdl_long_range_correction_q,
-                                         &dvdl_long_range_correction_lj,
-                                         nthreads, fr->f_t);
-                }
-                wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
-            }
-
-            if (EEL_PME_EWALD(fr->eeltype) && fr->n_tpi == 0)
-            {
-                /* This is not in a subcounter because it takes a
-                   negligible and constant-sized amount of time */
-                Vcorr_q += ewald_charge_correction(cr, fr, lambda[efptCOUL], box,
-                                                   &dvdl_long_range_correction_q,
-                                                   fr->vir_el_recip);
-            }
-
-            enerd->dvdl_lin[efptCOUL] += dvdl_long_range_correction_q;
-            enerd->dvdl_lin[efptVDW]  += dvdl_long_range_correction_lj;
-
-            if ((EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype)) && (cr->duty & DUTY_PME))
-            {
-                /* Do reciprocal PME for Coulomb and/or LJ. */
-                assert(fr->n_tpi >= 0);
-                if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED))
-                {
-                    pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE;
-                    if (EEL_PME(fr->eeltype))
-                    {
-                        pme_flags     |= GMX_PME_DO_COULOMB;
-                    }
-                    if (EVDW_PME(fr->vdwtype))
-                    {
-                        pme_flags |= GMX_PME_DO_LJ;
-                    }
-                    if (flags & GMX_FORCE_FORCES)
-                    {
-                        pme_flags |= GMX_PME_CALC_F;
-                    }
-                    if (flags & GMX_FORCE_VIRIAL)
-                    {
-                        pme_flags |= GMX_PME_CALC_ENER_VIR;
-                    }
-                    if (fr->n_tpi > 0)
-                    {
-                        /* We don't calculate f, but we do want the potential */
-                        pme_flags |= GMX_PME_CALC_POT;
-                    }
-                    wallcycle_start(wcycle, ewcPMEMESH);
-                    status = gmx_pme_do(fr->pmedata,
-                                        0, md->homenr - fr->n_tpi,
-                                        x, fr->f_novirsum,
-                                        md->chargeA, md->chargeB,
-                                        md->sqrt_c6A, md->sqrt_c6B,
-                                        md->sigmaA, md->sigmaB,
-                                        bSB ? boxs : box, cr,
-                                        DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
-                                        DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0,
-                                        nrnb, wcycle,
-                                        fr->vir_el_recip, fr->ewaldcoeff_q,
-                                        fr->vir_lj_recip, fr->ewaldcoeff_lj,
-                                        &Vlr_q, &Vlr_lj,
-                                        lambda[efptCOUL], lambda[efptVDW],
-                                        &dvdl_long_range_q, &dvdl_long_range_lj, pme_flags);
-                    *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH);
-                    if (status != 0)
-                    {
-                        gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status);
-                    }
-                    /* We should try to do as little computation after
-                     * this as possible, because parallel PME synchronizes
-                     * the nodes, so we want all load imbalance of the
-                     * rest of the force calculation to be before the PME
-                     * call.  DD load balancing is done on the whole time
-                     * of the force call (without PME).
-                     */
-                }
-                if (fr->n_tpi > 0)
-                {
-                    if (EVDW_PME(ir->vdwtype))
-                    {
-
-                        gmx_fatal(FARGS, "Test particle insertion not implemented with LJ-PME");
-                    }
-                    /* Determine the PME grid energy of the test molecule
-                     * with the PME grid potential of the other charges.
-                     */
-                    gmx_pme_calc_energy(fr->pmedata, fr->n_tpi,
-                                        x + md->homenr - fr->n_tpi,
-                                        md->chargeA + md->homenr - fr->n_tpi,
-                                        &Vlr_q);
-                }
-            }
-        }
-
-        if (!EEL_PME(fr->eeltype) && EEL_PME_EWALD(fr->eeltype))
-        {
-            Vlr_q = do_ewald(ir, x, fr->f_novirsum,
-                             md->chargeA, md->chargeB,
-                             box_size, cr, md->homenr,
-                             fr->vir_el_recip, fr->ewaldcoeff_q,
-                             lambda[efptCOUL], &dvdl_long_range_q, fr->ewald_table);
-        }
-
-        /* Note that with separate PME nodes we get the real energies later */
-        enerd->dvdl_lin[efptCOUL] += dvdl_long_range_q;
-        enerd->dvdl_lin[efptVDW]  += dvdl_long_range_lj;
-        enerd->term[F_COUL_RECIP]  = Vlr_q + Vcorr_q;
-        enerd->term[F_LJ_RECIP]    = Vlr_lj + Vcorr_lj;
-        if (debug)
-        {
-            fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n",
-                    Vlr_q, Vcorr_q, enerd->term[F_COUL_RECIP]);
-            pr_rvecs(debug, 0, "vir_el_recip after corr", fr->vir_el_recip, DIM);
-            pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS);
-            fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n",
-                    Vlr_lj, Vcorr_lj, enerd->term[F_LJ_RECIP]);
-            pr_rvecs(debug, 0, "vir_lj_recip after corr", fr->vir_lj_recip, DIM);
-        }
-    }
-    else
-    {
-        /* Is there a reaction-field exclusion correction needed? */
-        if (EEL_RF(fr->eeltype) && eelRF_NEC != fr->eeltype)
-        {
-            /* With the Verlet scheme, exclusion forces are calculated
-             * in the non-bonded kernel.
-             */
-            if (ir->cutoff_scheme != ecutsVERLET)
-            {
-                real dvdl_rf_excl      = 0;
-                enerd->term[F_RF_EXCL] =
-                    RF_excl_correction(fr, graph, md, excl, x, f,
-                                       fr->fshift, &pbc, lambda[efptCOUL], &dvdl_rf_excl);
-
-                enerd->dvdl_lin[efptCOUL] += dvdl_rf_excl;
-            }
-        }
-    }
-    where();
-    debug_gmx();
-
-    if (debug)
-    {
-        print_nrnb(debug, nrnb);
-    }
-    debug_gmx();
-
-#ifdef GMX_MPI
-    if (TAKETIME)
-    {
-        t2 = MPI_Wtime();
-        MPI_Barrier(cr->mpi_comm_mygroup);
-        t3          = MPI_Wtime();
-        fr->t_wait += t3-t2;
-        if (fr->timesteps == 11)
-        {
-            char buf[22];
-            fprintf(stderr, "* PP load balancing info: rank %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n",
-                    cr->nodeid, gmx_step_str(fr->timesteps, buf),
-                    100*fr->t_wait/(fr->t_wait+fr->t_fnbf),
-                    (fr->t_fnbf+fr->t_wait)/fr->t_fnbf);
-        }
-        fr->timesteps++;
-    }
-#endif
-
-    if (debug)
-    {
-        pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS);
-    }
-
-}
-
-void init_enerdata(int ngener, int n_lambda, gmx_enerdata_t *enerd)
-{
-    int i, n2;
-
-    for (i = 0; i < F_NRE; i++)
-    {
-        enerd->term[i]         = 0;
-        enerd->foreign_term[i] = 0;
-    }
-
-
-    for (i = 0; i < efptNR; i++)
-    {
-        enerd->dvdl_lin[i]     = 0;
-        enerd->dvdl_nonlin[i]  = 0;
-    }
-
-    n2 = ngener*ngener;
-    if (debug)
-    {
-        fprintf(debug, "Creating %d sized group matrix for energies\n", n2);
-    }
-    enerd->grpp.nener         = n2;
-    enerd->foreign_grpp.nener = n2;
-    for (i = 0; (i < egNR); i++)
-    {
-        snew(enerd->grpp.ener[i], n2);
-        snew(enerd->foreign_grpp.ener[i], n2);
-    }
-
-    if (n_lambda)
-    {
-        enerd->n_lambda = 1 + n_lambda;
-        snew(enerd->enerpart_lambda, enerd->n_lambda);
-    }
-    else
-    {
-        enerd->n_lambda = 0;
-    }
-}
-
-void destroy_enerdata(gmx_enerdata_t *enerd)
-{
-    int i;
-
-    for (i = 0; (i < egNR); i++)
-    {
-        sfree(enerd->grpp.ener[i]);
-    }
-
-    for (i = 0; (i < egNR); i++)
-    {
-        sfree(enerd->foreign_grpp.ener[i]);
-    }
-
-    if (enerd->n_lambda)
-    {
-        sfree(enerd->enerpart_lambda);
-    }
-}
-
-static real sum_v(int n, real v[])
-{
-    real t;
-    int  i;
-
-    t = 0.0;
-    for (i = 0; (i < n); i++)
-    {
-        t = t + v[i];
-    }
-
-    return t;
-}
-
-void sum_epot(gmx_grppairener_t *grpp, real *epot)
-{
-    int i;
-
-    /* Accumulate energies */
-    epot[F_COUL_SR]  = sum_v(grpp->nener, grpp->ener[egCOULSR]);
-    epot[F_LJ]       = sum_v(grpp->nener, grpp->ener[egLJSR]);
-    epot[F_LJ14]     = sum_v(grpp->nener, grpp->ener[egLJ14]);
-    epot[F_COUL14]   = sum_v(grpp->nener, grpp->ener[egCOUL14]);
-    epot[F_COUL_LR]  = sum_v(grpp->nener, grpp->ener[egCOULLR]);
-    epot[F_LJ_LR]    = sum_v(grpp->nener, grpp->ener[egLJLR]);
-    /* We have already added 1-2,1-3, and 1-4 terms to F_GBPOL */
-    epot[F_GBPOL]   += sum_v(grpp->nener, grpp->ener[egGB]);
-
-/* lattice part of LR doesnt belong to any group
- * and has been added earlier
- */
-    epot[F_BHAM]     = sum_v(grpp->nener, grpp->ener[egBHAMSR]);
-    epot[F_BHAM_LR]  = sum_v(grpp->nener, grpp->ener[egBHAMLR]);
-
-    epot[F_EPOT] = 0;
-    for (i = 0; (i < F_EPOT); i++)
-    {
-        if (i != F_DISRESVIOL && i != F_ORIRESDEV)
-        {
-            epot[F_EPOT] += epot[i];
-        }
-    }
-}
-
-void sum_dhdl(gmx_enerdata_t *enerd, real *lambda, t_lambda *fepvals)
-{
-    int    i, j, index;
-    double dlam;
-
-    enerd->dvdl_lin[efptVDW] += enerd->term[F_DVDL_VDW];  /* include dispersion correction */
-    enerd->term[F_DVDL]       = 0.0;
-    for (i = 0; i < efptNR; i++)
-    {
-        if (fepvals->separate_dvdl[i])
-        {
-            /* could this be done more readably/compactly? */
-            switch (i)
-            {
-                case (efptMASS):
-                    index = F_DKDL;
-                    break;
-                case (efptCOUL):
-                    index = F_DVDL_COUL;
-                    break;
-                case (efptVDW):
-                    index = F_DVDL_VDW;
-                    break;
-                case (efptBONDED):
-                    index = F_DVDL_BONDED;
-                    break;
-                case (efptRESTRAINT):
-                    index = F_DVDL_RESTRAINT;
-                    break;
-                default:
-                    index = F_DVDL;
-                    break;
-            }
-            enerd->term[index] = enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
-            if (debug)
-            {
-                fprintf(debug, "dvdl-%s[%2d]: %f: non-linear %f + linear %f\n",
-                        efpt_names[i], i, enerd->term[index], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]);
-            }
-        }
-        else
-        {
-            enerd->term[F_DVDL] += enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
-            if (debug)
-            {
-                fprintf(debug, "dvd-%sl[%2d]: %f: non-linear %f + linear %f\n",
-                        efpt_names[0], i, enerd->term[F_DVDL], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]);
-            }
-        }
-    }
-
-    /* Notes on the foreign lambda free energy difference evaluation:
-     * Adding the potential and ekin terms that depend linearly on lambda
-     * as delta lam * dvdl to the energy differences is exact.
-     * For the constraints this is not exact, but we have no other option
-     * without literally changing the lengths and reevaluating the energies at each step.
-     * (try to remedy this post 4.6 - MRS)
-     * For the non-bonded LR term we assume that the soft-core (if present)
-     * no longer affects the energy beyond the short-range cut-off,
-     * which is a very good approximation (except for exotic settings).
-     * (investigate how to overcome this post 4.6 - MRS)
-     */
-    if (fepvals->separate_dvdl[efptBONDED])
-    {
-        enerd->term[F_DVDL_BONDED] += enerd->term[F_DVDL_CONSTR];
-    }
-    else
-    {
-        enerd->term[F_DVDL] += enerd->term[F_DVDL_CONSTR];
-    }
-    enerd->term[F_DVDL_CONSTR] = 0;
-
-    for (i = 0; i < fepvals->n_lambda; i++)
-    {
-        /* note we are iterating over fepvals here!
-           For the current lam, dlam = 0 automatically,
-           so we don't need to add anything to the
-           enerd->enerpart_lambda[0] */
-
-        /* we don't need to worry about dvdl_lin contributions to dE at
-           current lambda, because the contributions to the current
-           lambda are automatically zeroed */
-
-        for (j = 0; j < efptNR; j++)
-        {
-            /* Note that this loop is over all dhdl components, not just the separated ones */
-            dlam = (fepvals->all_lambda[j][i]-lambda[j]);
-            enerd->enerpart_lambda[i+1] += dlam*enerd->dvdl_lin[j];
-            if (debug)
-            {
-                fprintf(debug, "enerdiff lam %g: (%15s), non-linear %f linear %f*%f\n",
-                        fepvals->all_lambda[j][i], efpt_names[j],
-                        (enerd->enerpart_lambda[i+1] - enerd->enerpart_lambda[0]),
-                        dlam, enerd->dvdl_lin[j]);
-            }
-        }
-    }
-}
-
-
-void reset_foreign_enerdata(gmx_enerdata_t *enerd)
-{
-    int  i, j;
-
-    /* First reset all foreign energy components.  Foreign energies always called on
-       neighbor search steps */
-    for (i = 0; (i < egNR); i++)
-    {
-        for (j = 0; (j < enerd->grpp.nener); j++)
-        {
-            enerd->foreign_grpp.ener[i][j] = 0.0;
-        }
-    }
-
-    /* potential energy components */
-    for (i = 0; (i <= F_EPOT); i++)
-    {
-        enerd->foreign_term[i] = 0.0;
-    }
-}
-
-void reset_enerdata(t_forcerec *fr, gmx_bool bNS,
-                    gmx_enerdata_t *enerd,
-                    gmx_bool bMaster)
-{
-    gmx_bool bKeepLR;
-    int      i, j;
-
-    /* First reset all energy components, except for the long range terms
-     * on the master at non neighbor search steps, since the long range
-     * terms have already been summed at the last neighbor search step.
-     */
-    bKeepLR = (fr->bTwinRange && !bNS);
-    for (i = 0; (i < egNR); i++)
-    {
-        if (!(bKeepLR && bMaster && (i == egCOULLR || i == egLJLR)))
-        {
-            for (j = 0; (j < enerd->grpp.nener); j++)
-            {
-                enerd->grpp.ener[i][j] = 0.0;
-            }
-        }
-    }
-    for (i = 0; i < efptNR; i++)
-    {
-        enerd->dvdl_lin[i]    = 0.0;
-        enerd->dvdl_nonlin[i] = 0.0;
-    }
-
-    /* Normal potential energy components */
-    for (i = 0; (i <= F_EPOT); i++)
-    {
-        enerd->term[i] = 0.0;
-    }
-    /* Initialize the dVdlambda term with the long range contribution */
-    /* Initialize the dvdl term with the long range contribution */
-    enerd->term[F_DVDL]            = 0.0;
-    enerd->term[F_DVDL_COUL]       = 0.0;
-    enerd->term[F_DVDL_VDW]        = 0.0;
-    enerd->term[F_DVDL_BONDED]     = 0.0;
-    enerd->term[F_DVDL_RESTRAINT]  = 0.0;
-    enerd->term[F_DKDL]            = 0.0;
-    if (enerd->n_lambda > 0)
-    {
-        for (i = 0; i < enerd->n_lambda; i++)
-        {
-            enerd->enerpart_lambda[i] = 0.0;
-        }
-    }
-    /* reset foreign energy data - separate function since we also call it elsewhere */
-    reset_foreign_enerdata(enerd);
-}
diff --git a/patches/gromacs-5.1.4.diff/src/gromacs/mdlib/minimize.cpp b/patches/gromacs-5.1.4.diff/src/gromacs/mdlib/minimize.cpp
deleted file mode 100644
index 1caed3367c95d9113b08c558b304ccb2feafee0c..0000000000000000000000000000000000000000
--- a/patches/gromacs-5.1.4.diff/src/gromacs/mdlib/minimize.cpp
+++ /dev/null
@@ -1,2967 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-#include <string.h>
-#include <time.h>
-
-#include <algorithm>
-
-#include "gromacs/domdec/domdec.h"
-#include "gromacs/ewald/pme.h"
-#include "gromacs/fileio/confio.h"
-#include "gromacs/fileio/mtxio.h"
-#include "gromacs/fileio/trajectory_writing.h"
-#include "gromacs/imd/imd.h"
-#include "gromacs/legacyheaders/constr.h"
-#include "gromacs/legacyheaders/force.h"
-#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
-#include "gromacs/legacyheaders/macros.h"
-#include "gromacs/legacyheaders/md_logging.h"
-#include "gromacs/legacyheaders/md_support.h"
-#include "gromacs/legacyheaders/mdatoms.h"
-#include "gromacs/legacyheaders/mdebin.h"
-#include "gromacs/legacyheaders/mdrun.h"
-#include "gromacs/legacyheaders/names.h"
-#include "gromacs/legacyheaders/network.h"
-#include "gromacs/legacyheaders/nrnb.h"
-#include "gromacs/legacyheaders/ns.h"
-#include "gromacs/legacyheaders/sim_util.h"
-#include "gromacs/legacyheaders/tgroup.h"
-#include "gromacs/legacyheaders/txtdump.h"
-#include "gromacs/legacyheaders/typedefs.h"
-#include "gromacs/legacyheaders/update.h"
-#include "gromacs/legacyheaders/vsite.h"
-#include "gromacs/legacyheaders/types/commrec.h"
-#include "gromacs/linearalgebra/sparsematrix.h"
-#include "gromacs/listed-forces/manage-threading.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/pbcutil/mshift.h"
-#include "gromacs/pbcutil/pbc.h"
-#include "gromacs/timing/wallcycle.h"
-#include "gromacs/timing/walltime_accounting.h"
-#include "gromacs/topology/mtop_util.h"
-#include "gromacs/utility/cstringutil.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/smalloc.h"
-
-/* PLUMED */
-#include "../../../Plumed.h"
-extern int    plumedswitch;
-extern plumed plumedmain;
-extern void(*plumedcmd)(plumed,const char*,const void*);
-/* END PLUMED */
-
-typedef struct {
-    t_state  s;
-    rvec    *f;
-    real     epot;
-    real     fnorm;
-    real     fmax;
-    int      a_fmax;
-} em_state_t;
-
-static em_state_t *init_em_state()
-{
-    em_state_t *ems;
-
-    snew(ems, 1);
-
-    /* does this need to be here?  Should the array be declared differently (staticaly)in the state definition? */
-    snew(ems->s.lambda, efptNR);
-
-    return ems;
-}
-
-static void print_em_start(FILE                     *fplog,
-                           t_commrec                *cr,
-                           gmx_walltime_accounting_t walltime_accounting,
-                           gmx_wallcycle_t           wcycle,
-                           const char               *name)
-{
-    walltime_accounting_start(walltime_accounting);
-    wallcycle_start(wcycle, ewcRUN);
-    print_start(fplog, cr, walltime_accounting, name);
-}
-static void em_time_end(gmx_walltime_accounting_t walltime_accounting,
-                        gmx_wallcycle_t           wcycle)
-{
-    wallcycle_stop(wcycle, ewcRUN);
-
-    walltime_accounting_end(walltime_accounting);
-}
-
-static void sp_header(FILE *out, const char *minimizer, real ftol, int nsteps)
-{
-    fprintf(out, "\n");
-    fprintf(out, "%s:\n", minimizer);
-    fprintf(out, "   Tolerance (Fmax)   = %12.5e\n", ftol);
-    fprintf(out, "   Number of steps    = %12d\n", nsteps);
-}
-
-static void warn_step(FILE *fp, real ftol, gmx_bool bLastStep, gmx_bool bConstrain)
-{
-    char buffer[2048];
-    if (bLastStep)
-    {
-        sprintf(buffer,
-                "\nEnergy minimization reached the maximum number "
-                "of steps before the forces reached the requested "
-                "precision Fmax < %g.\n", ftol);
-    }
-    else
-    {
-        sprintf(buffer,
-                "\nEnergy minimization has stopped, but the forces have "
-                "not converged to the requested precision Fmax < %g (which "
-                "may not be possible for your system). It stopped "
-                "because the algorithm tried to make a new step whose size "
-                "was too small, or there was no change in the energy since "
-                "last step. Either way, we regard the minimization as "
-                "converged to within the available machine precision, "
-                "given your starting configuration and EM parameters.\n%s%s",
-                ftol,
-                sizeof(real) < sizeof(double) ?
-                "\nDouble precision normally gives you higher accuracy, but "
-                "this is often not needed for preparing to run molecular "
-                "dynamics.\n" :
-                "",
-                bConstrain ?
-                "You might need to increase your constraint accuracy, or turn\n"
-                "off constraints altogether (set constraints = none in mdp file)\n" :
-                "");
-    }
-    fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
-}
-
-
-
-static void print_converged(FILE *fp, const char *alg, real ftol,
-                            gmx_int64_t count, gmx_bool bDone, gmx_int64_t nsteps,
-                            real epot, real fmax, int nfmax, real fnorm)
-{
-    char buf[STEPSTRSIZE];
-
-    if (bDone)
-    {
-        fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n",
-                alg, ftol, gmx_step_str(count, buf));
-    }
-    else if (count < nsteps)
-    {
-        fprintf(fp, "\n%s converged to machine precision in %s steps,\n"
-                "but did not reach the requested Fmax < %g.\n",
-                alg, gmx_step_str(count, buf), ftol);
-    }
-    else
-    {
-        fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n",
-                alg, ftol, gmx_step_str(count, buf));
-    }
-
-#ifdef GMX_DOUBLE
-    fprintf(fp, "Potential Energy  = %21.14e\n", epot);
-    fprintf(fp, "Maximum force     = %21.14e on atom %d\n", fmax, nfmax+1);
-    fprintf(fp, "Norm of force     = %21.14e\n", fnorm);
-#else
-    fprintf(fp, "Potential Energy  = %14.7e\n", epot);
-    fprintf(fp, "Maximum force     = %14.7e on atom %d\n", fmax, nfmax+1);
-    fprintf(fp, "Norm of force     = %14.7e\n", fnorm);
-#endif
-}
-
-static void get_f_norm_max(t_commrec *cr,
-                           t_grpopts *opts, t_mdatoms *mdatoms, rvec *f,
-                           real *fnorm, real *fmax, int *a_fmax)
-{
-    double fnorm2, *sum;
-    real   fmax2, fam;
-    int    la_max, a_max, start, end, i, m, gf;
-
-    /* This routine finds the largest force and returns it.
-     * On parallel machines the global max is taken.
-     */
-    fnorm2 = 0;
-    fmax2  = 0;
-    la_max = -1;
-    start  = 0;
-    end    = mdatoms->homenr;
-    if (mdatoms->cFREEZE)
-    {
-        for (i = start; i < end; i++)
-        {
-            gf  = mdatoms->cFREEZE[i];
-            fam = 0;
-            for (m = 0; m < DIM; m++)
-            {
-                if (!opts->nFreeze[gf][m])
-                {
-                    fam += sqr(f[i][m]);
-                }
-            }
-            fnorm2 += fam;
-            if (fam > fmax2)
-            {
-                fmax2  = fam;
-                la_max = i;
-            }
-        }
-    }
-    else
-    {
-        for (i = start; i < end; i++)
-        {
-            fam     = norm2(f[i]);
-            fnorm2 += fam;
-            if (fam > fmax2)
-            {
-                fmax2  = fam;
-                la_max = i;
-            }
-        }
-    }
-
-    if (la_max >= 0 && DOMAINDECOMP(cr))
-    {
-        a_max = cr->dd->gatindex[la_max];
-    }
-    else
-    {
-        a_max = la_max;
-    }
-    if (PAR(cr))
-    {
-        snew(sum, 2*cr->nnodes+1);
-        sum[2*cr->nodeid]   = fmax2;
-        sum[2*cr->nodeid+1] = a_max;
-        sum[2*cr->nnodes]   = fnorm2;
-        gmx_sumd(2*cr->nnodes+1, sum, cr);
-        fnorm2 = sum[2*cr->nnodes];
-        /* Determine the global maximum */
-        for (i = 0; i < cr->nnodes; i++)
-        {
-            if (sum[2*i] > fmax2)
-            {
-                fmax2 = sum[2*i];
-                a_max = (int)(sum[2*i+1] + 0.5);
-            }
-        }
-        sfree(sum);
-    }
-
-    if (fnorm)
-    {
-        *fnorm = sqrt(fnorm2);
-    }
-    if (fmax)
-    {
-        *fmax  = sqrt(fmax2);
-    }
-    if (a_fmax)
-    {
-        *a_fmax = a_max;
-    }
-}
-
-static void get_state_f_norm_max(t_commrec *cr,
-                                 t_grpopts *opts, t_mdatoms *mdatoms,
-                                 em_state_t *ems)
-{
-    get_f_norm_max(cr, opts, mdatoms, ems->f, &ems->fnorm, &ems->fmax, &ems->a_fmax);
-}
-
-void init_em(FILE *fplog, const char *title,
-             t_commrec *cr, t_inputrec *ir,
-             t_state *state_global, gmx_mtop_t *top_global,
-             em_state_t *ems, gmx_localtop_t **top,
-             rvec **f, rvec **f_global,
-             t_nrnb *nrnb, rvec mu_tot,
-             t_forcerec *fr, gmx_enerdata_t **enerd,
-             t_graph **graph, t_mdatoms *mdatoms, gmx_global_stat_t *gstat,
-             gmx_vsite_t *vsite, gmx_constr_t constr,
-             int nfile, const t_filenm fnm[],
-             gmx_mdoutf_t *outf, t_mdebin **mdebin,
-             int imdport, unsigned long gmx_unused Flags,
-             gmx_wallcycle_t wcycle)
-{
-    int  i;
-    real dvdl_constr;
-
-    if (fplog)
-    {
-        fprintf(fplog, "Initiating %s\n", title);
-    }
-
-    state_global->ngtc = 0;
-
-    /* Initialize lambda variables */
-    initialize_lambdas(fplog, ir, &(state_global->fep_state), state_global->lambda, NULL);
-
-    init_nrnb(nrnb);
-
-    /* Interactive molecular dynamics */
-    init_IMD(ir, cr, top_global, fplog, 1, state_global->x,
-             nfile, fnm, NULL, imdport, Flags);
-
-    if (DOMAINDECOMP(cr))
-    {
-        *top = dd_init_local_top(top_global);
-
-        dd_init_local_state(cr->dd, state_global, &ems->s);
-
-        *f = NULL;
-
-        /* Distribute the charge groups over the nodes from the master node */
-        dd_partition_system(fplog, ir->init_step, cr, TRUE, 1,
-                            state_global, top_global, ir,
-                            &ems->s, &ems->f, mdatoms, *top,
-                            fr, vsite, NULL, constr,
-                            nrnb, NULL, FALSE);
-        dd_store_state(cr->dd, &ems->s);
-
-        if (ir->nstfout)
-        {
-            snew(*f_global, top_global->natoms);
-        }
-        else
-        {
-            *f_global = NULL;
-        }
-        *graph = NULL;
-    }
-    else
-    {
-        snew(*f, top_global->natoms);
-
-        /* Just copy the state */
-        ems->s = *state_global;
-        snew(ems->s.x, ems->s.nalloc);
-        snew(ems->f, ems->s.nalloc);
-        for (i = 0; i < state_global->natoms; i++)
-        {
-            copy_rvec(state_global->x[i], ems->s.x[i]);
-        }
-        copy_mat(state_global->box, ems->s.box);
-
-        *top      = gmx_mtop_generate_local_top(top_global, ir);
-        *f_global = *f;
-
-        setup_bonded_threading(fr, &(*top)->idef);
-
-        if (ir->ePBC != epbcNONE && !fr->bMolPBC)
-        {
-            *graph = mk_graph(fplog, &((*top)->idef), 0, top_global->natoms, FALSE, FALSE);
-        }
-        else
-        {
-            *graph = NULL;
-        }
-
-        atoms2md(top_global, ir, 0, NULL, top_global->natoms, mdatoms);
-        update_mdatoms(mdatoms, state_global->lambda[efptFEP]);
-
-        if (vsite)
-        {
-            set_vsite_top(vsite, *top, mdatoms, cr);
-        }
-    }
-
-    if (constr)
-    {
-        if (ir->eConstrAlg == econtSHAKE &&
-            gmx_mtop_ftype_count(top_global, F_CONSTR) > 0)
-        {
-            gmx_fatal(FARGS, "Can not do energy minimization with %s, use %s\n",
-                      econstr_names[econtSHAKE], econstr_names[econtLINCS]);
-        }
-
-        if (!DOMAINDECOMP(cr))
-        {
-            set_constraints(constr, *top, ir, mdatoms, cr);
-        }
-
-        if (!ir->bContinuation)
-        {
-            /* Constrain the starting coordinates */
-            dvdl_constr = 0;
-            constrain(PAR(cr) ? NULL : fplog, TRUE, TRUE, constr, &(*top)->idef,
-                      ir, cr, -1, 0, 1.0, mdatoms,
-                      ems->s.x, ems->s.x, NULL, fr->bMolPBC, ems->s.box,
-                      ems->s.lambda[efptFEP], &dvdl_constr,
-                      NULL, NULL, nrnb, econqCoord);
-        }
-    }
-
-    if (PAR(cr))
-    {
-        *gstat = global_stat_init(ir);
-    }
-    else
-    {
-        *gstat = NULL;
-    }
-
-    *outf = init_mdoutf(fplog, nfile, fnm, 0, cr, ir, top_global, NULL, wcycle);
-
-    snew(*enerd, 1);
-    init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda,
-                  *enerd);
-
-    if (mdebin != NULL)
-    {
-        /* Init bin for energy stuff */
-        *mdebin = init_mdebin(mdoutf_get_fp_ene(*outf), top_global, ir, NULL);
-    }
-
-    clear_rvec(mu_tot);
-    calc_shifts(ems->s.box, fr->shift_vec);
-
-    /* PLUMED */
-    if(plumedswitch){
-      if(cr->ms && cr->ms->nsim>1) {
-        if(MASTER(cr)) (*plumedcmd) (plumedmain,"GREX setMPIIntercomm",&cr->ms->mpi_comm_masters);
-        if(PAR(cr)){
-          if(DOMAINDECOMP(cr)) {
-            (*plumedcmd) (plumedmain,"GREX setMPIIntracomm",&cr->dd->mpi_comm_all);
-          }else{
-            (*plumedcmd) (plumedmain,"GREX setMPIIntracomm",&cr->mpi_comm_mysim);
-          }
-        }
-        (*plumedcmd) (plumedmain,"GREX init",NULL);
-      }
-      if(PAR(cr)){
-        if(DOMAINDECOMP(cr)) {
-          (*plumedcmd) (plumedmain,"setMPIComm",&cr->dd->mpi_comm_all);
-        }else{
-          (*plumedcmd) (plumedmain,"setMPIComm",&cr->mpi_comm_mysim);
-        }
-      }
-      (*plumedcmd) (plumedmain,"setNatoms",&top_global->natoms);
-      (*plumedcmd) (plumedmain,"setMDEngine","gromacs");
-      (*plumedcmd) (plumedmain,"setLog",fplog);
-      real real_delta_t;
-      real_delta_t=ir->delta_t;
-      (*plumedcmd) (plumedmain,"setTimestep",&real_delta_t);
-      (*plumedcmd) (plumedmain,"init",NULL);
-
-      if(PAR(cr)){
-        if(DOMAINDECOMP(cr)) {
-          (*plumedcmd) (plumedmain,"setAtomsNlocal",&cr->dd->nat_home);
-          (*plumedcmd) (plumedmain,"setAtomsGatindex",cr->dd->gatindex);
-        }
-      }
-    }
-    /* END PLUMED */
-}
-
-static void finish_em(t_commrec *cr, gmx_mdoutf_t outf,
-                      gmx_walltime_accounting_t walltime_accounting,
-                      gmx_wallcycle_t wcycle)
-{
-    if (!(cr->duty & DUTY_PME))
-    {
-        /* Tell the PME only node to finish */
-        gmx_pme_send_finish(cr);
-    }
-
-    done_mdoutf(outf);
-
-    em_time_end(walltime_accounting, wcycle);
-}
-
-static void swap_em_state(em_state_t *ems1, em_state_t *ems2)
-{
-    em_state_t tmp;
-
-    tmp   = *ems1;
-    *ems1 = *ems2;
-    *ems2 = tmp;
-}
-
-static void copy_em_coords(em_state_t *ems, t_state *state)
-{
-    int i;
-
-    for (i = 0; (i < state->natoms); i++)
-    {
-        copy_rvec(ems->s.x[i], state->x[i]);
-    }
-}
-
-static void write_em_traj(FILE *fplog, t_commrec *cr,
-                          gmx_mdoutf_t outf,
-                          gmx_bool bX, gmx_bool bF, const char *confout,
-                          gmx_mtop_t *top_global,
-                          t_inputrec *ir, gmx_int64_t step,
-                          em_state_t *state,
-                          t_state *state_global, rvec *f_global)
-{
-    int      mdof_flags;
-    gmx_bool bIMDout = FALSE;
-
-
-    /* Shall we do IMD output? */
-    if (ir->bIMD)
-    {
-        bIMDout = do_per_step(step, IMD_get_step(ir->imd->setup));
-    }
-
-    if ((bX || bF || bIMDout || confout != NULL) && !DOMAINDECOMP(cr))
-    {
-        copy_em_coords(state, state_global);
-        f_global = state->f;
-    }
-
-    mdof_flags = 0;
-    if (bX)
-    {
-        mdof_flags |= MDOF_X;
-    }
-    if (bF)
-    {
-        mdof_flags |= MDOF_F;
-    }
-
-    /* If we want IMD output, set appropriate MDOF flag */
-    if (ir->bIMD)
-    {
-        mdof_flags |= MDOF_IMD;
-    }
-
-    mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags,
-                                     top_global, step, (double)step,
-                                     &state->s, state_global, state->f, f_global);
-
-    if (confout != NULL && MASTER(cr))
-    {
-        if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
-        {
-            /* Make molecules whole only for confout writing */
-            do_pbc_mtop(fplog, ir->ePBC, state_global->box, top_global,
-                        state_global->x);
-        }
-
-        write_sto_conf_mtop(confout,
-                            *top_global->name, top_global,
-                            state_global->x, NULL, ir->ePBC, state_global->box);
-    }
-}
-
-static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md,
-                       gmx_bool bMolPBC,
-                       em_state_t *ems1, real a, rvec *f, em_state_t *ems2,
-                       gmx_constr_t constr, gmx_localtop_t *top,
-                       t_nrnb *nrnb, gmx_wallcycle_t wcycle,
-                       gmx_int64_t count)
-
-{
-    t_state *s1, *s2;
-    int      start, end;
-    real     dvdl_constr;
-    int      nthreads gmx_unused;
-
-    s1 = &ems1->s;
-    s2 = &ems2->s;
-
-    if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
-    {
-        gmx_incons("state mismatch in do_em_step");
-    }
-
-    s2->flags = s1->flags;
-
-    if (s2->nalloc != s1->nalloc)
-    {
-        s2->nalloc = s1->nalloc;
-        srenew(s2->x, s1->nalloc);
-        srenew(ems2->f,  s1->nalloc);
-        if (s2->flags & (1<<estCGP))
-        {
-            srenew(s2->cg_p,  s1->nalloc);
-        }
-    }
-
-    s2->natoms = s1->natoms;
-    copy_mat(s1->box, s2->box);
-    /* Copy free energy state */
-    for (int i = 0; i < efptNR; i++)
-    {
-        s2->lambda[i] = s1->lambda[i];
-    }
-    copy_mat(s1->box, s2->box);
-
-    start = 0;
-    end   = md->homenr;
-
-    // cppcheck-suppress unreadVariable
-    nthreads = gmx_omp_nthreads_get(emntUpdate);
-#pragma omp parallel num_threads(nthreads)
-    {
-        rvec *x1 = s1->x;
-        rvec *x2 = s2->x;
-
-        int   gf = 0;
-#pragma omp for schedule(static) nowait
-        for (int i = start; i < end; i++)
-        {
-            if (md->cFREEZE)
-            {
-                gf = md->cFREEZE[i];
-            }
-            for (int m = 0; m < DIM; m++)
-            {
-                if (ir->opts.nFreeze[gf][m])
-                {
-                    x2[i][m] = x1[i][m];
-                }
-                else
-                {
-                    x2[i][m] = x1[i][m] + a*f[i][m];
-                }
-            }
-        }
-
-        if (s2->flags & (1<<estCGP))
-        {
-            /* Copy the CG p vector */
-            rvec *p1 = s1->cg_p;
-            rvec *p2 = s2->cg_p;
-#pragma omp for schedule(static) nowait
-            for (int i = start; i < end; i++)
-            {
-                copy_rvec(p1[i], p2[i]);
-            }
-        }
-
-        if (DOMAINDECOMP(cr))
-        {
-            s2->ddp_count = s1->ddp_count;
-            if (s2->cg_gl_nalloc < s1->cg_gl_nalloc)
-            {
-#pragma omp barrier
-                s2->cg_gl_nalloc = s1->cg_gl_nalloc;
-                srenew(s2->cg_gl, s2->cg_gl_nalloc);
-#pragma omp barrier
-            }
-            s2->ncg_gl = s1->ncg_gl;
-#pragma omp for schedule(static) nowait
-            for (int i = 0; i < s2->ncg_gl; i++)
-            {
-                s2->cg_gl[i] = s1->cg_gl[i];
-            }
-            s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
-        }
-    }
-
-    if (constr)
-    {
-        wallcycle_start(wcycle, ewcCONSTR);
-        dvdl_constr = 0;
-        constrain(NULL, TRUE, TRUE, constr, &top->idef,
-                  ir, cr, count, 0, 1.0, md,
-                  s1->x, s2->x, NULL, bMolPBC, s2->box,
-                  s2->lambda[efptBONDED], &dvdl_constr,
-                  NULL, NULL, nrnb, econqCoord);
-        wallcycle_stop(wcycle, ewcCONSTR);
-    }
-}
-
-static void em_dd_partition_system(FILE *fplog, int step, t_commrec *cr,
-                                   gmx_mtop_t *top_global, t_inputrec *ir,
-                                   em_state_t *ems, gmx_localtop_t *top,
-                                   t_mdatoms *mdatoms, t_forcerec *fr,
-                                   gmx_vsite_t *vsite, gmx_constr_t constr,
-                                   t_nrnb *nrnb, gmx_wallcycle_t wcycle)
-{
-    /* Repartition the domain decomposition */
-    dd_partition_system(fplog, step, cr, FALSE, 1,
-                        NULL, top_global, ir,
-                        &ems->s, &ems->f,
-                        mdatoms, top, fr, vsite, NULL, constr,
-                        nrnb, wcycle, FALSE);
-    dd_store_state(cr->dd, &ems->s);
-}
-
-static void evaluate_energy(FILE *fplog, t_commrec *cr,
-                            gmx_mtop_t *top_global,
-                            em_state_t *ems, gmx_localtop_t *top,
-                            t_inputrec *inputrec,
-                            t_nrnb *nrnb, gmx_wallcycle_t wcycle,
-                            gmx_global_stat_t gstat,
-                            gmx_vsite_t *vsite, gmx_constr_t constr,
-                            t_fcdata *fcd,
-                            t_graph *graph, t_mdatoms *mdatoms,
-                            t_forcerec *fr, rvec mu_tot,
-                            gmx_enerdata_t *enerd, tensor vir, tensor pres,
-                            gmx_int64_t count, gmx_bool bFirst)
-{
-    real     t;
-    gmx_bool bNS;
-    tensor   force_vir, shake_vir, ekin;
-    real     dvdl_constr, prescorr, enercorr, dvdlcorr;
-    real     terminate = 0;
-
-    /* Set the time to the initial time, the time does not change during EM */
-    t = inputrec->init_t;
-
-    if (bFirst ||
-        (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count))
-    {
-        /* This is the first state or an old state used before the last ns */
-        bNS = TRUE;
-    }
-    else
-    {
-        bNS = FALSE;
-        if (inputrec->nstlist > 0)
-        {
-            bNS = TRUE;
-        }
-    }
-
-    if (vsite)
-    {
-        construct_vsites(vsite, ems->s.x, 1, NULL,
-                         top->idef.iparams, top->idef.il,
-                         fr->ePBC, fr->bMolPBC, cr, ems->s.box);
-    }
-
-    if (DOMAINDECOMP(cr) && bNS)
-    {
-        /* Repartition the domain decomposition */
-        em_dd_partition_system(fplog, count, cr, top_global, inputrec,
-                               ems, top, mdatoms, fr, vsite, constr,
-                               nrnb, wcycle);
-        /* PLUMED */
-        if(plumedswitch){
-          (*plumedcmd) (plumedmain,"setAtomsNlocal",&cr->dd->nat_home);
-          (*plumedcmd) (plumedmain,"setAtomsGatindex",cr->dd->gatindex);
-        }
-        /* END PLUMED */
-    }
-
-    /* Calc force & energy on new trial position  */
-    /* do_force always puts the charge groups in the box and shifts again
-     * We do not unshift, so molecules are always whole in congrad.c
-     */
-    /* PLUMED */
-    int plumedNeedsEnergy=0;
-    matrix plumed_vir;
-    if(plumedswitch){
-      long int lstep=count; (*plumedcmd)(plumedmain,"setStepLong",&lstep);
-      (*plumedcmd) (plumedmain,"setPositions",&ems->s.x[0][0]);
-      (*plumedcmd) (plumedmain,"setMasses",&mdatoms->massT[0]);
-      (*plumedcmd) (plumedmain,"setCharges",&mdatoms->chargeA[0]);
-      (*plumedcmd) (plumedmain,"setBox",&ems->s.box[0][0]);
-      (*plumedcmd) (plumedmain,"prepareCalc",NULL);
-      (*plumedcmd) (plumedmain,"setForces",&ems->f[0][0]);
-      (*plumedcmd) (plumedmain,"isEnergyNeeded",&plumedNeedsEnergy);
-      clear_mat(plumed_vir);
-      (*plumedcmd) (plumedmain,"setVirial",&plumed_vir[0][0]);
-    }
-    /* END PLUMED */
-    do_force(fplog, cr, inputrec,
-             count, nrnb, wcycle, top, &top_global->groups,
-             ems->s.box, ems->s.x, &ems->s.hist,
-             ems->f, force_vir, mdatoms, enerd, fcd,
-             ems->s.lambda, graph, fr, vsite, mu_tot, t, NULL, NULL, TRUE,
-             GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
-             GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
-             (bNS ? GMX_FORCE_NS | GMX_FORCE_DO_LR : 0));
-    /* PLUMED */
-    if(plumedswitch){
-      if(plumedNeedsEnergy) {
-        msmul(force_vir,2.0,plumed_vir);
-        (*plumedcmd) (plumedmain,"setEnergy",&enerd->term[F_EPOT]);
-        (*plumedcmd) (plumedmain,"performCalc",NULL);
-        msmul(plumed_vir,0.5,force_vir);
-      } else {
-        msmul(plumed_vir,0.5,plumed_vir);
-        m_add(force_vir,plumed_vir,force_vir);
-      }
-    }
-    /* END PLUMED */
-
-    /* Clear the unused shake virial and pressure */
-    clear_mat(shake_vir);
-    clear_mat(pres);
-
-    /* Communicate stuff when parallel */
-    if (PAR(cr) && inputrec->eI != eiNM)
-    {
-        wallcycle_start(wcycle, ewcMoveE);
-
-        global_stat(fplog, gstat, cr, enerd, force_vir, shake_vir, mu_tot,
-                    inputrec, NULL, NULL, NULL, 1, &terminate,
-                    top_global, &ems->s, FALSE,
-                    CGLO_ENERGY |
-                    CGLO_PRESSURE |
-                    CGLO_CONSTRAINT);
-
-        wallcycle_stop(wcycle, ewcMoveE);
-    }
-
-    /* Calculate long range corrections to pressure and energy */
-    calc_dispcorr(inputrec, fr, top_global->natoms, ems->s.box, ems->s.lambda[efptVDW],
-                  pres, force_vir, &prescorr, &enercorr, &dvdlcorr);
-    enerd->term[F_DISPCORR] = enercorr;
-    enerd->term[F_EPOT]    += enercorr;
-    enerd->term[F_PRES]    += prescorr;
-    enerd->term[F_DVDL]    += dvdlcorr;
-
-    ems->epot = enerd->term[F_EPOT];
-
-    if (constr)
-    {
-        /* Project out the constraint components of the force */
-        wallcycle_start(wcycle, ewcCONSTR);
-        dvdl_constr = 0;
-        constrain(NULL, FALSE, FALSE, constr, &top->idef,
-                  inputrec, cr, count, 0, 1.0, mdatoms,
-                  ems->s.x, ems->f, ems->f, fr->bMolPBC, ems->s.box,
-                  ems->s.lambda[efptBONDED], &dvdl_constr,
-                  NULL, &shake_vir, nrnb, econqForceDispl);
-        enerd->term[F_DVDL_CONSTR] += dvdl_constr;
-        m_add(force_vir, shake_vir, vir);
-        wallcycle_stop(wcycle, ewcCONSTR);
-    }
-    else
-    {
-        copy_mat(force_vir, vir);
-    }
-
-    clear_mat(ekin);
-    enerd->term[F_PRES] =
-        calc_pres(fr->ePBC, inputrec->nwall, ems->s.box, ekin, vir, pres);
-
-    sum_dhdl(enerd, ems->s.lambda, inputrec->fepvals);
-
-    if (EI_ENERGY_MINIMIZATION(inputrec->eI))
-    {
-        get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, ems);
-    }
-}
-
-static double reorder_partsum(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
-                              gmx_mtop_t *mtop,
-                              em_state_t *s_min, em_state_t *s_b)
-{
-    rvec          *fm, *fb, *fmg;
-    t_block       *cgs_gl;
-    int            ncg, *cg_gl, *index, c, cg, i, a0, a1, a, gf, m;
-    double         partsum;
-    unsigned char *grpnrFREEZE;
-
-    if (debug)
-    {
-        fprintf(debug, "Doing reorder_partsum\n");
-    }
-
-    fm = s_min->f;
-    fb = s_b->f;
-
-    cgs_gl = dd_charge_groups_global(cr->dd);
-    index  = cgs_gl->index;
-
-    /* Collect fm in a global vector fmg.
-     * This conflicts with the spirit of domain decomposition,
-     * but to fully optimize this a much more complicated algorithm is required.
-     */
-    snew(fmg, mtop->natoms);
-
-    ncg   = s_min->s.ncg_gl;
-    cg_gl = s_min->s.cg_gl;
-    i     = 0;
-    for (c = 0; c < ncg; c++)
-    {
-        cg = cg_gl[c];
-        a0 = index[cg];
-        a1 = index[cg+1];
-        for (a = a0; a < a1; a++)
-        {
-            copy_rvec(fm[i], fmg[a]);
-            i++;
-        }
-    }
-    gmx_sum(mtop->natoms*3, fmg[0], cr);
-
-    /* Now we will determine the part of the sum for the cgs in state s_b */
-    ncg         = s_b->s.ncg_gl;
-    cg_gl       = s_b->s.cg_gl;
-    partsum     = 0;
-    i           = 0;
-    gf          = 0;
-    grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
-    for (c = 0; c < ncg; c++)
-    {
-        cg = cg_gl[c];
-        a0 = index[cg];
-        a1 = index[cg+1];
-        for (a = a0; a < a1; a++)
-        {
-            if (mdatoms->cFREEZE && grpnrFREEZE)
-            {
-                gf = grpnrFREEZE[i];
-            }
-            for (m = 0; m < DIM; m++)
-            {
-                if (!opts->nFreeze[gf][m])
-                {
-                    partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
-                }
-            }
-            i++;
-        }
-    }
-
-    sfree(fmg);
-
-    return partsum;
-}
-
-static real pr_beta(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
-                    gmx_mtop_t *mtop,
-                    em_state_t *s_min, em_state_t *s_b)
-{
-    rvec  *fm, *fb;
-    double sum;
-    int    gf, i, m;
-
-    /* This is just the classical Polak-Ribiere calculation of beta;
-     * it looks a bit complicated since we take freeze groups into account,
-     * and might have to sum it in parallel runs.
-     */
-
-    if (!DOMAINDECOMP(cr) ||
-        (s_min->s.ddp_count == cr->dd->ddp_count &&
-         s_b->s.ddp_count   == cr->dd->ddp_count))
-    {
-        fm  = s_min->f;
-        fb  = s_b->f;
-        sum = 0;
-        gf  = 0;
-        /* This part of code can be incorrect with DD,
-         * since the atom ordering in s_b and s_min might differ.
-         */
-        for (i = 0; i < mdatoms->homenr; i++)
-        {
-            if (mdatoms->cFREEZE)
-            {
-                gf = mdatoms->cFREEZE[i];
-            }
-            for (m = 0; m < DIM; m++)
-            {
-                if (!opts->nFreeze[gf][m])
-                {
-                    sum += (fb[i][m] - fm[i][m])*fb[i][m];
-                }
-            }
-        }
-    }
-    else
-    {
-        /* We need to reorder cgs while summing */
-        sum = reorder_partsum(cr, opts, mdatoms, mtop, s_min, s_b);
-    }
-    if (PAR(cr))
-    {
-        gmx_sumd(1, &sum, cr);
-    }
-
-    return sum/sqr(s_min->fnorm);
-}
-
-double do_cg(FILE *fplog, t_commrec *cr,
-             int nfile, const t_filenm fnm[],
-             const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
-             int gmx_unused nstglobalcomm,
-             gmx_vsite_t *vsite, gmx_constr_t constr,
-             int gmx_unused stepout,
-             t_inputrec *inputrec,
-             gmx_mtop_t *top_global, t_fcdata *fcd,
-             t_state *state_global,
-             t_mdatoms *mdatoms,
-             t_nrnb *nrnb, gmx_wallcycle_t wcycle,
-             gmx_edsam_t gmx_unused ed,
-             t_forcerec *fr,
-             int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
-             gmx_membed_t gmx_unused membed,
-             real gmx_unused cpt_period, real gmx_unused max_hours,
-             int imdport,
-             unsigned long gmx_unused Flags,
-             gmx_walltime_accounting_t walltime_accounting)
-{
-    const char       *CG = "Polak-Ribiere Conjugate Gradients";
-
-    em_state_t       *s_min, *s_a, *s_b, *s_c;
-    gmx_localtop_t   *top;
-    gmx_enerdata_t   *enerd;
-    rvec             *f;
-    gmx_global_stat_t gstat;
-    t_graph          *graph;
-    rvec             *f_global, *p, *sf;
-    double            gpa, gpb, gpc, tmp, minstep;
-    real              fnormn;
-    real              stepsize;
-    real              a, b, c, beta = 0.0;
-    real              epot_repl = 0;
-    real              pnorm;
-    t_mdebin         *mdebin;
-    gmx_bool          converged, foundlower;
-    rvec              mu_tot;
-    gmx_bool          do_log = FALSE, do_ene = FALSE, do_x, do_f;
-    tensor            vir, pres;
-    int               number_steps, neval = 0, nstcg = inputrec->nstcgsteep;
-    gmx_mdoutf_t      outf;
-    int               i, m, gf, step, nminstep;
-
-    step = 0;
-
-    s_min = init_em_state();
-    s_a   = init_em_state();
-    s_b   = init_em_state();
-    s_c   = init_em_state();
-
-    /* Init em and store the local state in s_min */
-    init_em(fplog, CG, cr, inputrec,
-            state_global, top_global, s_min, &top, &f, &f_global,
-            nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
-            nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle);
-
-    /* Print to log file */
-    print_em_start(fplog, cr, walltime_accounting, wcycle, CG);
-
-    /* Max number of steps */
-    number_steps = inputrec->nsteps;
-
-    if (MASTER(cr))
-    {
-        sp_header(stderr, CG, inputrec->em_tol, number_steps);
-    }
-    if (fplog)
-    {
-        sp_header(fplog, CG, inputrec->em_tol, number_steps);
-    }
-
-    /* Call the force routine and some auxiliary (neighboursearching etc.) */
-    /* do_force always puts the charge groups in the box and shifts again
-     * We do not unshift, so molecules are always whole in congrad.c
-     */
-    evaluate_energy(fplog, cr,
-                    top_global, s_min, top,
-                    inputrec, nrnb, wcycle, gstat,
-                    vsite, constr, fcd, graph, mdatoms, fr,
-                    mu_tot, enerd, vir, pres, -1, TRUE);
-    where();
-
-    if (MASTER(cr))
-    {
-        /* Copy stuff to the energy bin for easy printing etc. */
-        upd_mdebin(mdebin, FALSE, FALSE, (double)step,
-                   mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box,
-                   NULL, NULL, vir, pres, NULL, mu_tot, constr);
-
-        print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]);
-        print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL,
-                   TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
-    }
-    where();
-
-    /* Estimate/guess the initial stepsize */
-    stepsize = inputrec->em_stepsize/s_min->fnorm;
-
-    if (MASTER(cr))
-    {
-        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
-        fprintf(stderr, "   F-max             = %12.5e on atom %d\n",
-                s_min->fmax, s_min->a_fmax+1);
-        fprintf(stderr, "   F-Norm            = %12.5e\n",
-                s_min->fnorm/sqrtNumAtoms);
-        fprintf(stderr, "\n");
-        /* and copy to the log file too... */
-        fprintf(fplog, "   F-max             = %12.5e on atom %d\n",
-                s_min->fmax, s_min->a_fmax+1);
-        fprintf(fplog, "   F-Norm            = %12.5e\n",
-                s_min->fnorm/sqrtNumAtoms);
-        fprintf(fplog, "\n");
-    }
-    /* Start the loop over CG steps.
-     * Each successful step is counted, and we continue until
-     * we either converge or reach the max number of steps.
-     */
-    converged = FALSE;
-    for (step = 0; (number_steps < 0 || (number_steps >= 0 && step <= number_steps)) && !converged; step++)
-    {
-
-        /* start taking steps in a new direction
-         * First time we enter the routine, beta=0, and the direction is
-         * simply the negative gradient.
-         */
-
-        /* Calculate the new direction in p, and the gradient in this direction, gpa */
-        p   = s_min->s.cg_p;
-        sf  = s_min->f;
-        gpa = 0;
-        gf  = 0;
-        for (i = 0; i < mdatoms->homenr; i++)
-        {
-            if (mdatoms->cFREEZE)
-            {
-                gf = mdatoms->cFREEZE[i];
-            }
-            for (m = 0; m < DIM; m++)
-            {
-                if (!inputrec->opts.nFreeze[gf][m])
-                {
-                    p[i][m] = sf[i][m] + beta*p[i][m];
-                    gpa    -= p[i][m]*sf[i][m];
-                    /* f is negative gradient, thus the sign */
-                }
-                else
-                {
-                    p[i][m] = 0;
-                }
-            }
-        }
-
-        /* Sum the gradient along the line across CPUs */
-        if (PAR(cr))
-        {
-            gmx_sumd(1, &gpa, cr);
-        }
-
-        /* Calculate the norm of the search vector */
-        get_f_norm_max(cr, &(inputrec->opts), mdatoms, p, &pnorm, NULL, NULL);
-
-        /* Just in case stepsize reaches zero due to numerical precision... */
-        if (stepsize <= 0)
-        {
-            stepsize = inputrec->em_stepsize/pnorm;
-        }
-
-        /*
-         * Double check the value of the derivative in the search direction.
-         * If it is positive it must be due to the old information in the
-         * CG formula, so just remove that and start over with beta=0.
-         * This corresponds to a steepest descent step.
-         */
-        if (gpa > 0)
-        {
-            beta = 0;
-            step--;   /* Don't count this step since we are restarting */
-            continue; /* Go back to the beginning of the big for-loop */
-        }
-
-        /* Calculate minimum allowed stepsize, before the average (norm)
-         * relative change in coordinate is smaller than precision
-         */
-        minstep = 0;
-        for (i = 0; i < mdatoms->homenr; i++)
-        {
-            for (m = 0; m < DIM; m++)
-            {
-                tmp = fabs(s_min->s.x[i][m]);
-                if (tmp < 1.0)
-                {
-                    tmp = 1.0;
-                }
-                tmp      = p[i][m]/tmp;
-                minstep += tmp*tmp;
-            }
-        }
-        /* Add up from all CPUs */
-        if (PAR(cr))
-        {
-            gmx_sumd(1, &minstep, cr);
-        }
-
-        minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
-
-        if (stepsize < minstep)
-        {
-            converged = TRUE;
-            break;
-        }
-
-        /* Write coordinates if necessary */
-        do_x = do_per_step(step, inputrec->nstxout);
-        do_f = do_per_step(step, inputrec->nstfout);
-
-        write_em_traj(fplog, cr, outf, do_x, do_f, NULL,
-                      top_global, inputrec, step,
-                      s_min, state_global, f_global);
-
-        /* Take a step downhill.
-         * In theory, we should minimize the function along this direction.
-         * That is quite possible, but it turns out to take 5-10 function evaluations
-         * for each line. However, we dont really need to find the exact minimum -
-         * it is much better to start a new CG step in a modified direction as soon
-         * as we are close to it. This will save a lot of energy evaluations.
-         *
-         * In practice, we just try to take a single step.
-         * If it worked (i.e. lowered the energy), we increase the stepsize but
-         * the continue straight to the next CG step without trying to find any minimum.
-         * If it didn't work (higher energy), there must be a minimum somewhere between
-         * the old position and the new one.
-         *
-         * Due to the finite numerical accuracy, it turns out that it is a good idea
-         * to even accept a SMALL increase in energy, if the derivative is still downhill.
-         * This leads to lower final energies in the tests I've done. / Erik
-         */
-        s_a->epot = s_min->epot;
-        a         = 0.0;
-        c         = a + stepsize; /* reference position along line is zero */
-
-        if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count)
-        {
-            em_dd_partition_system(fplog, step, cr, top_global, inputrec,
-                                   s_min, top, mdatoms, fr, vsite, constr,
-                                   nrnb, wcycle);
-        }
-
-        /* Take a trial step (new coords in s_c) */
-        do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, c, s_min->s.cg_p, s_c,
-                   constr, top, nrnb, wcycle, -1);
-
-        neval++;
-        /* Calculate energy for the trial step */
-        evaluate_energy(fplog, cr,
-                        top_global, s_c, top,
-                        inputrec, nrnb, wcycle, gstat,
-                        vsite, constr, fcd, graph, mdatoms, fr,
-                        mu_tot, enerd, vir, pres, -1, FALSE);
-
-        /* Calc derivative along line */
-        p   = s_c->s.cg_p;
-        sf  = s_c->f;
-        gpc = 0;
-        for (i = 0; i < mdatoms->homenr; i++)
-        {
-            for (m = 0; m < DIM; m++)
-            {
-                gpc -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
-            }
-        }
-        /* Sum the gradient along the line across CPUs */
-        if (PAR(cr))
-        {
-            gmx_sumd(1, &gpc, cr);
-        }
-
-        /* This is the max amount of increase in energy we tolerate */
-        tmp = sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
-
-        /* Accept the step if the energy is lower, or if it is not significantly higher
-         * and the line derivative is still negative.
-         */
-        if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp)))
-        {
-            foundlower = TRUE;
-            /* Great, we found a better energy. Increase step for next iteration
-             * if we are still going down, decrease it otherwise
-             */
-            if (gpc < 0)
-            {
-                stepsize *= 1.618034; /* The golden section */
-            }
-            else
-            {
-                stepsize *= 0.618034; /* 1/golden section */
-            }
-        }
-        else
-        {
-            /* New energy is the same or higher. We will have to do some work
-             * to find a smaller value in the interval. Take smaller step next time!
-             */
-            foundlower = FALSE;
-            stepsize  *= 0.618034;
-        }
-
-
-
-
-        /* OK, if we didn't find a lower value we will have to locate one now - there must
-         * be one in the interval [a=0,c].
-         * The same thing is valid here, though: Don't spend dozens of iterations to find
-         * the line minimum. We try to interpolate based on the derivative at the endpoints,
-         * and only continue until we find a lower value. In most cases this means 1-2 iterations.
-         *
-         * I also have a safeguard for potentially really pathological functions so we never
-         * take more than 20 steps before we give up ...
-         *
-         * If we already found a lower value we just skip this step and continue to the update.
-         */
-        if (!foundlower)
-        {
-            nminstep = 0;
-
-            do
-            {
-                /* Select a new trial point.
-                 * If the derivatives at points a & c have different sign we interpolate to zero,
-                 * otherwise just do a bisection.
-                 */
-                if (gpa < 0 && gpc > 0)
-                {
-                    b = a + gpa*(a-c)/(gpc-gpa);
-                }
-                else
-                {
-                    b = 0.5*(a+c);
-                }
-
-                /* safeguard if interpolation close to machine accuracy causes errors:
-                 * never go outside the interval
-                 */
-                if (b <= a || b >= c)
-                {
-                    b = 0.5*(a+c);
-                }
-
-                if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
-                {
-                    /* Reload the old state */
-                    em_dd_partition_system(fplog, -1, cr, top_global, inputrec,
-                                           s_min, top, mdatoms, fr, vsite, constr,
-                                           nrnb, wcycle);
-                }
-
-                /* Take a trial step to this new point - new coords in s_b */
-                do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, b, s_min->s.cg_p, s_b,
-                           constr, top, nrnb, wcycle, -1);
-
-                neval++;
-                /* Calculate energy for the trial step */
-                evaluate_energy(fplog, cr,
-                                top_global, s_b, top,
-                                inputrec, nrnb, wcycle, gstat,
-                                vsite, constr, fcd, graph, mdatoms, fr,
-                                mu_tot, enerd, vir, pres, -1, FALSE);
-
-                /* p does not change within a step, but since the domain decomposition
-                 * might change, we have to use cg_p of s_b here.
-                 */
-                p   = s_b->s.cg_p;
-                sf  = s_b->f;
-                gpb = 0;
-                for (i = 0; i < mdatoms->homenr; i++)
-                {
-                    for (m = 0; m < DIM; m++)
-                    {
-                        gpb -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
-                    }
-                }
-                /* Sum the gradient along the line across CPUs */
-                if (PAR(cr))
-                {
-                    gmx_sumd(1, &gpb, cr);
-                }
-
-                if (debug)
-                {
-                    fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
-                            s_a->epot, s_b->epot, s_c->epot, gpb);
-                }
-
-                epot_repl = s_b->epot;
-
-                /* Keep one of the intervals based on the value of the derivative at the new point */
-                if (gpb > 0)
-                {
-                    /* Replace c endpoint with b */
-                    swap_em_state(s_b, s_c);
-                    c   = b;
-                    gpc = gpb;
-                }
-                else
-                {
-                    /* Replace a endpoint with b */
-                    swap_em_state(s_b, s_a);
-                    a   = b;
-                    gpa = gpb;
-                }
-
-                /*
-                 * Stop search as soon as we find a value smaller than the endpoints.
-                 * Never run more than 20 steps, no matter what.
-                 */
-                nminstep++;
-            }
-            while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
-                   (nminstep < 20));
-
-            if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
-                nminstep >= 20)
-            {
-                /* OK. We couldn't find a significantly lower energy.
-                 * If beta==0 this was steepest descent, and then we give up.
-                 * If not, set beta=0 and restart with steepest descent before quitting.
-                 */
-                if (beta == 0.0)
-                {
-                    /* Converged */
-                    converged = TRUE;
-                    break;
-                }
-                else
-                {
-                    /* Reset memory before giving up */
-                    beta = 0.0;
-                    continue;
-                }
-            }
-
-            /* Select min energy state of A & C, put the best in B.
-             */
-            if (s_c->epot < s_a->epot)
-            {
-                if (debug)
-                {
-                    fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n",
-                            s_c->epot, s_a->epot);
-                }
-                swap_em_state(s_b, s_c);
-                gpb = gpc;
-            }
-            else
-            {
-                if (debug)
-                {
-                    fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n",
-                            s_a->epot, s_c->epot);
-                }
-                swap_em_state(s_b, s_a);
-                gpb = gpa;
-            }
-
-        }
-        else
-        {
-            if (debug)
-            {
-                fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n",
-                        s_c->epot);
-            }
-            swap_em_state(s_b, s_c);
-            gpb = gpc;
-        }
-
-        /* new search direction */
-        /* beta = 0 means forget all memory and restart with steepest descents. */
-        if (nstcg && ((step % nstcg) == 0))
-        {
-            beta = 0.0;
-        }
-        else
-        {
-            /* s_min->fnorm cannot be zero, because then we would have converged
-             * and broken out.
-             */
-
-            /* Polak-Ribiere update.
-             * Change to fnorm2/fnorm2_old for Fletcher-Reeves
-             */
-            beta = pr_beta(cr, &inputrec->opts, mdatoms, top_global, s_min, s_b);
-        }
-        /* Limit beta to prevent oscillations */
-        if (fabs(beta) > 5.0)
-        {
-            beta = 0.0;
-        }
-
-
-        /* update positions */
-        swap_em_state(s_min, s_b);
-        gpa = gpb;
-
-        /* Print it if necessary */
-        if (MASTER(cr))
-        {
-            if (bVerbose)
-            {
-                double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
-                fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
-                        step, s_min->epot, s_min->fnorm/sqrtNumAtoms,
-                        s_min->fmax, s_min->a_fmax+1);
-            }
-            /* Store the new (lower) energies */
-            upd_mdebin(mdebin, FALSE, FALSE, (double)step,
-                       mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box,
-                       NULL, NULL, vir, pres, NULL, mu_tot, constr);
-
-            do_log = do_per_step(step, inputrec->nstlog);
-            do_ene = do_per_step(step, inputrec->nstenergy);
-
-            /* Prepare IMD energy record, if bIMD is TRUE. */
-            IMD_fill_energy_record(inputrec->bIMD, inputrec->imd, enerd, step, TRUE);
-
-            if (do_log)
-            {
-                print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]);
-            }
-            print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE,
-                       do_log ? fplog : NULL, step, step, eprNORMAL,
-                       TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
-        }
-
-        /* Send energies and positions to the IMD client if bIMD is TRUE. */
-        if (do_IMD(inputrec->bIMD, step, cr, TRUE, state_global->box, state_global->x, inputrec, 0, wcycle) && MASTER(cr))
-        {
-            IMD_send_positions(inputrec->imd);
-        }
-
-        /* Stop when the maximum force lies below tolerance.
-         * If we have reached machine precision, converged is already set to true.
-         */
-        converged = converged || (s_min->fmax < inputrec->em_tol);
-
-    } /* End of the loop */
-
-    /* IMD cleanup, if bIMD is TRUE. */
-    IMD_finalize(inputrec->bIMD, inputrec->imd);
-
-    if (converged)
-    {
-        step--; /* we never took that last step in this case */
-
-    }
-    if (s_min->fmax > inputrec->em_tol)
-    {
-        if (MASTER(cr))
-        {
-            warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE);
-            warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE);
-        }
-        converged = FALSE;
-    }
-
-    if (MASTER(cr))
-    {
-        /* If we printed energy and/or logfile last step (which was the last step)
-         * we don't have to do it again, but otherwise print the final values.
-         */
-        if (!do_log)
-        {
-            /* Write final value to log since we didn't do anything the last step */
-            print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]);
-        }
-        if (!do_ene || !do_log)
-        {
-            /* Write final energy file entries */
-            print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE,
-                       !do_log ? fplog : NULL, step, step, eprNORMAL,
-                       TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
-        }
-    }
-
-    /* Print some stuff... */
-    if (MASTER(cr))
-    {
-        fprintf(stderr, "\nwriting lowest energy coordinates.\n");
-    }
-
-    /* IMPORTANT!
-     * For accurate normal mode calculation it is imperative that we
-     * store the last conformation into the full precision binary trajectory.
-     *
-     * However, we should only do it if we did NOT already write this step
-     * above (which we did if do_x or do_f was true).
-     */
-    do_x = !do_per_step(step, inputrec->nstxout);
-    do_f = (inputrec->nstfout > 0 && !do_per_step(step, inputrec->nstfout));
-
-    write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm),
-                  top_global, inputrec, step,
-                  s_min, state_global, f_global);
-
-
-    if (MASTER(cr))
-    {
-        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
-        fnormn = s_min->fnorm/sqrtNumAtoms;
-        print_converged(stderr, CG, inputrec->em_tol, step, converged, number_steps,
-                        s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
-        print_converged(fplog, CG, inputrec->em_tol, step, converged, number_steps,
-                        s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
-
-        fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
-    }
-
-    finish_em(cr, outf, walltime_accounting, wcycle);
-
-    /* To print the actual number of steps we needed somewhere */
-    walltime_accounting_set_nsteps_done(walltime_accounting, step);
-
-    return 0;
-} /* That's all folks */
-
-
-double do_lbfgs(FILE *fplog, t_commrec *cr,
-                int nfile, const t_filenm fnm[],
-                const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
-                int gmx_unused nstglobalcomm,
-                gmx_vsite_t *vsite, gmx_constr_t constr,
-                int gmx_unused stepout,
-                t_inputrec *inputrec,
-                gmx_mtop_t *top_global, t_fcdata *fcd,
-                t_state *state,
-                t_mdatoms *mdatoms,
-                t_nrnb *nrnb, gmx_wallcycle_t wcycle,
-                gmx_edsam_t gmx_unused ed,
-                t_forcerec *fr,
-                int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
-                gmx_membed_t gmx_unused membed,
-                real gmx_unused cpt_period, real gmx_unused max_hours,
-                int imdport,
-                unsigned long gmx_unused Flags,
-                gmx_walltime_accounting_t walltime_accounting)
-{
-    static const char *LBFGS = "Low-Memory BFGS Minimizer";
-    em_state_t         ems;
-    gmx_localtop_t    *top;
-    gmx_enerdata_t    *enerd;
-    rvec              *f;
-    gmx_global_stat_t  gstat;
-    t_graph           *graph;
-    rvec              *f_global;
-    int                ncorr, nmaxcorr, point, cp, neval, nminstep;
-    double             stepsize, step_taken, gpa, gpb, gpc, tmp, minstep;
-    real              *rho, *alpha, *ff, *xx, *p, *s, *lastx, *lastf, **dx, **dg;
-    real              *xa, *xb, *xc, *fa, *fb, *fc, *xtmp, *ftmp;
-    real               a, b, c, maxdelta, delta;
-    real               diag, Epot0, Epot, EpotA, EpotB, EpotC;
-    real               dgdx, dgdg, sq, yr, beta;
-    t_mdebin          *mdebin;
-    gmx_bool           converged;
-    rvec               mu_tot;
-    real               fnorm, fmax;
-    gmx_bool           do_log, do_ene, do_x, do_f, foundlower, *frozen;
-    tensor             vir, pres;
-    int                start, end, number_steps;
-    gmx_mdoutf_t       outf;
-    int                i, k, m, n, nfmax, gf, step;
-    int                mdof_flags;
-
-    if (PAR(cr))
-    {
-        gmx_fatal(FARGS, "Cannot do parallel L-BFGS Minimization - yet.\n");
-    }
-
-    if (NULL != constr)
-    {
-        gmx_fatal(FARGS, "The combination of constraints and L-BFGS minimization is not implemented. Either do not use constraints, or use another minimizer (e.g. steepest descent).");
-    }
-
-    n        = 3*state->natoms;
-    nmaxcorr = inputrec->nbfgscorr;
-
-    /* Allocate memory */
-    /* Use pointers to real so we dont have to loop over both atoms and
-     * dimensions all the time...
-     * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
-     * that point to the same memory.
-     */
-    snew(xa, n);
-    snew(xb, n);
-    snew(xc, n);
-    snew(fa, n);
-    snew(fb, n);
-    snew(fc, n);
-    snew(frozen, n);
-
-    snew(p, n);
-    snew(lastx, n);
-    snew(lastf, n);
-    snew(rho, nmaxcorr);
-    snew(alpha, nmaxcorr);
-
-    snew(dx, nmaxcorr);
-    for (i = 0; i < nmaxcorr; i++)
-    {
-        snew(dx[i], n);
-    }
-
-    snew(dg, nmaxcorr);
-    for (i = 0; i < nmaxcorr; i++)
-    {
-        snew(dg[i], n);
-    }
-
-    step  = 0;
-    neval = 0;
-
-    /* Init em */
-    init_em(fplog, LBFGS, cr, inputrec,
-            state, top_global, &ems, &top, &f, &f_global,
-            nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
-            nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle);
-    /* Do_lbfgs is not completely updated like do_steep and do_cg,
-     * so we free some memory again.
-     */
-    sfree(ems.s.x);
-    sfree(ems.f);
-
-    xx = (real *)state->x;
-    ff = (real *)f;
-
-    start = 0;
-    end   = mdatoms->homenr;
-
-    /* Print to log file */
-    print_em_start(fplog, cr, walltime_accounting, wcycle, LBFGS);
-
-    do_log = do_ene = do_x = do_f = TRUE;
-
-    /* Max number of steps */
-    number_steps = inputrec->nsteps;
-
-    /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
-    gf = 0;
-    for (i = start; i < end; i++)
-    {
-        if (mdatoms->cFREEZE)
-        {
-            gf = mdatoms->cFREEZE[i];
-        }
-        for (m = 0; m < DIM; m++)
-        {
-            frozen[3*i+m] = inputrec->opts.nFreeze[gf][m];
-        }
-    }
-    if (MASTER(cr))
-    {
-        sp_header(stderr, LBFGS, inputrec->em_tol, number_steps);
-    }
-    if (fplog)
-    {
-        sp_header(fplog, LBFGS, inputrec->em_tol, number_steps);
-    }
-
-    if (vsite)
-    {
-        construct_vsites(vsite, state->x, 1, NULL,
-                         top->idef.iparams, top->idef.il,
-                         fr->ePBC, fr->bMolPBC, cr, state->box);
-    }
-
-    /* Call the force routine and some auxiliary (neighboursearching etc.) */
-    /* do_force always puts the charge groups in the box and shifts again
-     * We do not unshift, so molecules are always whole
-     */
-    neval++;
-    ems.s.x = state->x;
-    ems.f   = f;
-    evaluate_energy(fplog, cr,
-                    top_global, &ems, top,
-                    inputrec, nrnb, wcycle, gstat,
-                    vsite, constr, fcd, graph, mdatoms, fr,
-                    mu_tot, enerd, vir, pres, -1, TRUE);
-    where();
-
-    if (MASTER(cr))
-    {
-        /* Copy stuff to the energy bin for easy printing etc. */
-        upd_mdebin(mdebin, FALSE, FALSE, (double)step,
-                   mdatoms->tmass, enerd, state, inputrec->fepvals, inputrec->expandedvals, state->box,
-                   NULL, NULL, vir, pres, NULL, mu_tot, constr);
-
-        print_ebin_header(fplog, step, step, state->lambda[efptFEP]);
-        print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL,
-                   TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
-    }
-    where();
-
-    /* This is the starting energy */
-    Epot = enerd->term[F_EPOT];
-
-    fnorm = ems.fnorm;
-    fmax  = ems.fmax;
-    nfmax = ems.a_fmax;
-
-    /* Set the initial step.
-     * since it will be multiplied by the non-normalized search direction
-     * vector (force vector the first time), we scale it by the
-     * norm of the force.
-     */
-
-    if (MASTER(cr))
-    {
-        double sqrtNumAtoms = sqrt(static_cast<double>(state->natoms));
-        fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr);
-        fprintf(stderr, "   F-max             = %12.5e on atom %d\n", fmax, nfmax+1);
-        fprintf(stderr, "   F-Norm            = %12.5e\n", fnorm/sqrtNumAtoms);
-        fprintf(stderr, "\n");
-        /* and copy to the log file too... */
-        fprintf(fplog, "Using %d BFGS correction steps.\n\n", nmaxcorr);
-        fprintf(fplog, "   F-max             = %12.5e on atom %d\n", fmax, nfmax+1);
-        fprintf(fplog, "   F-Norm            = %12.5e\n", fnorm/sqrtNumAtoms);
-        fprintf(fplog, "\n");
-    }
-
-    // Point is an index to the memory of search directions, where 0 is the first one.
-    point = 0;
-
-    // Set initial search direction to the force (-gradient), or 0 for frozen particles.
-    for (i = 0; i < n; i++)
-    {
-        if (!frozen[i])
-        {
-            dx[point][i] = ff[i]; /* Initial search direction */
-        }
-        else
-        {
-            dx[point][i] = 0;
-        }
-    }
-
-    // Stepsize will be modified during the search, and actually it is not critical
-    // (the main efficiency in the algorithm comes from changing directions), but
-    // we still need an initial value, so estimate it as the inverse of the norm
-    // so we take small steps where the potential fluctuates a lot.
-    stepsize  = 1.0/fnorm;
-
-    /* Start the loop over BFGS steps.
-     * Each successful step is counted, and we continue until
-     * we either converge or reach the max number of steps.
-     */
-
-    ncorr = 0;
-
-    /* Set the gradient from the force */
-    converged = FALSE;
-    for (step = 0; (number_steps < 0 || (number_steps >= 0 && step <= number_steps)) && !converged; step++)
-    {
-
-        /* Write coordinates if necessary */
-        do_x = do_per_step(step, inputrec->nstxout);
-        do_f = do_per_step(step, inputrec->nstfout);
-
-        mdof_flags = 0;
-        if (do_x)
-        {
-            mdof_flags |= MDOF_X;
-        }
-
-        if (do_f)
-        {
-            mdof_flags |= MDOF_F;
-        }
-
-        if (inputrec->bIMD)
-        {
-            mdof_flags |= MDOF_IMD;
-        }
-
-        mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags,
-                                         top_global, step, (real)step, state, state, f, f);
-
-        /* Do the linesearching in the direction dx[point][0..(n-1)] */
-
-        /* make s a pointer to current search direction - point=0 first time we get here */
-        s = dx[point];
-
-        // calculate line gradient in position A
-        for (gpa = 0, i = 0; i < n; i++)
-        {
-            gpa -= s[i]*ff[i];
-        }
-
-        /* Calculate minimum allowed stepsize along the line, before the average (norm)
-         * relative change in coordinate is smaller than precision
-         */
-        for (minstep = 0, i = 0; i < n; i++)
-        {
-            tmp = fabs(xx[i]);
-            if (tmp < 1.0)
-            {
-                tmp = 1.0;
-            }
-            tmp      = s[i]/tmp;
-            minstep += tmp*tmp;
-        }
-        minstep = GMX_REAL_EPS/sqrt(minstep/n);
-
-        if (stepsize < minstep)
-        {
-            converged = TRUE;
-            break;
-        }
-
-        // Before taking any steps along the line, store the old position
-        for (i = 0; i < n; i++)
-        {
-            lastx[i] = xx[i];
-            lastf[i] = ff[i];
-        }
-        Epot0 = Epot;
-
-        for (i = 0; i < n; i++)
-        {
-            xa[i] = xx[i];
-        }
-
-        /* Take a step downhill.
-         * In theory, we should find the actual minimum of the function in this
-         * direction, somewhere along the line.
-         * That is quite possible, but it turns out to take 5-10 function evaluations
-         * for each line. However, we dont really need to find the exact minimum -
-         * it is much better to start a new BFGS step in a modified direction as soon
-         * as we are close to it. This will save a lot of energy evaluations.
-         *
-         * In practice, we just try to take a single step.
-         * If it worked (i.e. lowered the energy), we increase the stepsize but
-         * continue straight to the next BFGS step without trying to find any minimum,
-         * i.e. we change the search direction too. If the line was smooth, it is
-         * likely we are in a smooth region, and then it makes sense to take longer
-         * steps in the modified search direction too.
-         *
-         * If it didn't work (higher energy), there must be a minimum somewhere between
-         * the old position and the new one. Then we need to start by finding a lower
-         * value before we change search direction. Since the energy was apparently
-         * quite rough, we need to decrease the step size.
-         *
-         * Due to the finite numerical accuracy, it turns out that it is a good idea
-         * to accept a SMALL increase in energy, if the derivative is still downhill.
-         * This leads to lower final energies in the tests I've done. / Erik
-         */
-
-        // State "A" is the first position along the line.
-        // reference position along line is initially zero
-        EpotA      = Epot0;
-        a          = 0.0;
-
-        // Check stepsize first. We do not allow displacements
-        // larger than emstep.
-        //
-        do
-        {
-            // Pick a new position C by adding stepsize to A.
-            c        = a + stepsize;
-
-            // Calculate what the largest change in any individual coordinate
-            // would be (translation along line * gradient along line)
-            maxdelta = 0;
-            for (i = 0; i < n; i++)
-            {
-                delta = c*s[i];
-                if (delta > maxdelta)
-                {
-                    maxdelta = delta;
-                }
-            }
-            // If any displacement is larger than the stepsize limit, reduce the step
-            if (maxdelta > inputrec->em_stepsize)
-            {
-                stepsize *= 0.1;
-            }
-        }
-        while (maxdelta > inputrec->em_stepsize);
-
-        // Take a trial step and move the coordinate array xc[] to position C
-        for (i = 0; i < n; i++)
-        {
-            xc[i] = lastx[i] + c*s[i];
-        }
-
-        neval++;
-        // Calculate energy for the trial step in position C
-        ems.s.x = (rvec *)xc;
-        ems.f   = (rvec *)fc;
-        evaluate_energy(fplog, cr,
-                        top_global, &ems, top,
-                        inputrec, nrnb, wcycle, gstat,
-                        vsite, constr, fcd, graph, mdatoms, fr,
-                        mu_tot, enerd, vir, pres, step, FALSE);
-        EpotC = ems.epot;
-
-        // Calc line gradient in position C
-        for (gpc = 0, i = 0; i < n; i++)
-        {
-            gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */
-        }
-        /* Sum the gradient along the line across CPUs */
-        if (PAR(cr))
-        {
-            gmx_sumd(1, &gpc, cr);
-        }
-
-        // This is the max amount of increase in energy we tolerate.
-        // By allowing VERY small changes (close to numerical precision) we
-        // frequently find even better (lower) final energies.
-        tmp = sqrt(GMX_REAL_EPS)*fabs(EpotA);
-
-        // Accept the step if the energy is lower in the new position C (compared to A),
-        // or if it is not significantly higher and the line derivative is still negative.
-        if (EpotC < EpotA || (gpc < 0 && EpotC < (EpotA+tmp)))
-        {
-            // Great, we found a better energy. We no longer try to alter the
-            // stepsize, but simply accept this new better position. The we select a new
-            // search direction instead, which will be much more efficient than continuing
-            // to take smaller steps along a line. Set fnorm based on the new C position,
-            // which will be used to update the stepsize to 1/fnorm further down.
-            foundlower = TRUE;
-            fnorm      = ems.fnorm;
-        }
-        else
-        {
-            // If we got here, the energy is NOT lower in point C, i.e. it will be the same
-            // or higher than in point A. In this case it is pointless to move to point C,
-            // so we will have to do more iterations along the same line to find a smaller
-            // value in the interval [A=0.0,C].
-            // Here, A is still 0.0, but that will change when we do a search in the interval
-            // [0.0,C] below. That search we will do by interpolation or bisection rather
-            // than with the stepsize, so no need to modify it. For the next search direction
-            // it will be reset to 1/fnorm anyway.
-            foundlower = FALSE;
-        }
-
-        if (!foundlower)
-        {
-            // OK, if we didn't find a lower value we will have to locate one now - there must
-            // be one in the interval [a,c].
-            // The same thing is valid here, though: Don't spend dozens of iterations to find
-            // the line minimum. We try to interpolate based on the derivative at the endpoints,
-            // and only continue until we find a lower value. In most cases this means 1-2 iterations.
-            // I also have a safeguard for potentially really pathological functions so we never
-            // take more than 20 steps before we give up.
-            // If we already found a lower value we just skip this step and continue to the update.
-            nminstep = 0;
-            do
-            {
-                // Select a new trial point B in the interval [A,C].
-                // If the derivatives at points a & c have different sign we interpolate to zero,
-                // otherwise just do a bisection since there might be multiple minima/maxima
-                // inside the interval.
-                if (gpa < 0 && gpc > 0)
-                {
-                    b = a + gpa*(a-c)/(gpc-gpa);
-                }
-                else
-                {
-                    b = 0.5*(a+c);
-                }
-
-                /* safeguard if interpolation close to machine accuracy causes errors:
-                 * never go outside the interval
-                 */
-                if (b <= a || b >= c)
-                {
-                    b = 0.5*(a+c);
-                }
-
-                // Take a trial step to point B
-                for (i = 0; i < n; i++)
-                {
-                    xb[i] = lastx[i] + b*s[i];
-                }
-
-                neval++;
-                // Calculate energy for the trial step in point B
-                ems.s.x = (rvec *)xb;
-                ems.f   = (rvec *)fb;
-                evaluate_energy(fplog, cr,
-                                top_global, &ems, top,
-                                inputrec, nrnb, wcycle, gstat,
-                                vsite, constr, fcd, graph, mdatoms, fr,
-                                mu_tot, enerd, vir, pres, step, FALSE);
-                EpotB = ems.epot;
-                fnorm = ems.fnorm;
-
-                // Calculate gradient in point B
-                for (gpb = 0, i = 0; i < n; i++)
-                {
-                    gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */
-
-                }
-                /* Sum the gradient along the line across CPUs */
-                if (PAR(cr))
-                {
-                    gmx_sumd(1, &gpb, cr);
-                }
-
-                // Keep one of the intervals [A,B] or [B,C] based on the value of the derivative
-                // at the new point B, and rename the endpoints of this new interval A and C.
-                if (gpb > 0)
-                {
-                    /* Replace c endpoint with b */
-                    EpotC = EpotB;
-                    c     = b;
-                    gpc   = gpb;
-                    /* swap coord pointers b/c */
-                    xtmp = xb;
-                    ftmp = fb;
-                    xb   = xc;
-                    fb   = fc;
-                    xc   = xtmp;
-                    fc   = ftmp;
-                }
-                else
-                {
-                    /* Replace a endpoint with b */
-                    EpotA = EpotB;
-                    a     = b;
-                    gpa   = gpb;
-                    /* swap coord pointers a/b */
-                    xtmp = xb;
-                    ftmp = fb;
-                    xb   = xa;
-                    fb   = fa;
-                    xa   = xtmp;
-                    fa   = ftmp;
-                }
-
-                /*
-                 * Stop search as soon as we find a value smaller than the endpoints,
-                 * or if the tolerance is below machine precision.
-                 * Never run more than 20 steps, no matter what.
-                 */
-                nminstep++;
-            }
-            while ((EpotB > EpotA || EpotB > EpotC) && (nminstep < 20));
-
-            if (fabs(EpotB-Epot0) < GMX_REAL_EPS || nminstep >= 20)
-            {
-                /* OK. We couldn't find a significantly lower energy.
-                 * If ncorr==0 this was steepest descent, and then we give up.
-                 * If not, reset memory to restart as steepest descent before quitting.
-                 */
-                if (ncorr == 0)
-                {
-                    /* Converged */
-                    converged = TRUE;
-                    break;
-                }
-                else
-                {
-                    /* Reset memory */
-                    ncorr = 0;
-                    /* Search in gradient direction */
-                    for (i = 0; i < n; i++)
-                    {
-                        dx[point][i] = ff[i];
-                    }
-                    /* Reset stepsize */
-                    stepsize = 1.0/fnorm;
-                    continue;
-                }
-            }
-
-            /* Select min energy state of A & C, put the best in xx/ff/Epot
-             */
-            if (EpotC < EpotA)
-            {
-                Epot = EpotC;
-                /* Use state C */
-                for (i = 0; i < n; i++)
-                {
-                    xx[i] = xc[i];
-                    ff[i] = fc[i];
-                }
-                step_taken = c;
-            }
-            else
-            {
-                Epot = EpotA;
-                /* Use state A */
-                for (i = 0; i < n; i++)
-                {
-                    xx[i] = xa[i];
-                    ff[i] = fa[i];
-                }
-                step_taken = a;
-            }
-
-        }
-        else
-        {
-            /* found lower */
-            Epot = EpotC;
-            /* Use state C */
-            for (i = 0; i < n; i++)
-            {
-                xx[i] = xc[i];
-                ff[i] = fc[i];
-            }
-            step_taken = c;
-        }
-
-        /* Update the memory information, and calculate a new
-         * approximation of the inverse hessian
-         */
-
-        /* Have new data in Epot, xx, ff */
-        if (ncorr < nmaxcorr)
-        {
-            ncorr++;
-        }
-
-        for (i = 0; i < n; i++)
-        {
-            dg[point][i]  = lastf[i]-ff[i];
-            dx[point][i] *= step_taken;
-        }
-
-        dgdg = 0;
-        dgdx = 0;
-        for (i = 0; i < n; i++)
-        {
-            dgdg += dg[point][i]*dg[point][i];
-            dgdx += dg[point][i]*dx[point][i];
-        }
-
-        diag = dgdx/dgdg;
-
-        rho[point] = 1.0/dgdx;
-        point++;
-
-        if (point >= nmaxcorr)
-        {
-            point = 0;
-        }
-
-        /* Update */
-        for (i = 0; i < n; i++)
-        {
-            p[i] = ff[i];
-        }
-
-        cp = point;
-
-        /* Recursive update. First go back over the memory points */
-        for (k = 0; k < ncorr; k++)
-        {
-            cp--;
-            if (cp < 0)
-            {
-                cp = ncorr-1;
-            }
-
-            sq = 0;
-            for (i = 0; i < n; i++)
-            {
-                sq += dx[cp][i]*p[i];
-            }
-
-            alpha[cp] = rho[cp]*sq;
-
-            for (i = 0; i < n; i++)
-            {
-                p[i] -= alpha[cp]*dg[cp][i];
-            }
-        }
-
-        for (i = 0; i < n; i++)
-        {
-            p[i] *= diag;
-        }
-
-        /* And then go forward again */
-        for (k = 0; k < ncorr; k++)
-        {
-            yr = 0;
-            for (i = 0; i < n; i++)
-            {
-                yr += p[i]*dg[cp][i];
-            }
-
-            beta = rho[cp]*yr;
-            beta = alpha[cp]-beta;
-
-            for (i = 0; i < n; i++)
-            {
-                p[i] += beta*dx[cp][i];
-            }
-
-            cp++;
-            if (cp >= ncorr)
-            {
-                cp = 0;
-            }
-        }
-
-        for (i = 0; i < n; i++)
-        {
-            if (!frozen[i])
-            {
-                dx[point][i] = p[i];
-            }
-            else
-            {
-                dx[point][i] = 0;
-            }
-        }
-
-        /* Test whether the convergence criterion is met */
-        get_f_norm_max(cr, &(inputrec->opts), mdatoms, f, &fnorm, &fmax, &nfmax);
-
-        /* Print it if necessary */
-        if (MASTER(cr))
-        {
-            if (bVerbose)
-            {
-                double sqrtNumAtoms = sqrt(static_cast<double>(state->natoms));
-                fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
-                        step, Epot, fnorm/sqrtNumAtoms, fmax, nfmax+1);
-            }
-            /* Store the new (lower) energies */
-            upd_mdebin(mdebin, FALSE, FALSE, (double)step,
-                       mdatoms->tmass, enerd, state, inputrec->fepvals, inputrec->expandedvals, state->box,
-                       NULL, NULL, vir, pres, NULL, mu_tot, constr);
-            do_log = do_per_step(step, inputrec->nstlog);
-            do_ene = do_per_step(step, inputrec->nstenergy);
-            if (do_log)
-            {
-                print_ebin_header(fplog, step, step, state->lambda[efptFEP]);
-            }
-            print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE,
-                       do_log ? fplog : NULL, step, step, eprNORMAL,
-                       TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
-        }
-
-        /* Send x and E to IMD client, if bIMD is TRUE. */
-        if (do_IMD(inputrec->bIMD, step, cr, TRUE, state->box, state->x, inputrec, 0, wcycle) && MASTER(cr))
-        {
-            IMD_send_positions(inputrec->imd);
-        }
-
-        // Reset stepsize in we are doing more iterations
-        stepsize = 1.0/fnorm;
-
-        /* Stop when the maximum force lies below tolerance.
-         * If we have reached machine precision, converged is already set to true.
-         */
-        converged = converged || (fmax < inputrec->em_tol);
-
-    } /* End of the loop */
-
-    /* IMD cleanup, if bIMD is TRUE. */
-    IMD_finalize(inputrec->bIMD, inputrec->imd);
-
-    if (converged)
-    {
-        step--; /* we never took that last step in this case */
-
-    }
-    if (fmax > inputrec->em_tol)
-    {
-        if (MASTER(cr))
-        {
-            warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE);
-            warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE);
-        }
-        converged = FALSE;
-    }
-
-    /* If we printed energy and/or logfile last step (which was the last step)
-     * we don't have to do it again, but otherwise print the final values.
-     */
-    if (!do_log) /* Write final value to log since we didn't do anythin last step */
-    {
-        print_ebin_header(fplog, step, step, state->lambda[efptFEP]);
-    }
-    if (!do_ene || !do_log) /* Write final energy file entries */
-    {
-        print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE,
-                   !do_log ? fplog : NULL, step, step, eprNORMAL,
-                   TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
-    }
-
-    /* Print some stuff... */
-    if (MASTER(cr))
-    {
-        fprintf(stderr, "\nwriting lowest energy coordinates.\n");
-    }
-
-    /* IMPORTANT!
-     * For accurate normal mode calculation it is imperative that we
-     * store the last conformation into the full precision binary trajectory.
-     *
-     * However, we should only do it if we did NOT already write this step
-     * above (which we did if do_x or do_f was true).
-     */
-    do_x = !do_per_step(step, inputrec->nstxout);
-    do_f = !do_per_step(step, inputrec->nstfout);
-    write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm),
-                  top_global, inputrec, step,
-                  &ems, state, f);
-
-    if (MASTER(cr))
-    {
-        double sqrtNumAtoms = sqrt(static_cast<double>(state->natoms));
-        print_converged(stderr, LBFGS, inputrec->em_tol, step, converged,
-                        number_steps, Epot, fmax, nfmax, fnorm/sqrtNumAtoms);
-        print_converged(fplog, LBFGS, inputrec->em_tol, step, converged,
-                        number_steps, Epot, fmax, nfmax, fnorm/sqrtNumAtoms);
-
-        fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
-    }
-
-    finish_em(cr, outf, walltime_accounting, wcycle);
-
-    /* To print the actual number of steps we needed somewhere */
-    walltime_accounting_set_nsteps_done(walltime_accounting, step);
-
-    return 0;
-} /* That's all folks */
-
-
-double do_steep(FILE *fplog, t_commrec *cr,
-                int nfile, const t_filenm fnm[],
-                const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
-                int gmx_unused nstglobalcomm,
-                gmx_vsite_t *vsite, gmx_constr_t constr,
-                int gmx_unused stepout,
-                t_inputrec *inputrec,
-                gmx_mtop_t *top_global, t_fcdata *fcd,
-                t_state *state_global,
-                t_mdatoms *mdatoms,
-                t_nrnb *nrnb, gmx_wallcycle_t wcycle,
-                gmx_edsam_t gmx_unused  ed,
-                t_forcerec *fr,
-                int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
-                gmx_membed_t gmx_unused membed,
-                real gmx_unused cpt_period, real gmx_unused max_hours,
-                int imdport,
-                unsigned long gmx_unused Flags,
-                gmx_walltime_accounting_t walltime_accounting)
-{
-    const char       *SD = "Steepest Descents";
-    em_state_t       *s_min, *s_try;
-    rvec             *f_global;
-    gmx_localtop_t   *top;
-    gmx_enerdata_t   *enerd;
-    rvec             *f;
-    gmx_global_stat_t gstat;
-    t_graph          *graph;
-    real              stepsize;
-    real              ustep, fnormn;
-    gmx_mdoutf_t      outf;
-    t_mdebin         *mdebin;
-    gmx_bool          bDone, bAbort, do_x, do_f;
-    tensor            vir, pres;
-    rvec              mu_tot;
-    int               nsteps;
-    int               count          = 0;
-    int               steps_accepted = 0;
-
-    s_min = init_em_state();
-    s_try = init_em_state();
-
-    /* Init em and store the local state in s_try */
-    init_em(fplog, SD, cr, inputrec,
-            state_global, top_global, s_try, &top, &f, &f_global,
-            nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
-            nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle);
-
-    /* Print to log file  */
-    print_em_start(fplog, cr, walltime_accounting, wcycle, SD);
-
-    /* Set variables for stepsize (in nm). This is the largest
-     * step that we are going to make in any direction.
-     */
-    ustep    = inputrec->em_stepsize;
-    stepsize = 0;
-
-    /* Max number of steps  */
-    nsteps = inputrec->nsteps;
-
-    if (MASTER(cr))
-    {
-        /* Print to the screen  */
-        sp_header(stderr, SD, inputrec->em_tol, nsteps);
-    }
-    if (fplog)
-    {
-        sp_header(fplog, SD, inputrec->em_tol, nsteps);
-    }
-
-    /**** HERE STARTS THE LOOP ****
-     * count is the counter for the number of steps
-     * bDone will be TRUE when the minimization has converged
-     * bAbort will be TRUE when nsteps steps have been performed or when
-     * the stepsize becomes smaller than is reasonable for machine precision
-     */
-    count  = 0;
-    bDone  = FALSE;
-    bAbort = FALSE;
-    while (!bDone && !bAbort)
-    {
-        bAbort = (nsteps >= 0) && (count == nsteps);
-
-        /* set new coordinates, except for first step */
-        if (count > 0)
-        {
-            do_em_step(cr, inputrec, mdatoms, fr->bMolPBC,
-                       s_min, stepsize, s_min->f, s_try,
-                       constr, top, nrnb, wcycle, count);
-        }
-
-        evaluate_energy(fplog, cr,
-                        top_global, s_try, top,
-                        inputrec, nrnb, wcycle, gstat,
-                        vsite, constr, fcd, graph, mdatoms, fr,
-                        mu_tot, enerd, vir, pres, count, count == 0);
-
-        if (MASTER(cr))
-        {
-            print_ebin_header(fplog, count, count, s_try->s.lambda[efptFEP]);
-        }
-
-        if (count == 0)
-        {
-            s_min->epot = s_try->epot;
-        }
-
-        /* Print it if necessary  */
-        if (MASTER(cr))
-        {
-            if (bVerbose)
-            {
-                fprintf(stderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
-                        count, ustep, s_try->epot, s_try->fmax, s_try->a_fmax+1,
-                        ( (count == 0) || (s_try->epot < s_min->epot) ) ? '\n' : '\r');
-            }
-
-            if ( (count == 0) || (s_try->epot < s_min->epot) )
-            {
-                /* Store the new (lower) energies  */
-                upd_mdebin(mdebin, FALSE, FALSE, (double)count,
-                           mdatoms->tmass, enerd, &s_try->s, inputrec->fepvals, inputrec->expandedvals,
-                           s_try->s.box, NULL, NULL, vir, pres, NULL, mu_tot, constr);
-
-                /* Prepare IMD energy record, if bIMD is TRUE. */
-                IMD_fill_energy_record(inputrec->bIMD, inputrec->imd, enerd, count, TRUE);
-
-                print_ebin(mdoutf_get_fp_ene(outf), TRUE,
-                           do_per_step(steps_accepted, inputrec->nstdisreout),
-                           do_per_step(steps_accepted, inputrec->nstorireout),
-                           fplog, count, count, eprNORMAL, TRUE,
-                           mdebin, fcd, &(top_global->groups), &(inputrec->opts));
-                fflush(fplog);
-            }
-        }
-
-        /* Now if the new energy is smaller than the previous...
-         * or if this is the first step!
-         * or if we did random steps!
-         */
-
-        if ( (count == 0) || (s_try->epot < s_min->epot) )
-        {
-            steps_accepted++;
-
-            /* Test whether the convergence criterion is met...  */
-            bDone = (s_try->fmax < inputrec->em_tol);
-
-            /* Copy the arrays for force, positions and energy  */
-            /* The 'Min' array always holds the coords and forces of the minimal
-               sampled energy  */
-            swap_em_state(s_min, s_try);
-            if (count > 0)
-            {
-                ustep *= 1.2;
-            }
-
-            /* Write to trn, if necessary */
-            do_x = do_per_step(steps_accepted, inputrec->nstxout);
-            do_f = do_per_step(steps_accepted, inputrec->nstfout);
-            write_em_traj(fplog, cr, outf, do_x, do_f, NULL,
-                          top_global, inputrec, count,
-                          s_min, state_global, f_global);
-        }
-        else
-        {
-            /* If energy is not smaller make the step smaller...  */
-            ustep *= 0.5;
-
-            if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
-            {
-                /* Reload the old state */
-                em_dd_partition_system(fplog, count, cr, top_global, inputrec,
-                                       s_min, top, mdatoms, fr, vsite, constr,
-                                       nrnb, wcycle);
-            }
-        }
-
-        /* Determine new step  */
-        stepsize = ustep/s_min->fmax;
-
-        /* Check if stepsize is too small, with 1 nm as a characteristic length */
-#ifdef GMX_DOUBLE
-        if (count == nsteps || ustep < 1e-12)
-#else
-        if (count == nsteps || ustep < 1e-6)
-#endif
-        {
-            if (MASTER(cr))
-            {
-                warn_step(stderr, inputrec->em_tol, count == nsteps, constr != NULL);
-                warn_step(fplog, inputrec->em_tol, count == nsteps, constr != NULL);
-            }
-            bAbort = TRUE;
-        }
-
-        /* Send IMD energies and positions, if bIMD is TRUE. */
-        if (do_IMD(inputrec->bIMD, count, cr, TRUE, state_global->box, state_global->x, inputrec, 0, wcycle) && MASTER(cr))
-        {
-            IMD_send_positions(inputrec->imd);
-        }
-
-        count++;
-    } /* End of the loop  */
-
-    /* IMD cleanup, if bIMD is TRUE. */
-    IMD_finalize(inputrec->bIMD, inputrec->imd);
-
-    /* Print some data...  */
-    if (MASTER(cr))
-    {
-        fprintf(stderr, "\nwriting lowest energy coordinates.\n");
-    }
-    write_em_traj(fplog, cr, outf, TRUE, inputrec->nstfout, ftp2fn(efSTO, nfile, fnm),
-                  top_global, inputrec, count,
-                  s_min, state_global, f_global);
-
-    if (MASTER(cr))
-    {
-        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
-        fnormn = s_min->fnorm/sqrtNumAtoms;
-
-        print_converged(stderr, SD, inputrec->em_tol, count, bDone, nsteps,
-                        s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
-        print_converged(fplog, SD, inputrec->em_tol, count, bDone, nsteps,
-                        s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
-    }
-
-    finish_em(cr, outf, walltime_accounting, wcycle);
-
-    /* To print the actual number of steps we needed somewhere */
-    inputrec->nsteps = count;
-
-    walltime_accounting_set_nsteps_done(walltime_accounting, count);
-
-    return 0;
-} /* That's all folks */
-
-
-double do_nm(FILE *fplog, t_commrec *cr,
-             int nfile, const t_filenm fnm[],
-             const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused  bCompact,
-             int gmx_unused nstglobalcomm,
-             gmx_vsite_t *vsite, gmx_constr_t constr,
-             int gmx_unused stepout,
-             t_inputrec *inputrec,
-             gmx_mtop_t *top_global, t_fcdata *fcd,
-             t_state *state_global,
-             t_mdatoms *mdatoms,
-             t_nrnb *nrnb, gmx_wallcycle_t wcycle,
-             gmx_edsam_t  gmx_unused ed,
-             t_forcerec *fr,
-             int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
-             gmx_membed_t gmx_unused membed,
-             real gmx_unused cpt_period, real gmx_unused max_hours,
-             int imdport,
-             unsigned long gmx_unused Flags,
-             gmx_walltime_accounting_t walltime_accounting)
-{
-    const char          *NM = "Normal Mode Analysis";
-    gmx_mdoutf_t         outf;
-    int                  natoms, atom, d;
-    int                  nnodes, node;
-    rvec                *f_global;
-    gmx_localtop_t      *top;
-    gmx_enerdata_t      *enerd;
-    rvec                *f;
-    gmx_global_stat_t    gstat;
-    t_graph             *graph;
-    tensor               vir, pres;
-    rvec                 mu_tot;
-    rvec                *fneg, *dfdx;
-    gmx_bool             bSparse; /* use sparse matrix storage format */
-    size_t               sz = 0;
-    gmx_sparsematrix_t * sparse_matrix           = NULL;
-    real           *     full_matrix             = NULL;
-    em_state_t       *   state_work;
-
-    /* added with respect to mdrun */
-    int        i, j, k, row, col;
-    real       der_range = 10.0*sqrt(GMX_REAL_EPS);
-    real       x_min;
-    bool       bIsMaster = MASTER(cr);
-
-    if (constr != NULL)
-    {
-        gmx_fatal(FARGS, "Constraints present with Normal Mode Analysis, this combination is not supported");
-    }
-
-    state_work = init_em_state();
-
-    /* Init em and store the local state in state_minimum */
-    init_em(fplog, NM, cr, inputrec,
-            state_global, top_global, state_work, &top,
-            &f, &f_global,
-            nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
-            nfile, fnm, &outf, NULL, imdport, Flags, wcycle);
-
-    natoms = top_global->natoms;
-    snew(fneg, natoms);
-    snew(dfdx, natoms);
-
-#ifndef GMX_DOUBLE
-    if (bIsMaster)
-    {
-        fprintf(stderr,
-                "NOTE: This version of GROMACS has been compiled in single precision,\n"
-                "      which MIGHT not be accurate enough for normal mode analysis.\n"
-                "      GROMACS now uses sparse matrix storage, so the memory requirements\n"
-                "      are fairly modest even if you recompile in double precision.\n\n");
-    }
-#endif
-
-    /* Check if we can/should use sparse storage format.
-     *
-     * Sparse format is only useful when the Hessian itself is sparse, which it
-     * will be when we use a cutoff.
-     * For small systems (n<1000) it is easier to always use full matrix format, though.
-     */
-    if (EEL_FULL(fr->eeltype) || fr->rlist == 0.0)
-    {
-        md_print_info(cr, fplog, "Non-cutoff electrostatics used, forcing full Hessian format.\n");
-        bSparse = FALSE;
-    }
-    else if (top_global->natoms < 1000)
-    {
-        md_print_info(cr, fplog, "Small system size (N=%d), using full Hessian format.\n", top_global->natoms);
-        bSparse = FALSE;
-    }
-    else
-    {
-        md_print_info(cr, fplog, "Using compressed symmetric sparse Hessian format.\n");
-        bSparse = TRUE;
-    }
-
-    if (bIsMaster)
-    {
-        sz = DIM*top_global->natoms;
-
-        fprintf(stderr, "Allocating Hessian memory...\n\n");
-
-        if (bSparse)
-        {
-            sparse_matrix = gmx_sparsematrix_init(sz);
-            sparse_matrix->compressed_symmetric = TRUE;
-        }
-        else
-        {
-            snew(full_matrix, sz*sz);
-        }
-    }
-
-    init_nrnb(nrnb);
-
-    where();
-
-    /* Write start time and temperature */
-    print_em_start(fplog, cr, walltime_accounting, wcycle, NM);
-
-    /* fudge nr of steps to nr of atoms */
-    inputrec->nsteps = natoms*2;
-
-    if (bIsMaster)
-    {
-        fprintf(stderr, "starting normal mode calculation '%s'\n%d steps.\n\n",
-                *(top_global->name), (int)inputrec->nsteps);
-    }
-
-    nnodes = cr->nnodes;
-
-    /* Make evaluate_energy do a single node force calculation */
-    cr->nnodes = 1;
-    evaluate_energy(fplog, cr,
-                    top_global, state_work, top,
-                    inputrec, nrnb, wcycle, gstat,
-                    vsite, constr, fcd, graph, mdatoms, fr,
-                    mu_tot, enerd, vir, pres, -1, TRUE);
-    cr->nnodes = nnodes;
-
-    /* if forces are not small, warn user */
-    get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, state_work);
-
-    md_print_info(cr, fplog, "Maximum force:%12.5e\n", state_work->fmax);
-    if (state_work->fmax > 1.0e-3)
-    {
-        md_print_info(cr, fplog,
-                      "The force is probably not small enough to "
-                      "ensure that you are at a minimum.\n"
-                      "Be aware that negative eigenvalues may occur\n"
-                      "when the resulting matrix is diagonalized.\n\n");
-    }
-
-    /***********************************************************
-     *
-     *      Loop over all pairs in matrix
-     *
-     *      do_force called twice. Once with positive and
-     *      once with negative displacement
-     *
-     ************************************************************/
-
-    /* Steps are divided one by one over the nodes */
-    for (atom = cr->nodeid; atom < natoms; atom += nnodes)
-    {
-
-        for (d = 0; d < DIM; d++)
-        {
-            x_min = state_work->s.x[atom][d];
-
-            state_work->s.x[atom][d] = x_min - der_range;
-
-            /* Make evaluate_energy do a single node force calculation */
-            cr->nnodes = 1;
-            evaluate_energy(fplog, cr,
-                            top_global, state_work, top,
-                            inputrec, nrnb, wcycle, gstat,
-                            vsite, constr, fcd, graph, mdatoms, fr,
-                            mu_tot, enerd, vir, pres, atom*2, FALSE);
-
-            for (i = 0; i < natoms; i++)
-            {
-                copy_rvec(state_work->f[i], fneg[i]);
-            }
-
-            state_work->s.x[atom][d] = x_min + der_range;
-
-            evaluate_energy(fplog, cr,
-                            top_global, state_work, top,
-                            inputrec, nrnb, wcycle, gstat,
-                            vsite, constr, fcd, graph, mdatoms, fr,
-                            mu_tot, enerd, vir, pres, atom*2+1, FALSE);
-            cr->nnodes = nnodes;
-
-            /* x is restored to original */
-            state_work->s.x[atom][d] = x_min;
-
-            for (j = 0; j < natoms; j++)
-            {
-                for (k = 0; (k < DIM); k++)
-                {
-                    dfdx[j][k] =
-                        -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
-                }
-            }
-
-            if (!bIsMaster)
-            {
-#ifdef GMX_MPI
-#ifdef GMX_DOUBLE
-#define mpi_type MPI_DOUBLE
-#else
-#define mpi_type MPI_FLOAT
-#endif
-                MPI_Send(dfdx[0], natoms*DIM, mpi_type, MASTERNODE(cr), cr->nodeid,
-                         cr->mpi_comm_mygroup);
-#endif
-            }
-            else
-            {
-                for (node = 0; (node < nnodes && atom+node < natoms); node++)
-                {
-                    if (node > 0)
-                    {
-#ifdef GMX_MPI
-                        MPI_Status stat;
-                        MPI_Recv(dfdx[0], natoms*DIM, mpi_type, node, node,
-                                 cr->mpi_comm_mygroup, &stat);
-#undef mpi_type
-#endif
-                    }
-
-                    row = (atom + node)*DIM + d;
-
-                    for (j = 0; j < natoms; j++)
-                    {
-                        for (k = 0; k < DIM; k++)
-                        {
-                            col = j*DIM + k;
-
-                            if (bSparse)
-                            {
-                                if (col >= row && dfdx[j][k] != 0.0)
-                                {
-                                    gmx_sparsematrix_increment_value(sparse_matrix,
-                                                                     row, col, dfdx[j][k]);
-                                }
-                            }
-                            else
-                            {
-                                full_matrix[row*sz+col] = dfdx[j][k];
-                            }
-                        }
-                    }
-                }
-            }
-
-            if (bVerbose && fplog)
-            {
-                fflush(fplog);
-            }
-        }
-        /* write progress */
-        if (bIsMaster && bVerbose)
-        {
-            fprintf(stderr, "\rFinished step %d out of %d",
-                    std::min(atom+nnodes, natoms), natoms);
-            fflush(stderr);
-        }
-    }
-
-    if (bIsMaster)
-    {
-        fprintf(stderr, "\n\nWriting Hessian...\n");
-        gmx_mtxio_write(ftp2fn(efMTX, nfile, fnm), sz, sz, full_matrix, sparse_matrix);
-    }
-
-    finish_em(cr, outf, walltime_accounting, wcycle);
-
-    walltime_accounting_set_nsteps_done(walltime_accounting, natoms*2);
-
-    return 0;
-}
diff --git a/patches/gromacs-5.1.4.diff/src/gromacs/mdlib/minimize.cpp.preplumed b/patches/gromacs-5.1.4.diff/src/gromacs/mdlib/minimize.cpp.preplumed
deleted file mode 100644
index a6ffb5c793f445ef6779c929ac1548b8888f1648..0000000000000000000000000000000000000000
--- a/patches/gromacs-5.1.4.diff/src/gromacs/mdlib/minimize.cpp.preplumed
+++ /dev/null
@@ -1,2888 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-#include <string.h>
-#include <time.h>
-
-#include <algorithm>
-
-#include "gromacs/domdec/domdec.h"
-#include "gromacs/ewald/pme.h"
-#include "gromacs/fileio/confio.h"
-#include "gromacs/fileio/mtxio.h"
-#include "gromacs/fileio/trajectory_writing.h"
-#include "gromacs/imd/imd.h"
-#include "gromacs/legacyheaders/constr.h"
-#include "gromacs/legacyheaders/force.h"
-#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
-#include "gromacs/legacyheaders/macros.h"
-#include "gromacs/legacyheaders/md_logging.h"
-#include "gromacs/legacyheaders/md_support.h"
-#include "gromacs/legacyheaders/mdatoms.h"
-#include "gromacs/legacyheaders/mdebin.h"
-#include "gromacs/legacyheaders/mdrun.h"
-#include "gromacs/legacyheaders/names.h"
-#include "gromacs/legacyheaders/network.h"
-#include "gromacs/legacyheaders/nrnb.h"
-#include "gromacs/legacyheaders/ns.h"
-#include "gromacs/legacyheaders/sim_util.h"
-#include "gromacs/legacyheaders/tgroup.h"
-#include "gromacs/legacyheaders/txtdump.h"
-#include "gromacs/legacyheaders/typedefs.h"
-#include "gromacs/legacyheaders/update.h"
-#include "gromacs/legacyheaders/vsite.h"
-#include "gromacs/legacyheaders/types/commrec.h"
-#include "gromacs/linearalgebra/sparsematrix.h"
-#include "gromacs/listed-forces/manage-threading.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/pbcutil/mshift.h"
-#include "gromacs/pbcutil/pbc.h"
-#include "gromacs/timing/wallcycle.h"
-#include "gromacs/timing/walltime_accounting.h"
-#include "gromacs/topology/mtop_util.h"
-#include "gromacs/utility/cstringutil.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/smalloc.h"
-
-typedef struct {
-    t_state  s;
-    rvec    *f;
-    real     epot;
-    real     fnorm;
-    real     fmax;
-    int      a_fmax;
-} em_state_t;
-
-static em_state_t *init_em_state()
-{
-    em_state_t *ems;
-
-    snew(ems, 1);
-
-    /* does this need to be here?  Should the array be declared differently (staticaly)in the state definition? */
-    snew(ems->s.lambda, efptNR);
-
-    return ems;
-}
-
-static void print_em_start(FILE                     *fplog,
-                           t_commrec                *cr,
-                           gmx_walltime_accounting_t walltime_accounting,
-                           gmx_wallcycle_t           wcycle,
-                           const char               *name)
-{
-    walltime_accounting_start(walltime_accounting);
-    wallcycle_start(wcycle, ewcRUN);
-    print_start(fplog, cr, walltime_accounting, name);
-}
-static void em_time_end(gmx_walltime_accounting_t walltime_accounting,
-                        gmx_wallcycle_t           wcycle)
-{
-    wallcycle_stop(wcycle, ewcRUN);
-
-    walltime_accounting_end(walltime_accounting);
-}
-
-static void sp_header(FILE *out, const char *minimizer, real ftol, int nsteps)
-{
-    fprintf(out, "\n");
-    fprintf(out, "%s:\n", minimizer);
-    fprintf(out, "   Tolerance (Fmax)   = %12.5e\n", ftol);
-    fprintf(out, "   Number of steps    = %12d\n", nsteps);
-}
-
-static void warn_step(FILE *fp, real ftol, gmx_bool bLastStep, gmx_bool bConstrain)
-{
-    char buffer[2048];
-    if (bLastStep)
-    {
-        sprintf(buffer,
-                "\nEnergy minimization reached the maximum number "
-                "of steps before the forces reached the requested "
-                "precision Fmax < %g.\n", ftol);
-    }
-    else
-    {
-        sprintf(buffer,
-                "\nEnergy minimization has stopped, but the forces have "
-                "not converged to the requested precision Fmax < %g (which "
-                "may not be possible for your system). It stopped "
-                "because the algorithm tried to make a new step whose size "
-                "was too small, or there was no change in the energy since "
-                "last step. Either way, we regard the minimization as "
-                "converged to within the available machine precision, "
-                "given your starting configuration and EM parameters.\n%s%s",
-                ftol,
-                sizeof(real) < sizeof(double) ?
-                "\nDouble precision normally gives you higher accuracy, but "
-                "this is often not needed for preparing to run molecular "
-                "dynamics.\n" :
-                "",
-                bConstrain ?
-                "You might need to increase your constraint accuracy, or turn\n"
-                "off constraints altogether (set constraints = none in mdp file)\n" :
-                "");
-    }
-    fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
-}
-
-
-
-static void print_converged(FILE *fp, const char *alg, real ftol,
-                            gmx_int64_t count, gmx_bool bDone, gmx_int64_t nsteps,
-                            real epot, real fmax, int nfmax, real fnorm)
-{
-    char buf[STEPSTRSIZE];
-
-    if (bDone)
-    {
-        fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n",
-                alg, ftol, gmx_step_str(count, buf));
-    }
-    else if (count < nsteps)
-    {
-        fprintf(fp, "\n%s converged to machine precision in %s steps,\n"
-                "but did not reach the requested Fmax < %g.\n",
-                alg, gmx_step_str(count, buf), ftol);
-    }
-    else
-    {
-        fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n",
-                alg, ftol, gmx_step_str(count, buf));
-    }
-
-#ifdef GMX_DOUBLE
-    fprintf(fp, "Potential Energy  = %21.14e\n", epot);
-    fprintf(fp, "Maximum force     = %21.14e on atom %d\n", fmax, nfmax+1);
-    fprintf(fp, "Norm of force     = %21.14e\n", fnorm);
-#else
-    fprintf(fp, "Potential Energy  = %14.7e\n", epot);
-    fprintf(fp, "Maximum force     = %14.7e on atom %d\n", fmax, nfmax+1);
-    fprintf(fp, "Norm of force     = %14.7e\n", fnorm);
-#endif
-}
-
-static void get_f_norm_max(t_commrec *cr,
-                           t_grpopts *opts, t_mdatoms *mdatoms, rvec *f,
-                           real *fnorm, real *fmax, int *a_fmax)
-{
-    double fnorm2, *sum;
-    real   fmax2, fam;
-    int    la_max, a_max, start, end, i, m, gf;
-
-    /* This routine finds the largest force and returns it.
-     * On parallel machines the global max is taken.
-     */
-    fnorm2 = 0;
-    fmax2  = 0;
-    la_max = -1;
-    start  = 0;
-    end    = mdatoms->homenr;
-    if (mdatoms->cFREEZE)
-    {
-        for (i = start; i < end; i++)
-        {
-            gf  = mdatoms->cFREEZE[i];
-            fam = 0;
-            for (m = 0; m < DIM; m++)
-            {
-                if (!opts->nFreeze[gf][m])
-                {
-                    fam += sqr(f[i][m]);
-                }
-            }
-            fnorm2 += fam;
-            if (fam > fmax2)
-            {
-                fmax2  = fam;
-                la_max = i;
-            }
-        }
-    }
-    else
-    {
-        for (i = start; i < end; i++)
-        {
-            fam     = norm2(f[i]);
-            fnorm2 += fam;
-            if (fam > fmax2)
-            {
-                fmax2  = fam;
-                la_max = i;
-            }
-        }
-    }
-
-    if (la_max >= 0 && DOMAINDECOMP(cr))
-    {
-        a_max = cr->dd->gatindex[la_max];
-    }
-    else
-    {
-        a_max = la_max;
-    }
-    if (PAR(cr))
-    {
-        snew(sum, 2*cr->nnodes+1);
-        sum[2*cr->nodeid]   = fmax2;
-        sum[2*cr->nodeid+1] = a_max;
-        sum[2*cr->nnodes]   = fnorm2;
-        gmx_sumd(2*cr->nnodes+1, sum, cr);
-        fnorm2 = sum[2*cr->nnodes];
-        /* Determine the global maximum */
-        for (i = 0; i < cr->nnodes; i++)
-        {
-            if (sum[2*i] > fmax2)
-            {
-                fmax2 = sum[2*i];
-                a_max = (int)(sum[2*i+1] + 0.5);
-            }
-        }
-        sfree(sum);
-    }
-
-    if (fnorm)
-    {
-        *fnorm = sqrt(fnorm2);
-    }
-    if (fmax)
-    {
-        *fmax  = sqrt(fmax2);
-    }
-    if (a_fmax)
-    {
-        *a_fmax = a_max;
-    }
-}
-
-static void get_state_f_norm_max(t_commrec *cr,
-                                 t_grpopts *opts, t_mdatoms *mdatoms,
-                                 em_state_t *ems)
-{
-    get_f_norm_max(cr, opts, mdatoms, ems->f, &ems->fnorm, &ems->fmax, &ems->a_fmax);
-}
-
-void init_em(FILE *fplog, const char *title,
-             t_commrec *cr, t_inputrec *ir,
-             t_state *state_global, gmx_mtop_t *top_global,
-             em_state_t *ems, gmx_localtop_t **top,
-             rvec **f, rvec **f_global,
-             t_nrnb *nrnb, rvec mu_tot,
-             t_forcerec *fr, gmx_enerdata_t **enerd,
-             t_graph **graph, t_mdatoms *mdatoms, gmx_global_stat_t *gstat,
-             gmx_vsite_t *vsite, gmx_constr_t constr,
-             int nfile, const t_filenm fnm[],
-             gmx_mdoutf_t *outf, t_mdebin **mdebin,
-             int imdport, unsigned long gmx_unused Flags,
-             gmx_wallcycle_t wcycle)
-{
-    int  i;
-    real dvdl_constr;
-
-    if (fplog)
-    {
-        fprintf(fplog, "Initiating %s\n", title);
-    }
-
-    state_global->ngtc = 0;
-
-    /* Initialize lambda variables */
-    initialize_lambdas(fplog, ir, &(state_global->fep_state), state_global->lambda, NULL);
-
-    init_nrnb(nrnb);
-
-    /* Interactive molecular dynamics */
-    init_IMD(ir, cr, top_global, fplog, 1, state_global->x,
-             nfile, fnm, NULL, imdport, Flags);
-
-    if (DOMAINDECOMP(cr))
-    {
-        *top = dd_init_local_top(top_global);
-
-        dd_init_local_state(cr->dd, state_global, &ems->s);
-
-        *f = NULL;
-
-        /* Distribute the charge groups over the nodes from the master node */
-        dd_partition_system(fplog, ir->init_step, cr, TRUE, 1,
-                            state_global, top_global, ir,
-                            &ems->s, &ems->f, mdatoms, *top,
-                            fr, vsite, NULL, constr,
-                            nrnb, NULL, FALSE);
-        dd_store_state(cr->dd, &ems->s);
-
-        if (ir->nstfout)
-        {
-            snew(*f_global, top_global->natoms);
-        }
-        else
-        {
-            *f_global = NULL;
-        }
-        *graph = NULL;
-    }
-    else
-    {
-        snew(*f, top_global->natoms);
-
-        /* Just copy the state */
-        ems->s = *state_global;
-        snew(ems->s.x, ems->s.nalloc);
-        snew(ems->f, ems->s.nalloc);
-        for (i = 0; i < state_global->natoms; i++)
-        {
-            copy_rvec(state_global->x[i], ems->s.x[i]);
-        }
-        copy_mat(state_global->box, ems->s.box);
-
-        *top      = gmx_mtop_generate_local_top(top_global, ir);
-        *f_global = *f;
-
-        setup_bonded_threading(fr, &(*top)->idef);
-
-        if (ir->ePBC != epbcNONE && !fr->bMolPBC)
-        {
-            *graph = mk_graph(fplog, &((*top)->idef), 0, top_global->natoms, FALSE, FALSE);
-        }
-        else
-        {
-            *graph = NULL;
-        }
-
-        atoms2md(top_global, ir, 0, NULL, top_global->natoms, mdatoms);
-        update_mdatoms(mdatoms, state_global->lambda[efptFEP]);
-
-        if (vsite)
-        {
-            set_vsite_top(vsite, *top, mdatoms, cr);
-        }
-    }
-
-    if (constr)
-    {
-        if (ir->eConstrAlg == econtSHAKE &&
-            gmx_mtop_ftype_count(top_global, F_CONSTR) > 0)
-        {
-            gmx_fatal(FARGS, "Can not do energy minimization with %s, use %s\n",
-                      econstr_names[econtSHAKE], econstr_names[econtLINCS]);
-        }
-
-        if (!DOMAINDECOMP(cr))
-        {
-            set_constraints(constr, *top, ir, mdatoms, cr);
-        }
-
-        if (!ir->bContinuation)
-        {
-            /* Constrain the starting coordinates */
-            dvdl_constr = 0;
-            constrain(PAR(cr) ? NULL : fplog, TRUE, TRUE, constr, &(*top)->idef,
-                      ir, cr, -1, 0, 1.0, mdatoms,
-                      ems->s.x, ems->s.x, NULL, fr->bMolPBC, ems->s.box,
-                      ems->s.lambda[efptFEP], &dvdl_constr,
-                      NULL, NULL, nrnb, econqCoord);
-        }
-    }
-
-    if (PAR(cr))
-    {
-        *gstat = global_stat_init(ir);
-    }
-    else
-    {
-        *gstat = NULL;
-    }
-
-    *outf = init_mdoutf(fplog, nfile, fnm, 0, cr, ir, top_global, NULL, wcycle);
-
-    snew(*enerd, 1);
-    init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda,
-                  *enerd);
-
-    if (mdebin != NULL)
-    {
-        /* Init bin for energy stuff */
-        *mdebin = init_mdebin(mdoutf_get_fp_ene(*outf), top_global, ir, NULL);
-    }
-
-    clear_rvec(mu_tot);
-    calc_shifts(ems->s.box, fr->shift_vec);
-}
-
-static void finish_em(t_commrec *cr, gmx_mdoutf_t outf,
-                      gmx_walltime_accounting_t walltime_accounting,
-                      gmx_wallcycle_t wcycle)
-{
-    if (!(cr->duty & DUTY_PME))
-    {
-        /* Tell the PME only node to finish */
-        gmx_pme_send_finish(cr);
-    }
-
-    done_mdoutf(outf);
-
-    em_time_end(walltime_accounting, wcycle);
-}
-
-static void swap_em_state(em_state_t *ems1, em_state_t *ems2)
-{
-    em_state_t tmp;
-
-    tmp   = *ems1;
-    *ems1 = *ems2;
-    *ems2 = tmp;
-}
-
-static void copy_em_coords(em_state_t *ems, t_state *state)
-{
-    int i;
-
-    for (i = 0; (i < state->natoms); i++)
-    {
-        copy_rvec(ems->s.x[i], state->x[i]);
-    }
-}
-
-static void write_em_traj(FILE *fplog, t_commrec *cr,
-                          gmx_mdoutf_t outf,
-                          gmx_bool bX, gmx_bool bF, const char *confout,
-                          gmx_mtop_t *top_global,
-                          t_inputrec *ir, gmx_int64_t step,
-                          em_state_t *state,
-                          t_state *state_global, rvec *f_global)
-{
-    int      mdof_flags;
-    gmx_bool bIMDout = FALSE;
-
-
-    /* Shall we do IMD output? */
-    if (ir->bIMD)
-    {
-        bIMDout = do_per_step(step, IMD_get_step(ir->imd->setup));
-    }
-
-    if ((bX || bF || bIMDout || confout != NULL) && !DOMAINDECOMP(cr))
-    {
-        copy_em_coords(state, state_global);
-        f_global = state->f;
-    }
-
-    mdof_flags = 0;
-    if (bX)
-    {
-        mdof_flags |= MDOF_X;
-    }
-    if (bF)
-    {
-        mdof_flags |= MDOF_F;
-    }
-
-    /* If we want IMD output, set appropriate MDOF flag */
-    if (ir->bIMD)
-    {
-        mdof_flags |= MDOF_IMD;
-    }
-
-    mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags,
-                                     top_global, step, (double)step,
-                                     &state->s, state_global, state->f, f_global);
-
-    if (confout != NULL && MASTER(cr))
-    {
-        if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
-        {
-            /* Make molecules whole only for confout writing */
-            do_pbc_mtop(fplog, ir->ePBC, state_global->box, top_global,
-                        state_global->x);
-        }
-
-        write_sto_conf_mtop(confout,
-                            *top_global->name, top_global,
-                            state_global->x, NULL, ir->ePBC, state_global->box);
-    }
-}
-
-static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md,
-                       gmx_bool bMolPBC,
-                       em_state_t *ems1, real a, rvec *f, em_state_t *ems2,
-                       gmx_constr_t constr, gmx_localtop_t *top,
-                       t_nrnb *nrnb, gmx_wallcycle_t wcycle,
-                       gmx_int64_t count)
-
-{
-    t_state *s1, *s2;
-    int      start, end;
-    real     dvdl_constr;
-    int      nthreads gmx_unused;
-
-    s1 = &ems1->s;
-    s2 = &ems2->s;
-
-    if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
-    {
-        gmx_incons("state mismatch in do_em_step");
-    }
-
-    s2->flags = s1->flags;
-
-    if (s2->nalloc != s1->nalloc)
-    {
-        s2->nalloc = s1->nalloc;
-        srenew(s2->x, s1->nalloc);
-        srenew(ems2->f,  s1->nalloc);
-        if (s2->flags & (1<<estCGP))
-        {
-            srenew(s2->cg_p,  s1->nalloc);
-        }
-    }
-
-    s2->natoms = s1->natoms;
-    copy_mat(s1->box, s2->box);
-    /* Copy free energy state */
-    for (int i = 0; i < efptNR; i++)
-    {
-        s2->lambda[i] = s1->lambda[i];
-    }
-    copy_mat(s1->box, s2->box);
-
-    start = 0;
-    end   = md->homenr;
-
-    // cppcheck-suppress unreadVariable
-    nthreads = gmx_omp_nthreads_get(emntUpdate);
-#pragma omp parallel num_threads(nthreads)
-    {
-        rvec *x1 = s1->x;
-        rvec *x2 = s2->x;
-
-        int   gf = 0;
-#pragma omp for schedule(static) nowait
-        for (int i = start; i < end; i++)
-        {
-            if (md->cFREEZE)
-            {
-                gf = md->cFREEZE[i];
-            }
-            for (int m = 0; m < DIM; m++)
-            {
-                if (ir->opts.nFreeze[gf][m])
-                {
-                    x2[i][m] = x1[i][m];
-                }
-                else
-                {
-                    x2[i][m] = x1[i][m] + a*f[i][m];
-                }
-            }
-        }
-
-        if (s2->flags & (1<<estCGP))
-        {
-            /* Copy the CG p vector */
-            rvec *p1 = s1->cg_p;
-            rvec *p2 = s2->cg_p;
-#pragma omp for schedule(static) nowait
-            for (int i = start; i < end; i++)
-            {
-                copy_rvec(p1[i], p2[i]);
-            }
-        }
-
-        if (DOMAINDECOMP(cr))
-        {
-            s2->ddp_count = s1->ddp_count;
-            if (s2->cg_gl_nalloc < s1->cg_gl_nalloc)
-            {
-#pragma omp barrier
-                s2->cg_gl_nalloc = s1->cg_gl_nalloc;
-                srenew(s2->cg_gl, s2->cg_gl_nalloc);
-#pragma omp barrier
-            }
-            s2->ncg_gl = s1->ncg_gl;
-#pragma omp for schedule(static) nowait
-            for (int i = 0; i < s2->ncg_gl; i++)
-            {
-                s2->cg_gl[i] = s1->cg_gl[i];
-            }
-            s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
-        }
-    }
-
-    if (constr)
-    {
-        wallcycle_start(wcycle, ewcCONSTR);
-        dvdl_constr = 0;
-        constrain(NULL, TRUE, TRUE, constr, &top->idef,
-                  ir, cr, count, 0, 1.0, md,
-                  s1->x, s2->x, NULL, bMolPBC, s2->box,
-                  s2->lambda[efptBONDED], &dvdl_constr,
-                  NULL, NULL, nrnb, econqCoord);
-        wallcycle_stop(wcycle, ewcCONSTR);
-    }
-}
-
-static void em_dd_partition_system(FILE *fplog, int step, t_commrec *cr,
-                                   gmx_mtop_t *top_global, t_inputrec *ir,
-                                   em_state_t *ems, gmx_localtop_t *top,
-                                   t_mdatoms *mdatoms, t_forcerec *fr,
-                                   gmx_vsite_t *vsite, gmx_constr_t constr,
-                                   t_nrnb *nrnb, gmx_wallcycle_t wcycle)
-{
-    /* Repartition the domain decomposition */
-    dd_partition_system(fplog, step, cr, FALSE, 1,
-                        NULL, top_global, ir,
-                        &ems->s, &ems->f,
-                        mdatoms, top, fr, vsite, NULL, constr,
-                        nrnb, wcycle, FALSE);
-    dd_store_state(cr->dd, &ems->s);
-}
-
-static void evaluate_energy(FILE *fplog, t_commrec *cr,
-                            gmx_mtop_t *top_global,
-                            em_state_t *ems, gmx_localtop_t *top,
-                            t_inputrec *inputrec,
-                            t_nrnb *nrnb, gmx_wallcycle_t wcycle,
-                            gmx_global_stat_t gstat,
-                            gmx_vsite_t *vsite, gmx_constr_t constr,
-                            t_fcdata *fcd,
-                            t_graph *graph, t_mdatoms *mdatoms,
-                            t_forcerec *fr, rvec mu_tot,
-                            gmx_enerdata_t *enerd, tensor vir, tensor pres,
-                            gmx_int64_t count, gmx_bool bFirst)
-{
-    real     t;
-    gmx_bool bNS;
-    tensor   force_vir, shake_vir, ekin;
-    real     dvdl_constr, prescorr, enercorr, dvdlcorr;
-    real     terminate = 0;
-
-    /* Set the time to the initial time, the time does not change during EM */
-    t = inputrec->init_t;
-
-    if (bFirst ||
-        (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count))
-    {
-        /* This is the first state or an old state used before the last ns */
-        bNS = TRUE;
-    }
-    else
-    {
-        bNS = FALSE;
-        if (inputrec->nstlist > 0)
-        {
-            bNS = TRUE;
-        }
-    }
-
-    if (vsite)
-    {
-        construct_vsites(vsite, ems->s.x, 1, NULL,
-                         top->idef.iparams, top->idef.il,
-                         fr->ePBC, fr->bMolPBC, cr, ems->s.box);
-    }
-
-    if (DOMAINDECOMP(cr) && bNS)
-    {
-        /* Repartition the domain decomposition */
-        em_dd_partition_system(fplog, count, cr, top_global, inputrec,
-                               ems, top, mdatoms, fr, vsite, constr,
-                               nrnb, wcycle);
-    }
-
-    /* Calc force & energy on new trial position  */
-    /* do_force always puts the charge groups in the box and shifts again
-     * We do not unshift, so molecules are always whole in congrad.c
-     */
-    do_force(fplog, cr, inputrec,
-             count, nrnb, wcycle, top, &top_global->groups,
-             ems->s.box, ems->s.x, &ems->s.hist,
-             ems->f, force_vir, mdatoms, enerd, fcd,
-             ems->s.lambda, graph, fr, vsite, mu_tot, t, NULL, NULL, TRUE,
-             GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
-             GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
-             (bNS ? GMX_FORCE_NS | GMX_FORCE_DO_LR : 0));
-
-    /* Clear the unused shake virial and pressure */
-    clear_mat(shake_vir);
-    clear_mat(pres);
-
-    /* Communicate stuff when parallel */
-    if (PAR(cr) && inputrec->eI != eiNM)
-    {
-        wallcycle_start(wcycle, ewcMoveE);
-
-        global_stat(fplog, gstat, cr, enerd, force_vir, shake_vir, mu_tot,
-                    inputrec, NULL, NULL, NULL, 1, &terminate,
-                    top_global, &ems->s, FALSE,
-                    CGLO_ENERGY |
-                    CGLO_PRESSURE |
-                    CGLO_CONSTRAINT);
-
-        wallcycle_stop(wcycle, ewcMoveE);
-    }
-
-    /* Calculate long range corrections to pressure and energy */
-    calc_dispcorr(inputrec, fr, top_global->natoms, ems->s.box, ems->s.lambda[efptVDW],
-                  pres, force_vir, &prescorr, &enercorr, &dvdlcorr);
-    enerd->term[F_DISPCORR] = enercorr;
-    enerd->term[F_EPOT]    += enercorr;
-    enerd->term[F_PRES]    += prescorr;
-    enerd->term[F_DVDL]    += dvdlcorr;
-
-    ems->epot = enerd->term[F_EPOT];
-
-    if (constr)
-    {
-        /* Project out the constraint components of the force */
-        wallcycle_start(wcycle, ewcCONSTR);
-        dvdl_constr = 0;
-        constrain(NULL, FALSE, FALSE, constr, &top->idef,
-                  inputrec, cr, count, 0, 1.0, mdatoms,
-                  ems->s.x, ems->f, ems->f, fr->bMolPBC, ems->s.box,
-                  ems->s.lambda[efptBONDED], &dvdl_constr,
-                  NULL, &shake_vir, nrnb, econqForceDispl);
-        enerd->term[F_DVDL_CONSTR] += dvdl_constr;
-        m_add(force_vir, shake_vir, vir);
-        wallcycle_stop(wcycle, ewcCONSTR);
-    }
-    else
-    {
-        copy_mat(force_vir, vir);
-    }
-
-    clear_mat(ekin);
-    enerd->term[F_PRES] =
-        calc_pres(fr->ePBC, inputrec->nwall, ems->s.box, ekin, vir, pres);
-
-    sum_dhdl(enerd, ems->s.lambda, inputrec->fepvals);
-
-    if (EI_ENERGY_MINIMIZATION(inputrec->eI))
-    {
-        get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, ems);
-    }
-}
-
-static double reorder_partsum(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
-                              gmx_mtop_t *mtop,
-                              em_state_t *s_min, em_state_t *s_b)
-{
-    rvec          *fm, *fb, *fmg;
-    t_block       *cgs_gl;
-    int            ncg, *cg_gl, *index, c, cg, i, a0, a1, a, gf, m;
-    double         partsum;
-    unsigned char *grpnrFREEZE;
-
-    if (debug)
-    {
-        fprintf(debug, "Doing reorder_partsum\n");
-    }
-
-    fm = s_min->f;
-    fb = s_b->f;
-
-    cgs_gl = dd_charge_groups_global(cr->dd);
-    index  = cgs_gl->index;
-
-    /* Collect fm in a global vector fmg.
-     * This conflicts with the spirit of domain decomposition,
-     * but to fully optimize this a much more complicated algorithm is required.
-     */
-    snew(fmg, mtop->natoms);
-
-    ncg   = s_min->s.ncg_gl;
-    cg_gl = s_min->s.cg_gl;
-    i     = 0;
-    for (c = 0; c < ncg; c++)
-    {
-        cg = cg_gl[c];
-        a0 = index[cg];
-        a1 = index[cg+1];
-        for (a = a0; a < a1; a++)
-        {
-            copy_rvec(fm[i], fmg[a]);
-            i++;
-        }
-    }
-    gmx_sum(mtop->natoms*3, fmg[0], cr);
-
-    /* Now we will determine the part of the sum for the cgs in state s_b */
-    ncg         = s_b->s.ncg_gl;
-    cg_gl       = s_b->s.cg_gl;
-    partsum     = 0;
-    i           = 0;
-    gf          = 0;
-    grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
-    for (c = 0; c < ncg; c++)
-    {
-        cg = cg_gl[c];
-        a0 = index[cg];
-        a1 = index[cg+1];
-        for (a = a0; a < a1; a++)
-        {
-            if (mdatoms->cFREEZE && grpnrFREEZE)
-            {
-                gf = grpnrFREEZE[i];
-            }
-            for (m = 0; m < DIM; m++)
-            {
-                if (!opts->nFreeze[gf][m])
-                {
-                    partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
-                }
-            }
-            i++;
-        }
-    }
-
-    sfree(fmg);
-
-    return partsum;
-}
-
-static real pr_beta(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
-                    gmx_mtop_t *mtop,
-                    em_state_t *s_min, em_state_t *s_b)
-{
-    rvec  *fm, *fb;
-    double sum;
-    int    gf, i, m;
-
-    /* This is just the classical Polak-Ribiere calculation of beta;
-     * it looks a bit complicated since we take freeze groups into account,
-     * and might have to sum it in parallel runs.
-     */
-
-    if (!DOMAINDECOMP(cr) ||
-        (s_min->s.ddp_count == cr->dd->ddp_count &&
-         s_b->s.ddp_count   == cr->dd->ddp_count))
-    {
-        fm  = s_min->f;
-        fb  = s_b->f;
-        sum = 0;
-        gf  = 0;
-        /* This part of code can be incorrect with DD,
-         * since the atom ordering in s_b and s_min might differ.
-         */
-        for (i = 0; i < mdatoms->homenr; i++)
-        {
-            if (mdatoms->cFREEZE)
-            {
-                gf = mdatoms->cFREEZE[i];
-            }
-            for (m = 0; m < DIM; m++)
-            {
-                if (!opts->nFreeze[gf][m])
-                {
-                    sum += (fb[i][m] - fm[i][m])*fb[i][m];
-                }
-            }
-        }
-    }
-    else
-    {
-        /* We need to reorder cgs while summing */
-        sum = reorder_partsum(cr, opts, mdatoms, mtop, s_min, s_b);
-    }
-    if (PAR(cr))
-    {
-        gmx_sumd(1, &sum, cr);
-    }
-
-    return sum/sqr(s_min->fnorm);
-}
-
-double do_cg(FILE *fplog, t_commrec *cr,
-             int nfile, const t_filenm fnm[],
-             const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
-             int gmx_unused nstglobalcomm,
-             gmx_vsite_t *vsite, gmx_constr_t constr,
-             int gmx_unused stepout,
-             t_inputrec *inputrec,
-             gmx_mtop_t *top_global, t_fcdata *fcd,
-             t_state *state_global,
-             t_mdatoms *mdatoms,
-             t_nrnb *nrnb, gmx_wallcycle_t wcycle,
-             gmx_edsam_t gmx_unused ed,
-             t_forcerec *fr,
-             int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
-             gmx_membed_t gmx_unused membed,
-             real gmx_unused cpt_period, real gmx_unused max_hours,
-             int imdport,
-             unsigned long gmx_unused Flags,
-             gmx_walltime_accounting_t walltime_accounting)
-{
-    const char       *CG = "Polak-Ribiere Conjugate Gradients";
-
-    em_state_t       *s_min, *s_a, *s_b, *s_c;
-    gmx_localtop_t   *top;
-    gmx_enerdata_t   *enerd;
-    rvec             *f;
-    gmx_global_stat_t gstat;
-    t_graph          *graph;
-    rvec             *f_global, *p, *sf;
-    double            gpa, gpb, gpc, tmp, minstep;
-    real              fnormn;
-    real              stepsize;
-    real              a, b, c, beta = 0.0;
-    real              epot_repl = 0;
-    real              pnorm;
-    t_mdebin         *mdebin;
-    gmx_bool          converged, foundlower;
-    rvec              mu_tot;
-    gmx_bool          do_log = FALSE, do_ene = FALSE, do_x, do_f;
-    tensor            vir, pres;
-    int               number_steps, neval = 0, nstcg = inputrec->nstcgsteep;
-    gmx_mdoutf_t      outf;
-    int               i, m, gf, step, nminstep;
-
-    step = 0;
-
-    s_min = init_em_state();
-    s_a   = init_em_state();
-    s_b   = init_em_state();
-    s_c   = init_em_state();
-
-    /* Init em and store the local state in s_min */
-    init_em(fplog, CG, cr, inputrec,
-            state_global, top_global, s_min, &top, &f, &f_global,
-            nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
-            nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle);
-
-    /* Print to log file */
-    print_em_start(fplog, cr, walltime_accounting, wcycle, CG);
-
-    /* Max number of steps */
-    number_steps = inputrec->nsteps;
-
-    if (MASTER(cr))
-    {
-        sp_header(stderr, CG, inputrec->em_tol, number_steps);
-    }
-    if (fplog)
-    {
-        sp_header(fplog, CG, inputrec->em_tol, number_steps);
-    }
-
-    /* Call the force routine and some auxiliary (neighboursearching etc.) */
-    /* do_force always puts the charge groups in the box and shifts again
-     * We do not unshift, so molecules are always whole in congrad.c
-     */
-    evaluate_energy(fplog, cr,
-                    top_global, s_min, top,
-                    inputrec, nrnb, wcycle, gstat,
-                    vsite, constr, fcd, graph, mdatoms, fr,
-                    mu_tot, enerd, vir, pres, -1, TRUE);
-    where();
-
-    if (MASTER(cr))
-    {
-        /* Copy stuff to the energy bin for easy printing etc. */
-        upd_mdebin(mdebin, FALSE, FALSE, (double)step,
-                   mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box,
-                   NULL, NULL, vir, pres, NULL, mu_tot, constr);
-
-        print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]);
-        print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL,
-                   TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
-    }
-    where();
-
-    /* Estimate/guess the initial stepsize */
-    stepsize = inputrec->em_stepsize/s_min->fnorm;
-
-    if (MASTER(cr))
-    {
-        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
-        fprintf(stderr, "   F-max             = %12.5e on atom %d\n",
-                s_min->fmax, s_min->a_fmax+1);
-        fprintf(stderr, "   F-Norm            = %12.5e\n",
-                s_min->fnorm/sqrtNumAtoms);
-        fprintf(stderr, "\n");
-        /* and copy to the log file too... */
-        fprintf(fplog, "   F-max             = %12.5e on atom %d\n",
-                s_min->fmax, s_min->a_fmax+1);
-        fprintf(fplog, "   F-Norm            = %12.5e\n",
-                s_min->fnorm/sqrtNumAtoms);
-        fprintf(fplog, "\n");
-    }
-    /* Start the loop over CG steps.
-     * Each successful step is counted, and we continue until
-     * we either converge or reach the max number of steps.
-     */
-    converged = FALSE;
-    for (step = 0; (number_steps < 0 || (number_steps >= 0 && step <= number_steps)) && !converged; step++)
-    {
-
-        /* start taking steps in a new direction
-         * First time we enter the routine, beta=0, and the direction is
-         * simply the negative gradient.
-         */
-
-        /* Calculate the new direction in p, and the gradient in this direction, gpa */
-        p   = s_min->s.cg_p;
-        sf  = s_min->f;
-        gpa = 0;
-        gf  = 0;
-        for (i = 0; i < mdatoms->homenr; i++)
-        {
-            if (mdatoms->cFREEZE)
-            {
-                gf = mdatoms->cFREEZE[i];
-            }
-            for (m = 0; m < DIM; m++)
-            {
-                if (!inputrec->opts.nFreeze[gf][m])
-                {
-                    p[i][m] = sf[i][m] + beta*p[i][m];
-                    gpa    -= p[i][m]*sf[i][m];
-                    /* f is negative gradient, thus the sign */
-                }
-                else
-                {
-                    p[i][m] = 0;
-                }
-            }
-        }
-
-        /* Sum the gradient along the line across CPUs */
-        if (PAR(cr))
-        {
-            gmx_sumd(1, &gpa, cr);
-        }
-
-        /* Calculate the norm of the search vector */
-        get_f_norm_max(cr, &(inputrec->opts), mdatoms, p, &pnorm, NULL, NULL);
-
-        /* Just in case stepsize reaches zero due to numerical precision... */
-        if (stepsize <= 0)
-        {
-            stepsize = inputrec->em_stepsize/pnorm;
-        }
-
-        /*
-         * Double check the value of the derivative in the search direction.
-         * If it is positive it must be due to the old information in the
-         * CG formula, so just remove that and start over with beta=0.
-         * This corresponds to a steepest descent step.
-         */
-        if (gpa > 0)
-        {
-            beta = 0;
-            step--;   /* Don't count this step since we are restarting */
-            continue; /* Go back to the beginning of the big for-loop */
-        }
-
-        /* Calculate minimum allowed stepsize, before the average (norm)
-         * relative change in coordinate is smaller than precision
-         */
-        minstep = 0;
-        for (i = 0; i < mdatoms->homenr; i++)
-        {
-            for (m = 0; m < DIM; m++)
-            {
-                tmp = fabs(s_min->s.x[i][m]);
-                if (tmp < 1.0)
-                {
-                    tmp = 1.0;
-                }
-                tmp      = p[i][m]/tmp;
-                minstep += tmp*tmp;
-            }
-        }
-        /* Add up from all CPUs */
-        if (PAR(cr))
-        {
-            gmx_sumd(1, &minstep, cr);
-        }
-
-        minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
-
-        if (stepsize < minstep)
-        {
-            converged = TRUE;
-            break;
-        }
-
-        /* Write coordinates if necessary */
-        do_x = do_per_step(step, inputrec->nstxout);
-        do_f = do_per_step(step, inputrec->nstfout);
-
-        write_em_traj(fplog, cr, outf, do_x, do_f, NULL,
-                      top_global, inputrec, step,
-                      s_min, state_global, f_global);
-
-        /* Take a step downhill.
-         * In theory, we should minimize the function along this direction.
-         * That is quite possible, but it turns out to take 5-10 function evaluations
-         * for each line. However, we dont really need to find the exact minimum -
-         * it is much better to start a new CG step in a modified direction as soon
-         * as we are close to it. This will save a lot of energy evaluations.
-         *
-         * In practice, we just try to take a single step.
-         * If it worked (i.e. lowered the energy), we increase the stepsize but
-         * the continue straight to the next CG step without trying to find any minimum.
-         * If it didn't work (higher energy), there must be a minimum somewhere between
-         * the old position and the new one.
-         *
-         * Due to the finite numerical accuracy, it turns out that it is a good idea
-         * to even accept a SMALL increase in energy, if the derivative is still downhill.
-         * This leads to lower final energies in the tests I've done. / Erik
-         */
-        s_a->epot = s_min->epot;
-        a         = 0.0;
-        c         = a + stepsize; /* reference position along line is zero */
-
-        if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count)
-        {
-            em_dd_partition_system(fplog, step, cr, top_global, inputrec,
-                                   s_min, top, mdatoms, fr, vsite, constr,
-                                   nrnb, wcycle);
-        }
-
-        /* Take a trial step (new coords in s_c) */
-        do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, c, s_min->s.cg_p, s_c,
-                   constr, top, nrnb, wcycle, -1);
-
-        neval++;
-        /* Calculate energy for the trial step */
-        evaluate_energy(fplog, cr,
-                        top_global, s_c, top,
-                        inputrec, nrnb, wcycle, gstat,
-                        vsite, constr, fcd, graph, mdatoms, fr,
-                        mu_tot, enerd, vir, pres, -1, FALSE);
-
-        /* Calc derivative along line */
-        p   = s_c->s.cg_p;
-        sf  = s_c->f;
-        gpc = 0;
-        for (i = 0; i < mdatoms->homenr; i++)
-        {
-            for (m = 0; m < DIM; m++)
-            {
-                gpc -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
-            }
-        }
-        /* Sum the gradient along the line across CPUs */
-        if (PAR(cr))
-        {
-            gmx_sumd(1, &gpc, cr);
-        }
-
-        /* This is the max amount of increase in energy we tolerate */
-        tmp = sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
-
-        /* Accept the step if the energy is lower, or if it is not significantly higher
-         * and the line derivative is still negative.
-         */
-        if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp)))
-        {
-            foundlower = TRUE;
-            /* Great, we found a better energy. Increase step for next iteration
-             * if we are still going down, decrease it otherwise
-             */
-            if (gpc < 0)
-            {
-                stepsize *= 1.618034; /* The golden section */
-            }
-            else
-            {
-                stepsize *= 0.618034; /* 1/golden section */
-            }
-        }
-        else
-        {
-            /* New energy is the same or higher. We will have to do some work
-             * to find a smaller value in the interval. Take smaller step next time!
-             */
-            foundlower = FALSE;
-            stepsize  *= 0.618034;
-        }
-
-
-
-
-        /* OK, if we didn't find a lower value we will have to locate one now - there must
-         * be one in the interval [a=0,c].
-         * The same thing is valid here, though: Don't spend dozens of iterations to find
-         * the line minimum. We try to interpolate based on the derivative at the endpoints,
-         * and only continue until we find a lower value. In most cases this means 1-2 iterations.
-         *
-         * I also have a safeguard for potentially really pathological functions so we never
-         * take more than 20 steps before we give up ...
-         *
-         * If we already found a lower value we just skip this step and continue to the update.
-         */
-        if (!foundlower)
-        {
-            nminstep = 0;
-
-            do
-            {
-                /* Select a new trial point.
-                 * If the derivatives at points a & c have different sign we interpolate to zero,
-                 * otherwise just do a bisection.
-                 */
-                if (gpa < 0 && gpc > 0)
-                {
-                    b = a + gpa*(a-c)/(gpc-gpa);
-                }
-                else
-                {
-                    b = 0.5*(a+c);
-                }
-
-                /* safeguard if interpolation close to machine accuracy causes errors:
-                 * never go outside the interval
-                 */
-                if (b <= a || b >= c)
-                {
-                    b = 0.5*(a+c);
-                }
-
-                if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
-                {
-                    /* Reload the old state */
-                    em_dd_partition_system(fplog, -1, cr, top_global, inputrec,
-                                           s_min, top, mdatoms, fr, vsite, constr,
-                                           nrnb, wcycle);
-                }
-
-                /* Take a trial step to this new point - new coords in s_b */
-                do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, b, s_min->s.cg_p, s_b,
-                           constr, top, nrnb, wcycle, -1);
-
-                neval++;
-                /* Calculate energy for the trial step */
-                evaluate_energy(fplog, cr,
-                                top_global, s_b, top,
-                                inputrec, nrnb, wcycle, gstat,
-                                vsite, constr, fcd, graph, mdatoms, fr,
-                                mu_tot, enerd, vir, pres, -1, FALSE);
-
-                /* p does not change within a step, but since the domain decomposition
-                 * might change, we have to use cg_p of s_b here.
-                 */
-                p   = s_b->s.cg_p;
-                sf  = s_b->f;
-                gpb = 0;
-                for (i = 0; i < mdatoms->homenr; i++)
-                {
-                    for (m = 0; m < DIM; m++)
-                    {
-                        gpb -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
-                    }
-                }
-                /* Sum the gradient along the line across CPUs */
-                if (PAR(cr))
-                {
-                    gmx_sumd(1, &gpb, cr);
-                }
-
-                if (debug)
-                {
-                    fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
-                            s_a->epot, s_b->epot, s_c->epot, gpb);
-                }
-
-                epot_repl = s_b->epot;
-
-                /* Keep one of the intervals based on the value of the derivative at the new point */
-                if (gpb > 0)
-                {
-                    /* Replace c endpoint with b */
-                    swap_em_state(s_b, s_c);
-                    c   = b;
-                    gpc = gpb;
-                }
-                else
-                {
-                    /* Replace a endpoint with b */
-                    swap_em_state(s_b, s_a);
-                    a   = b;
-                    gpa = gpb;
-                }
-
-                /*
-                 * Stop search as soon as we find a value smaller than the endpoints.
-                 * Never run more than 20 steps, no matter what.
-                 */
-                nminstep++;
-            }
-            while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
-                   (nminstep < 20));
-
-            if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
-                nminstep >= 20)
-            {
-                /* OK. We couldn't find a significantly lower energy.
-                 * If beta==0 this was steepest descent, and then we give up.
-                 * If not, set beta=0 and restart with steepest descent before quitting.
-                 */
-                if (beta == 0.0)
-                {
-                    /* Converged */
-                    converged = TRUE;
-                    break;
-                }
-                else
-                {
-                    /* Reset memory before giving up */
-                    beta = 0.0;
-                    continue;
-                }
-            }
-
-            /* Select min energy state of A & C, put the best in B.
-             */
-            if (s_c->epot < s_a->epot)
-            {
-                if (debug)
-                {
-                    fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n",
-                            s_c->epot, s_a->epot);
-                }
-                swap_em_state(s_b, s_c);
-                gpb = gpc;
-            }
-            else
-            {
-                if (debug)
-                {
-                    fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n",
-                            s_a->epot, s_c->epot);
-                }
-                swap_em_state(s_b, s_a);
-                gpb = gpa;
-            }
-
-        }
-        else
-        {
-            if (debug)
-            {
-                fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n",
-                        s_c->epot);
-            }
-            swap_em_state(s_b, s_c);
-            gpb = gpc;
-        }
-
-        /* new search direction */
-        /* beta = 0 means forget all memory and restart with steepest descents. */
-        if (nstcg && ((step % nstcg) == 0))
-        {
-            beta = 0.0;
-        }
-        else
-        {
-            /* s_min->fnorm cannot be zero, because then we would have converged
-             * and broken out.
-             */
-
-            /* Polak-Ribiere update.
-             * Change to fnorm2/fnorm2_old for Fletcher-Reeves
-             */
-            beta = pr_beta(cr, &inputrec->opts, mdatoms, top_global, s_min, s_b);
-        }
-        /* Limit beta to prevent oscillations */
-        if (fabs(beta) > 5.0)
-        {
-            beta = 0.0;
-        }
-
-
-        /* update positions */
-        swap_em_state(s_min, s_b);
-        gpa = gpb;
-
-        /* Print it if necessary */
-        if (MASTER(cr))
-        {
-            if (bVerbose)
-            {
-                double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
-                fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
-                        step, s_min->epot, s_min->fnorm/sqrtNumAtoms,
-                        s_min->fmax, s_min->a_fmax+1);
-            }
-            /* Store the new (lower) energies */
-            upd_mdebin(mdebin, FALSE, FALSE, (double)step,
-                       mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box,
-                       NULL, NULL, vir, pres, NULL, mu_tot, constr);
-
-            do_log = do_per_step(step, inputrec->nstlog);
-            do_ene = do_per_step(step, inputrec->nstenergy);
-
-            /* Prepare IMD energy record, if bIMD is TRUE. */
-            IMD_fill_energy_record(inputrec->bIMD, inputrec->imd, enerd, step, TRUE);
-
-            if (do_log)
-            {
-                print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]);
-            }
-            print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE,
-                       do_log ? fplog : NULL, step, step, eprNORMAL,
-                       TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
-        }
-
-        /* Send energies and positions to the IMD client if bIMD is TRUE. */
-        if (do_IMD(inputrec->bIMD, step, cr, TRUE, state_global->box, state_global->x, inputrec, 0, wcycle) && MASTER(cr))
-        {
-            IMD_send_positions(inputrec->imd);
-        }
-
-        /* Stop when the maximum force lies below tolerance.
-         * If we have reached machine precision, converged is already set to true.
-         */
-        converged = converged || (s_min->fmax < inputrec->em_tol);
-
-    } /* End of the loop */
-
-    /* IMD cleanup, if bIMD is TRUE. */
-    IMD_finalize(inputrec->bIMD, inputrec->imd);
-
-    if (converged)
-    {
-        step--; /* we never took that last step in this case */
-
-    }
-    if (s_min->fmax > inputrec->em_tol)
-    {
-        if (MASTER(cr))
-        {
-            warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE);
-            warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE);
-        }
-        converged = FALSE;
-    }
-
-    if (MASTER(cr))
-    {
-        /* If we printed energy and/or logfile last step (which was the last step)
-         * we don't have to do it again, but otherwise print the final values.
-         */
-        if (!do_log)
-        {
-            /* Write final value to log since we didn't do anything the last step */
-            print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]);
-        }
-        if (!do_ene || !do_log)
-        {
-            /* Write final energy file entries */
-            print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE,
-                       !do_log ? fplog : NULL, step, step, eprNORMAL,
-                       TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
-        }
-    }
-
-    /* Print some stuff... */
-    if (MASTER(cr))
-    {
-        fprintf(stderr, "\nwriting lowest energy coordinates.\n");
-    }
-
-    /* IMPORTANT!
-     * For accurate normal mode calculation it is imperative that we
-     * store the last conformation into the full precision binary trajectory.
-     *
-     * However, we should only do it if we did NOT already write this step
-     * above (which we did if do_x or do_f was true).
-     */
-    do_x = !do_per_step(step, inputrec->nstxout);
-    do_f = (inputrec->nstfout > 0 && !do_per_step(step, inputrec->nstfout));
-
-    write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm),
-                  top_global, inputrec, step,
-                  s_min, state_global, f_global);
-
-
-    if (MASTER(cr))
-    {
-        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
-        fnormn = s_min->fnorm/sqrtNumAtoms;
-        print_converged(stderr, CG, inputrec->em_tol, step, converged, number_steps,
-                        s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
-        print_converged(fplog, CG, inputrec->em_tol, step, converged, number_steps,
-                        s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
-
-        fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
-    }
-
-    finish_em(cr, outf, walltime_accounting, wcycle);
-
-    /* To print the actual number of steps we needed somewhere */
-    walltime_accounting_set_nsteps_done(walltime_accounting, step);
-
-    return 0;
-} /* That's all folks */
-
-
-double do_lbfgs(FILE *fplog, t_commrec *cr,
-                int nfile, const t_filenm fnm[],
-                const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
-                int gmx_unused nstglobalcomm,
-                gmx_vsite_t *vsite, gmx_constr_t constr,
-                int gmx_unused stepout,
-                t_inputrec *inputrec,
-                gmx_mtop_t *top_global, t_fcdata *fcd,
-                t_state *state,
-                t_mdatoms *mdatoms,
-                t_nrnb *nrnb, gmx_wallcycle_t wcycle,
-                gmx_edsam_t gmx_unused ed,
-                t_forcerec *fr,
-                int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
-                gmx_membed_t gmx_unused membed,
-                real gmx_unused cpt_period, real gmx_unused max_hours,
-                int imdport,
-                unsigned long gmx_unused Flags,
-                gmx_walltime_accounting_t walltime_accounting)
-{
-    static const char *LBFGS = "Low-Memory BFGS Minimizer";
-    em_state_t         ems;
-    gmx_localtop_t    *top;
-    gmx_enerdata_t    *enerd;
-    rvec              *f;
-    gmx_global_stat_t  gstat;
-    t_graph           *graph;
-    rvec              *f_global;
-    int                ncorr, nmaxcorr, point, cp, neval, nminstep;
-    double             stepsize, step_taken, gpa, gpb, gpc, tmp, minstep;
-    real              *rho, *alpha, *ff, *xx, *p, *s, *lastx, *lastf, **dx, **dg;
-    real              *xa, *xb, *xc, *fa, *fb, *fc, *xtmp, *ftmp;
-    real               a, b, c, maxdelta, delta;
-    real               diag, Epot0, Epot, EpotA, EpotB, EpotC;
-    real               dgdx, dgdg, sq, yr, beta;
-    t_mdebin          *mdebin;
-    gmx_bool           converged;
-    rvec               mu_tot;
-    real               fnorm, fmax;
-    gmx_bool           do_log, do_ene, do_x, do_f, foundlower, *frozen;
-    tensor             vir, pres;
-    int                start, end, number_steps;
-    gmx_mdoutf_t       outf;
-    int                i, k, m, n, nfmax, gf, step;
-    int                mdof_flags;
-
-    if (PAR(cr))
-    {
-        gmx_fatal(FARGS, "Cannot do parallel L-BFGS Minimization - yet.\n");
-    }
-
-    if (NULL != constr)
-    {
-        gmx_fatal(FARGS, "The combination of constraints and L-BFGS minimization is not implemented. Either do not use constraints, or use another minimizer (e.g. steepest descent).");
-    }
-
-    n        = 3*state->natoms;
-    nmaxcorr = inputrec->nbfgscorr;
-
-    /* Allocate memory */
-    /* Use pointers to real so we dont have to loop over both atoms and
-     * dimensions all the time...
-     * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
-     * that point to the same memory.
-     */
-    snew(xa, n);
-    snew(xb, n);
-    snew(xc, n);
-    snew(fa, n);
-    snew(fb, n);
-    snew(fc, n);
-    snew(frozen, n);
-
-    snew(p, n);
-    snew(lastx, n);
-    snew(lastf, n);
-    snew(rho, nmaxcorr);
-    snew(alpha, nmaxcorr);
-
-    snew(dx, nmaxcorr);
-    for (i = 0; i < nmaxcorr; i++)
-    {
-        snew(dx[i], n);
-    }
-
-    snew(dg, nmaxcorr);
-    for (i = 0; i < nmaxcorr; i++)
-    {
-        snew(dg[i], n);
-    }
-
-    step  = 0;
-    neval = 0;
-
-    /* Init em */
-    init_em(fplog, LBFGS, cr, inputrec,
-            state, top_global, &ems, &top, &f, &f_global,
-            nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
-            nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle);
-    /* Do_lbfgs is not completely updated like do_steep and do_cg,
-     * so we free some memory again.
-     */
-    sfree(ems.s.x);
-    sfree(ems.f);
-
-    xx = (real *)state->x;
-    ff = (real *)f;
-
-    start = 0;
-    end   = mdatoms->homenr;
-
-    /* Print to log file */
-    print_em_start(fplog, cr, walltime_accounting, wcycle, LBFGS);
-
-    do_log = do_ene = do_x = do_f = TRUE;
-
-    /* Max number of steps */
-    number_steps = inputrec->nsteps;
-
-    /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
-    gf = 0;
-    for (i = start; i < end; i++)
-    {
-        if (mdatoms->cFREEZE)
-        {
-            gf = mdatoms->cFREEZE[i];
-        }
-        for (m = 0; m < DIM; m++)
-        {
-            frozen[3*i+m] = inputrec->opts.nFreeze[gf][m];
-        }
-    }
-    if (MASTER(cr))
-    {
-        sp_header(stderr, LBFGS, inputrec->em_tol, number_steps);
-    }
-    if (fplog)
-    {
-        sp_header(fplog, LBFGS, inputrec->em_tol, number_steps);
-    }
-
-    if (vsite)
-    {
-        construct_vsites(vsite, state->x, 1, NULL,
-                         top->idef.iparams, top->idef.il,
-                         fr->ePBC, fr->bMolPBC, cr, state->box);
-    }
-
-    /* Call the force routine and some auxiliary (neighboursearching etc.) */
-    /* do_force always puts the charge groups in the box and shifts again
-     * We do not unshift, so molecules are always whole
-     */
-    neval++;
-    ems.s.x = state->x;
-    ems.f   = f;
-    evaluate_energy(fplog, cr,
-                    top_global, &ems, top,
-                    inputrec, nrnb, wcycle, gstat,
-                    vsite, constr, fcd, graph, mdatoms, fr,
-                    mu_tot, enerd, vir, pres, -1, TRUE);
-    where();
-
-    if (MASTER(cr))
-    {
-        /* Copy stuff to the energy bin for easy printing etc. */
-        upd_mdebin(mdebin, FALSE, FALSE, (double)step,
-                   mdatoms->tmass, enerd, state, inputrec->fepvals, inputrec->expandedvals, state->box,
-                   NULL, NULL, vir, pres, NULL, mu_tot, constr);
-
-        print_ebin_header(fplog, step, step, state->lambda[efptFEP]);
-        print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL,
-                   TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
-    }
-    where();
-
-    /* This is the starting energy */
-    Epot = enerd->term[F_EPOT];
-
-    fnorm = ems.fnorm;
-    fmax  = ems.fmax;
-    nfmax = ems.a_fmax;
-
-    /* Set the initial step.
-     * since it will be multiplied by the non-normalized search direction
-     * vector (force vector the first time), we scale it by the
-     * norm of the force.
-     */
-
-    if (MASTER(cr))
-    {
-        double sqrtNumAtoms = sqrt(static_cast<double>(state->natoms));
-        fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr);
-        fprintf(stderr, "   F-max             = %12.5e on atom %d\n", fmax, nfmax+1);
-        fprintf(stderr, "   F-Norm            = %12.5e\n", fnorm/sqrtNumAtoms);
-        fprintf(stderr, "\n");
-        /* and copy to the log file too... */
-        fprintf(fplog, "Using %d BFGS correction steps.\n\n", nmaxcorr);
-        fprintf(fplog, "   F-max             = %12.5e on atom %d\n", fmax, nfmax+1);
-        fprintf(fplog, "   F-Norm            = %12.5e\n", fnorm/sqrtNumAtoms);
-        fprintf(fplog, "\n");
-    }
-
-    // Point is an index to the memory of search directions, where 0 is the first one.
-    point = 0;
-
-    // Set initial search direction to the force (-gradient), or 0 for frozen particles.
-    for (i = 0; i < n; i++)
-    {
-        if (!frozen[i])
-        {
-            dx[point][i] = ff[i]; /* Initial search direction */
-        }
-        else
-        {
-            dx[point][i] = 0;
-        }
-    }
-
-    // Stepsize will be modified during the search, and actually it is not critical
-    // (the main efficiency in the algorithm comes from changing directions), but
-    // we still need an initial value, so estimate it as the inverse of the norm
-    // so we take small steps where the potential fluctuates a lot.
-    stepsize  = 1.0/fnorm;
-
-    /* Start the loop over BFGS steps.
-     * Each successful step is counted, and we continue until
-     * we either converge or reach the max number of steps.
-     */
-
-    ncorr = 0;
-
-    /* Set the gradient from the force */
-    converged = FALSE;
-    for (step = 0; (number_steps < 0 || (number_steps >= 0 && step <= number_steps)) && !converged; step++)
-    {
-
-        /* Write coordinates if necessary */
-        do_x = do_per_step(step, inputrec->nstxout);
-        do_f = do_per_step(step, inputrec->nstfout);
-
-        mdof_flags = 0;
-        if (do_x)
-        {
-            mdof_flags |= MDOF_X;
-        }
-
-        if (do_f)
-        {
-            mdof_flags |= MDOF_F;
-        }
-
-        if (inputrec->bIMD)
-        {
-            mdof_flags |= MDOF_IMD;
-        }
-
-        mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags,
-                                         top_global, step, (real)step, state, state, f, f);
-
-        /* Do the linesearching in the direction dx[point][0..(n-1)] */
-
-        /* make s a pointer to current search direction - point=0 first time we get here */
-        s = dx[point];
-
-        // calculate line gradient in position A
-        for (gpa = 0, i = 0; i < n; i++)
-        {
-            gpa -= s[i]*ff[i];
-        }
-
-        /* Calculate minimum allowed stepsize along the line, before the average (norm)
-         * relative change in coordinate is smaller than precision
-         */
-        for (minstep = 0, i = 0; i < n; i++)
-        {
-            tmp = fabs(xx[i]);
-            if (tmp < 1.0)
-            {
-                tmp = 1.0;
-            }
-            tmp      = s[i]/tmp;
-            minstep += tmp*tmp;
-        }
-        minstep = GMX_REAL_EPS/sqrt(minstep/n);
-
-        if (stepsize < minstep)
-        {
-            converged = TRUE;
-            break;
-        }
-
-        // Before taking any steps along the line, store the old position
-        for (i = 0; i < n; i++)
-        {
-            lastx[i] = xx[i];
-            lastf[i] = ff[i];
-        }
-        Epot0 = Epot;
-
-        for (i = 0; i < n; i++)
-        {
-            xa[i] = xx[i];
-        }
-
-        /* Take a step downhill.
-         * In theory, we should find the actual minimum of the function in this
-         * direction, somewhere along the line.
-         * That is quite possible, but it turns out to take 5-10 function evaluations
-         * for each line. However, we dont really need to find the exact minimum -
-         * it is much better to start a new BFGS step in a modified direction as soon
-         * as we are close to it. This will save a lot of energy evaluations.
-         *
-         * In practice, we just try to take a single step.
-         * If it worked (i.e. lowered the energy), we increase the stepsize but
-         * continue straight to the next BFGS step without trying to find any minimum,
-         * i.e. we change the search direction too. If the line was smooth, it is
-         * likely we are in a smooth region, and then it makes sense to take longer
-         * steps in the modified search direction too.
-         *
-         * If it didn't work (higher energy), there must be a minimum somewhere between
-         * the old position and the new one. Then we need to start by finding a lower
-         * value before we change search direction. Since the energy was apparently
-         * quite rough, we need to decrease the step size.
-         *
-         * Due to the finite numerical accuracy, it turns out that it is a good idea
-         * to accept a SMALL increase in energy, if the derivative is still downhill.
-         * This leads to lower final energies in the tests I've done. / Erik
-         */
-
-        // State "A" is the first position along the line.
-        // reference position along line is initially zero
-        EpotA      = Epot0;
-        a          = 0.0;
-
-        // Check stepsize first. We do not allow displacements
-        // larger than emstep.
-        //
-        do
-        {
-            // Pick a new position C by adding stepsize to A.
-            c        = a + stepsize;
-
-            // Calculate what the largest change in any individual coordinate
-            // would be (translation along line * gradient along line)
-            maxdelta = 0;
-            for (i = 0; i < n; i++)
-            {
-                delta = c*s[i];
-                if (delta > maxdelta)
-                {
-                    maxdelta = delta;
-                }
-            }
-            // If any displacement is larger than the stepsize limit, reduce the step
-            if (maxdelta > inputrec->em_stepsize)
-            {
-                stepsize *= 0.1;
-            }
-        }
-        while (maxdelta > inputrec->em_stepsize);
-
-        // Take a trial step and move the coordinate array xc[] to position C
-        for (i = 0; i < n; i++)
-        {
-            xc[i] = lastx[i] + c*s[i];
-        }
-
-        neval++;
-        // Calculate energy for the trial step in position C
-        ems.s.x = (rvec *)xc;
-        ems.f   = (rvec *)fc;
-        evaluate_energy(fplog, cr,
-                        top_global, &ems, top,
-                        inputrec, nrnb, wcycle, gstat,
-                        vsite, constr, fcd, graph, mdatoms, fr,
-                        mu_tot, enerd, vir, pres, step, FALSE);
-        EpotC = ems.epot;
-
-        // Calc line gradient in position C
-        for (gpc = 0, i = 0; i < n; i++)
-        {
-            gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */
-        }
-        /* Sum the gradient along the line across CPUs */
-        if (PAR(cr))
-        {
-            gmx_sumd(1, &gpc, cr);
-        }
-
-        // This is the max amount of increase in energy we tolerate.
-        // By allowing VERY small changes (close to numerical precision) we
-        // frequently find even better (lower) final energies.
-        tmp = sqrt(GMX_REAL_EPS)*fabs(EpotA);
-
-        // Accept the step if the energy is lower in the new position C (compared to A),
-        // or if it is not significantly higher and the line derivative is still negative.
-        if (EpotC < EpotA || (gpc < 0 && EpotC < (EpotA+tmp)))
-        {
-            // Great, we found a better energy. We no longer try to alter the
-            // stepsize, but simply accept this new better position. The we select a new
-            // search direction instead, which will be much more efficient than continuing
-            // to take smaller steps along a line. Set fnorm based on the new C position,
-            // which will be used to update the stepsize to 1/fnorm further down.
-            foundlower = TRUE;
-            fnorm      = ems.fnorm;
-        }
-        else
-        {
-            // If we got here, the energy is NOT lower in point C, i.e. it will be the same
-            // or higher than in point A. In this case it is pointless to move to point C,
-            // so we will have to do more iterations along the same line to find a smaller
-            // value in the interval [A=0.0,C].
-            // Here, A is still 0.0, but that will change when we do a search in the interval
-            // [0.0,C] below. That search we will do by interpolation or bisection rather
-            // than with the stepsize, so no need to modify it. For the next search direction
-            // it will be reset to 1/fnorm anyway.
-            foundlower = FALSE;
-        }
-
-        if (!foundlower)
-        {
-            // OK, if we didn't find a lower value we will have to locate one now - there must
-            // be one in the interval [a,c].
-            // The same thing is valid here, though: Don't spend dozens of iterations to find
-            // the line minimum. We try to interpolate based on the derivative at the endpoints,
-            // and only continue until we find a lower value. In most cases this means 1-2 iterations.
-            // I also have a safeguard for potentially really pathological functions so we never
-            // take more than 20 steps before we give up.
-            // If we already found a lower value we just skip this step and continue to the update.
-            nminstep = 0;
-            do
-            {
-                // Select a new trial point B in the interval [A,C].
-                // If the derivatives at points a & c have different sign we interpolate to zero,
-                // otherwise just do a bisection since there might be multiple minima/maxima
-                // inside the interval.
-                if (gpa < 0 && gpc > 0)
-                {
-                    b = a + gpa*(a-c)/(gpc-gpa);
-                }
-                else
-                {
-                    b = 0.5*(a+c);
-                }
-
-                /* safeguard if interpolation close to machine accuracy causes errors:
-                 * never go outside the interval
-                 */
-                if (b <= a || b >= c)
-                {
-                    b = 0.5*(a+c);
-                }
-
-                // Take a trial step to point B
-                for (i = 0; i < n; i++)
-                {
-                    xb[i] = lastx[i] + b*s[i];
-                }
-
-                neval++;
-                // Calculate energy for the trial step in point B
-                ems.s.x = (rvec *)xb;
-                ems.f   = (rvec *)fb;
-                evaluate_energy(fplog, cr,
-                                top_global, &ems, top,
-                                inputrec, nrnb, wcycle, gstat,
-                                vsite, constr, fcd, graph, mdatoms, fr,
-                                mu_tot, enerd, vir, pres, step, FALSE);
-                EpotB = ems.epot;
-                fnorm = ems.fnorm;
-
-                // Calculate gradient in point B
-                for (gpb = 0, i = 0; i < n; i++)
-                {
-                    gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */
-
-                }
-                /* Sum the gradient along the line across CPUs */
-                if (PAR(cr))
-                {
-                    gmx_sumd(1, &gpb, cr);
-                }
-
-                // Keep one of the intervals [A,B] or [B,C] based on the value of the derivative
-                // at the new point B, and rename the endpoints of this new interval A and C.
-                if (gpb > 0)
-                {
-                    /* Replace c endpoint with b */
-                    EpotC = EpotB;
-                    c     = b;
-                    gpc   = gpb;
-                    /* swap coord pointers b/c */
-                    xtmp = xb;
-                    ftmp = fb;
-                    xb   = xc;
-                    fb   = fc;
-                    xc   = xtmp;
-                    fc   = ftmp;
-                }
-                else
-                {
-                    /* Replace a endpoint with b */
-                    EpotA = EpotB;
-                    a     = b;
-                    gpa   = gpb;
-                    /* swap coord pointers a/b */
-                    xtmp = xb;
-                    ftmp = fb;
-                    xb   = xa;
-                    fb   = fa;
-                    xa   = xtmp;
-                    fa   = ftmp;
-                }
-
-                /*
-                 * Stop search as soon as we find a value smaller than the endpoints,
-                 * or if the tolerance is below machine precision.
-                 * Never run more than 20 steps, no matter what.
-                 */
-                nminstep++;
-            }
-            while ((EpotB > EpotA || EpotB > EpotC) && (nminstep < 20));
-
-            if (fabs(EpotB-Epot0) < GMX_REAL_EPS || nminstep >= 20)
-            {
-                /* OK. We couldn't find a significantly lower energy.
-                 * If ncorr==0 this was steepest descent, and then we give up.
-                 * If not, reset memory to restart as steepest descent before quitting.
-                 */
-                if (ncorr == 0)
-                {
-                    /* Converged */
-                    converged = TRUE;
-                    break;
-                }
-                else
-                {
-                    /* Reset memory */
-                    ncorr = 0;
-                    /* Search in gradient direction */
-                    for (i = 0; i < n; i++)
-                    {
-                        dx[point][i] = ff[i];
-                    }
-                    /* Reset stepsize */
-                    stepsize = 1.0/fnorm;
-                    continue;
-                }
-            }
-
-            /* Select min energy state of A & C, put the best in xx/ff/Epot
-             */
-            if (EpotC < EpotA)
-            {
-                Epot = EpotC;
-                /* Use state C */
-                for (i = 0; i < n; i++)
-                {
-                    xx[i] = xc[i];
-                    ff[i] = fc[i];
-                }
-                step_taken = c;
-            }
-            else
-            {
-                Epot = EpotA;
-                /* Use state A */
-                for (i = 0; i < n; i++)
-                {
-                    xx[i] = xa[i];
-                    ff[i] = fa[i];
-                }
-                step_taken = a;
-            }
-
-        }
-        else
-        {
-            /* found lower */
-            Epot = EpotC;
-            /* Use state C */
-            for (i = 0; i < n; i++)
-            {
-                xx[i] = xc[i];
-                ff[i] = fc[i];
-            }
-            step_taken = c;
-        }
-
-        /* Update the memory information, and calculate a new
-         * approximation of the inverse hessian
-         */
-
-        /* Have new data in Epot, xx, ff */
-        if (ncorr < nmaxcorr)
-        {
-            ncorr++;
-        }
-
-        for (i = 0; i < n; i++)
-        {
-            dg[point][i]  = lastf[i]-ff[i];
-            dx[point][i] *= step_taken;
-        }
-
-        dgdg = 0;
-        dgdx = 0;
-        for (i = 0; i < n; i++)
-        {
-            dgdg += dg[point][i]*dg[point][i];
-            dgdx += dg[point][i]*dx[point][i];
-        }
-
-        diag = dgdx/dgdg;
-
-        rho[point] = 1.0/dgdx;
-        point++;
-
-        if (point >= nmaxcorr)
-        {
-            point = 0;
-        }
-
-        /* Update */
-        for (i = 0; i < n; i++)
-        {
-            p[i] = ff[i];
-        }
-
-        cp = point;
-
-        /* Recursive update. First go back over the memory points */
-        for (k = 0; k < ncorr; k++)
-        {
-            cp--;
-            if (cp < 0)
-            {
-                cp = ncorr-1;
-            }
-
-            sq = 0;
-            for (i = 0; i < n; i++)
-            {
-                sq += dx[cp][i]*p[i];
-            }
-
-            alpha[cp] = rho[cp]*sq;
-
-            for (i = 0; i < n; i++)
-            {
-                p[i] -= alpha[cp]*dg[cp][i];
-            }
-        }
-
-        for (i = 0; i < n; i++)
-        {
-            p[i] *= diag;
-        }
-
-        /* And then go forward again */
-        for (k = 0; k < ncorr; k++)
-        {
-            yr = 0;
-            for (i = 0; i < n; i++)
-            {
-                yr += p[i]*dg[cp][i];
-            }
-
-            beta = rho[cp]*yr;
-            beta = alpha[cp]-beta;
-
-            for (i = 0; i < n; i++)
-            {
-                p[i] += beta*dx[cp][i];
-            }
-
-            cp++;
-            if (cp >= ncorr)
-            {
-                cp = 0;
-            }
-        }
-
-        for (i = 0; i < n; i++)
-        {
-            if (!frozen[i])
-            {
-                dx[point][i] = p[i];
-            }
-            else
-            {
-                dx[point][i] = 0;
-            }
-        }
-
-        /* Test whether the convergence criterion is met */
-        get_f_norm_max(cr, &(inputrec->opts), mdatoms, f, &fnorm, &fmax, &nfmax);
-
-        /* Print it if necessary */
-        if (MASTER(cr))
-        {
-            if (bVerbose)
-            {
-                double sqrtNumAtoms = sqrt(static_cast<double>(state->natoms));
-                fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
-                        step, Epot, fnorm/sqrtNumAtoms, fmax, nfmax+1);
-            }
-            /* Store the new (lower) energies */
-            upd_mdebin(mdebin, FALSE, FALSE, (double)step,
-                       mdatoms->tmass, enerd, state, inputrec->fepvals, inputrec->expandedvals, state->box,
-                       NULL, NULL, vir, pres, NULL, mu_tot, constr);
-            do_log = do_per_step(step, inputrec->nstlog);
-            do_ene = do_per_step(step, inputrec->nstenergy);
-            if (do_log)
-            {
-                print_ebin_header(fplog, step, step, state->lambda[efptFEP]);
-            }
-            print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE,
-                       do_log ? fplog : NULL, step, step, eprNORMAL,
-                       TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
-        }
-
-        /* Send x and E to IMD client, if bIMD is TRUE. */
-        if (do_IMD(inputrec->bIMD, step, cr, TRUE, state->box, state->x, inputrec, 0, wcycle) && MASTER(cr))
-        {
-            IMD_send_positions(inputrec->imd);
-        }
-
-        // Reset stepsize in we are doing more iterations
-        stepsize = 1.0/fnorm;
-
-        /* Stop when the maximum force lies below tolerance.
-         * If we have reached machine precision, converged is already set to true.
-         */
-        converged = converged || (fmax < inputrec->em_tol);
-
-    } /* End of the loop */
-
-    /* IMD cleanup, if bIMD is TRUE. */
-    IMD_finalize(inputrec->bIMD, inputrec->imd);
-
-    if (converged)
-    {
-        step--; /* we never took that last step in this case */
-
-    }
-    if (fmax > inputrec->em_tol)
-    {
-        if (MASTER(cr))
-        {
-            warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE);
-            warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE);
-        }
-        converged = FALSE;
-    }
-
-    /* If we printed energy and/or logfile last step (which was the last step)
-     * we don't have to do it again, but otherwise print the final values.
-     */
-    if (!do_log) /* Write final value to log since we didn't do anythin last step */
-    {
-        print_ebin_header(fplog, step, step, state->lambda[efptFEP]);
-    }
-    if (!do_ene || !do_log) /* Write final energy file entries */
-    {
-        print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE,
-                   !do_log ? fplog : NULL, step, step, eprNORMAL,
-                   TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
-    }
-
-    /* Print some stuff... */
-    if (MASTER(cr))
-    {
-        fprintf(stderr, "\nwriting lowest energy coordinates.\n");
-    }
-
-    /* IMPORTANT!
-     * For accurate normal mode calculation it is imperative that we
-     * store the last conformation into the full precision binary trajectory.
-     *
-     * However, we should only do it if we did NOT already write this step
-     * above (which we did if do_x or do_f was true).
-     */
-    do_x = !do_per_step(step, inputrec->nstxout);
-    do_f = !do_per_step(step, inputrec->nstfout);
-    write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm),
-                  top_global, inputrec, step,
-                  &ems, state, f);
-
-    if (MASTER(cr))
-    {
-        double sqrtNumAtoms = sqrt(static_cast<double>(state->natoms));
-        print_converged(stderr, LBFGS, inputrec->em_tol, step, converged,
-                        number_steps, Epot, fmax, nfmax, fnorm/sqrtNumAtoms);
-        print_converged(fplog, LBFGS, inputrec->em_tol, step, converged,
-                        number_steps, Epot, fmax, nfmax, fnorm/sqrtNumAtoms);
-
-        fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
-    }
-
-    finish_em(cr, outf, walltime_accounting, wcycle);
-
-    /* To print the actual number of steps we needed somewhere */
-    walltime_accounting_set_nsteps_done(walltime_accounting, step);
-
-    return 0;
-} /* That's all folks */
-
-
-double do_steep(FILE *fplog, t_commrec *cr,
-                int nfile, const t_filenm fnm[],
-                const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
-                int gmx_unused nstglobalcomm,
-                gmx_vsite_t *vsite, gmx_constr_t constr,
-                int gmx_unused stepout,
-                t_inputrec *inputrec,
-                gmx_mtop_t *top_global, t_fcdata *fcd,
-                t_state *state_global,
-                t_mdatoms *mdatoms,
-                t_nrnb *nrnb, gmx_wallcycle_t wcycle,
-                gmx_edsam_t gmx_unused  ed,
-                t_forcerec *fr,
-                int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
-                gmx_membed_t gmx_unused membed,
-                real gmx_unused cpt_period, real gmx_unused max_hours,
-                int imdport,
-                unsigned long gmx_unused Flags,
-                gmx_walltime_accounting_t walltime_accounting)
-{
-    const char       *SD = "Steepest Descents";
-    em_state_t       *s_min, *s_try;
-    rvec             *f_global;
-    gmx_localtop_t   *top;
-    gmx_enerdata_t   *enerd;
-    rvec             *f;
-    gmx_global_stat_t gstat;
-    t_graph          *graph;
-    real              stepsize;
-    real              ustep, fnormn;
-    gmx_mdoutf_t      outf;
-    t_mdebin         *mdebin;
-    gmx_bool          bDone, bAbort, do_x, do_f;
-    tensor            vir, pres;
-    rvec              mu_tot;
-    int               nsteps;
-    int               count          = 0;
-    int               steps_accepted = 0;
-
-    s_min = init_em_state();
-    s_try = init_em_state();
-
-    /* Init em and store the local state in s_try */
-    init_em(fplog, SD, cr, inputrec,
-            state_global, top_global, s_try, &top, &f, &f_global,
-            nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
-            nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle);
-
-    /* Print to log file  */
-    print_em_start(fplog, cr, walltime_accounting, wcycle, SD);
-
-    /* Set variables for stepsize (in nm). This is the largest
-     * step that we are going to make in any direction.
-     */
-    ustep    = inputrec->em_stepsize;
-    stepsize = 0;
-
-    /* Max number of steps  */
-    nsteps = inputrec->nsteps;
-
-    if (MASTER(cr))
-    {
-        /* Print to the screen  */
-        sp_header(stderr, SD, inputrec->em_tol, nsteps);
-    }
-    if (fplog)
-    {
-        sp_header(fplog, SD, inputrec->em_tol, nsteps);
-    }
-
-    /**** HERE STARTS THE LOOP ****
-     * count is the counter for the number of steps
-     * bDone will be TRUE when the minimization has converged
-     * bAbort will be TRUE when nsteps steps have been performed or when
-     * the stepsize becomes smaller than is reasonable for machine precision
-     */
-    count  = 0;
-    bDone  = FALSE;
-    bAbort = FALSE;
-    while (!bDone && !bAbort)
-    {
-        bAbort = (nsteps >= 0) && (count == nsteps);
-
-        /* set new coordinates, except for first step */
-        if (count > 0)
-        {
-            do_em_step(cr, inputrec, mdatoms, fr->bMolPBC,
-                       s_min, stepsize, s_min->f, s_try,
-                       constr, top, nrnb, wcycle, count);
-        }
-
-        evaluate_energy(fplog, cr,
-                        top_global, s_try, top,
-                        inputrec, nrnb, wcycle, gstat,
-                        vsite, constr, fcd, graph, mdatoms, fr,
-                        mu_tot, enerd, vir, pres, count, count == 0);
-
-        if (MASTER(cr))
-        {
-            print_ebin_header(fplog, count, count, s_try->s.lambda[efptFEP]);
-        }
-
-        if (count == 0)
-        {
-            s_min->epot = s_try->epot;
-        }
-
-        /* Print it if necessary  */
-        if (MASTER(cr))
-        {
-            if (bVerbose)
-            {
-                fprintf(stderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
-                        count, ustep, s_try->epot, s_try->fmax, s_try->a_fmax+1,
-                        ( (count == 0) || (s_try->epot < s_min->epot) ) ? '\n' : '\r');
-            }
-
-            if ( (count == 0) || (s_try->epot < s_min->epot) )
-            {
-                /* Store the new (lower) energies  */
-                upd_mdebin(mdebin, FALSE, FALSE, (double)count,
-                           mdatoms->tmass, enerd, &s_try->s, inputrec->fepvals, inputrec->expandedvals,
-                           s_try->s.box, NULL, NULL, vir, pres, NULL, mu_tot, constr);
-
-                /* Prepare IMD energy record, if bIMD is TRUE. */
-                IMD_fill_energy_record(inputrec->bIMD, inputrec->imd, enerd, count, TRUE);
-
-                print_ebin(mdoutf_get_fp_ene(outf), TRUE,
-                           do_per_step(steps_accepted, inputrec->nstdisreout),
-                           do_per_step(steps_accepted, inputrec->nstorireout),
-                           fplog, count, count, eprNORMAL, TRUE,
-                           mdebin, fcd, &(top_global->groups), &(inputrec->opts));
-                fflush(fplog);
-            }
-        }
-
-        /* Now if the new energy is smaller than the previous...
-         * or if this is the first step!
-         * or if we did random steps!
-         */
-
-        if ( (count == 0) || (s_try->epot < s_min->epot) )
-        {
-            steps_accepted++;
-
-            /* Test whether the convergence criterion is met...  */
-            bDone = (s_try->fmax < inputrec->em_tol);
-
-            /* Copy the arrays for force, positions and energy  */
-            /* The 'Min' array always holds the coords and forces of the minimal
-               sampled energy  */
-            swap_em_state(s_min, s_try);
-            if (count > 0)
-            {
-                ustep *= 1.2;
-            }
-
-            /* Write to trn, if necessary */
-            do_x = do_per_step(steps_accepted, inputrec->nstxout);
-            do_f = do_per_step(steps_accepted, inputrec->nstfout);
-            write_em_traj(fplog, cr, outf, do_x, do_f, NULL,
-                          top_global, inputrec, count,
-                          s_min, state_global, f_global);
-        }
-        else
-        {
-            /* If energy is not smaller make the step smaller...  */
-            ustep *= 0.5;
-
-            if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
-            {
-                /* Reload the old state */
-                em_dd_partition_system(fplog, count, cr, top_global, inputrec,
-                                       s_min, top, mdatoms, fr, vsite, constr,
-                                       nrnb, wcycle);
-            }
-        }
-
-        /* Determine new step  */
-        stepsize = ustep/s_min->fmax;
-
-        /* Check if stepsize is too small, with 1 nm as a characteristic length */
-#ifdef GMX_DOUBLE
-        if (count == nsteps || ustep < 1e-12)
-#else
-        if (count == nsteps || ustep < 1e-6)
-#endif
-        {
-            if (MASTER(cr))
-            {
-                warn_step(stderr, inputrec->em_tol, count == nsteps, constr != NULL);
-                warn_step(fplog, inputrec->em_tol, count == nsteps, constr != NULL);
-            }
-            bAbort = TRUE;
-        }
-
-        /* Send IMD energies and positions, if bIMD is TRUE. */
-        if (do_IMD(inputrec->bIMD, count, cr, TRUE, state_global->box, state_global->x, inputrec, 0, wcycle) && MASTER(cr))
-        {
-            IMD_send_positions(inputrec->imd);
-        }
-
-        count++;
-    } /* End of the loop  */
-
-    /* IMD cleanup, if bIMD is TRUE. */
-    IMD_finalize(inputrec->bIMD, inputrec->imd);
-
-    /* Print some data...  */
-    if (MASTER(cr))
-    {
-        fprintf(stderr, "\nwriting lowest energy coordinates.\n");
-    }
-    write_em_traj(fplog, cr, outf, TRUE, inputrec->nstfout, ftp2fn(efSTO, nfile, fnm),
-                  top_global, inputrec, count,
-                  s_min, state_global, f_global);
-
-    if (MASTER(cr))
-    {
-        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
-        fnormn = s_min->fnorm/sqrtNumAtoms;
-
-        print_converged(stderr, SD, inputrec->em_tol, count, bDone, nsteps,
-                        s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
-        print_converged(fplog, SD, inputrec->em_tol, count, bDone, nsteps,
-                        s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
-    }
-
-    finish_em(cr, outf, walltime_accounting, wcycle);
-
-    /* To print the actual number of steps we needed somewhere */
-    inputrec->nsteps = count;
-
-    walltime_accounting_set_nsteps_done(walltime_accounting, count);
-
-    return 0;
-} /* That's all folks */
-
-
-double do_nm(FILE *fplog, t_commrec *cr,
-             int nfile, const t_filenm fnm[],
-             const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused  bCompact,
-             int gmx_unused nstglobalcomm,
-             gmx_vsite_t *vsite, gmx_constr_t constr,
-             int gmx_unused stepout,
-             t_inputrec *inputrec,
-             gmx_mtop_t *top_global, t_fcdata *fcd,
-             t_state *state_global,
-             t_mdatoms *mdatoms,
-             t_nrnb *nrnb, gmx_wallcycle_t wcycle,
-             gmx_edsam_t  gmx_unused ed,
-             t_forcerec *fr,
-             int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
-             gmx_membed_t gmx_unused membed,
-             real gmx_unused cpt_period, real gmx_unused max_hours,
-             int imdport,
-             unsigned long gmx_unused Flags,
-             gmx_walltime_accounting_t walltime_accounting)
-{
-    const char          *NM = "Normal Mode Analysis";
-    gmx_mdoutf_t         outf;
-    int                  natoms, atom, d;
-    int                  nnodes, node;
-    rvec                *f_global;
-    gmx_localtop_t      *top;
-    gmx_enerdata_t      *enerd;
-    rvec                *f;
-    gmx_global_stat_t    gstat;
-    t_graph             *graph;
-    tensor               vir, pres;
-    rvec                 mu_tot;
-    rvec                *fneg, *dfdx;
-    gmx_bool             bSparse; /* use sparse matrix storage format */
-    size_t               sz = 0;
-    gmx_sparsematrix_t * sparse_matrix           = NULL;
-    real           *     full_matrix             = NULL;
-    em_state_t       *   state_work;
-
-    /* added with respect to mdrun */
-    int        i, j, k, row, col;
-    real       der_range = 10.0*sqrt(GMX_REAL_EPS);
-    real       x_min;
-    bool       bIsMaster = MASTER(cr);
-
-    if (constr != NULL)
-    {
-        gmx_fatal(FARGS, "Constraints present with Normal Mode Analysis, this combination is not supported");
-    }
-
-    state_work = init_em_state();
-
-    /* Init em and store the local state in state_minimum */
-    init_em(fplog, NM, cr, inputrec,
-            state_global, top_global, state_work, &top,
-            &f, &f_global,
-            nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
-            nfile, fnm, &outf, NULL, imdport, Flags, wcycle);
-
-    natoms = top_global->natoms;
-    snew(fneg, natoms);
-    snew(dfdx, natoms);
-
-#ifndef GMX_DOUBLE
-    if (bIsMaster)
-    {
-        fprintf(stderr,
-                "NOTE: This version of GROMACS has been compiled in single precision,\n"
-                "      which MIGHT not be accurate enough for normal mode analysis.\n"
-                "      GROMACS now uses sparse matrix storage, so the memory requirements\n"
-                "      are fairly modest even if you recompile in double precision.\n\n");
-    }
-#endif
-
-    /* Check if we can/should use sparse storage format.
-     *
-     * Sparse format is only useful when the Hessian itself is sparse, which it
-     * will be when we use a cutoff.
-     * For small systems (n<1000) it is easier to always use full matrix format, though.
-     */
-    if (EEL_FULL(fr->eeltype) || fr->rlist == 0.0)
-    {
-        md_print_info(cr, fplog, "Non-cutoff electrostatics used, forcing full Hessian format.\n");
-        bSparse = FALSE;
-    }
-    else if (top_global->natoms < 1000)
-    {
-        md_print_info(cr, fplog, "Small system size (N=%d), using full Hessian format.\n", top_global->natoms);
-        bSparse = FALSE;
-    }
-    else
-    {
-        md_print_info(cr, fplog, "Using compressed symmetric sparse Hessian format.\n");
-        bSparse = TRUE;
-    }
-
-    if (bIsMaster)
-    {
-        sz = DIM*top_global->natoms;
-
-        fprintf(stderr, "Allocating Hessian memory...\n\n");
-
-        if (bSparse)
-        {
-            sparse_matrix = gmx_sparsematrix_init(sz);
-            sparse_matrix->compressed_symmetric = TRUE;
-        }
-        else
-        {
-            snew(full_matrix, sz*sz);
-        }
-    }
-
-    init_nrnb(nrnb);
-
-    where();
-
-    /* Write start time and temperature */
-    print_em_start(fplog, cr, walltime_accounting, wcycle, NM);
-
-    /* fudge nr of steps to nr of atoms */
-    inputrec->nsteps = natoms*2;
-
-    if (bIsMaster)
-    {
-        fprintf(stderr, "starting normal mode calculation '%s'\n%d steps.\n\n",
-                *(top_global->name), (int)inputrec->nsteps);
-    }
-
-    nnodes = cr->nnodes;
-
-    /* Make evaluate_energy do a single node force calculation */
-    cr->nnodes = 1;
-    evaluate_energy(fplog, cr,
-                    top_global, state_work, top,
-                    inputrec, nrnb, wcycle, gstat,
-                    vsite, constr, fcd, graph, mdatoms, fr,
-                    mu_tot, enerd, vir, pres, -1, TRUE);
-    cr->nnodes = nnodes;
-
-    /* if forces are not small, warn user */
-    get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, state_work);
-
-    md_print_info(cr, fplog, "Maximum force:%12.5e\n", state_work->fmax);
-    if (state_work->fmax > 1.0e-3)
-    {
-        md_print_info(cr, fplog,
-                      "The force is probably not small enough to "
-                      "ensure that you are at a minimum.\n"
-                      "Be aware that negative eigenvalues may occur\n"
-                      "when the resulting matrix is diagonalized.\n\n");
-    }
-
-    /***********************************************************
-     *
-     *      Loop over all pairs in matrix
-     *
-     *      do_force called twice. Once with positive and
-     *      once with negative displacement
-     *
-     ************************************************************/
-
-    /* Steps are divided one by one over the nodes */
-    for (atom = cr->nodeid; atom < natoms; atom += nnodes)
-    {
-
-        for (d = 0; d < DIM; d++)
-        {
-            x_min = state_work->s.x[atom][d];
-
-            state_work->s.x[atom][d] = x_min - der_range;
-
-            /* Make evaluate_energy do a single node force calculation */
-            cr->nnodes = 1;
-            evaluate_energy(fplog, cr,
-                            top_global, state_work, top,
-                            inputrec, nrnb, wcycle, gstat,
-                            vsite, constr, fcd, graph, mdatoms, fr,
-                            mu_tot, enerd, vir, pres, atom*2, FALSE);
-
-            for (i = 0; i < natoms; i++)
-            {
-                copy_rvec(state_work->f[i], fneg[i]);
-            }
-
-            state_work->s.x[atom][d] = x_min + der_range;
-
-            evaluate_energy(fplog, cr,
-                            top_global, state_work, top,
-                            inputrec, nrnb, wcycle, gstat,
-                            vsite, constr, fcd, graph, mdatoms, fr,
-                            mu_tot, enerd, vir, pres, atom*2+1, FALSE);
-            cr->nnodes = nnodes;
-
-            /* x is restored to original */
-            state_work->s.x[atom][d] = x_min;
-
-            for (j = 0; j < natoms; j++)
-            {
-                for (k = 0; (k < DIM); k++)
-                {
-                    dfdx[j][k] =
-                        -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
-                }
-            }
-
-            if (!bIsMaster)
-            {
-#ifdef GMX_MPI
-#ifdef GMX_DOUBLE
-#define mpi_type MPI_DOUBLE
-#else
-#define mpi_type MPI_FLOAT
-#endif
-                MPI_Send(dfdx[0], natoms*DIM, mpi_type, MASTERNODE(cr), cr->nodeid,
-                         cr->mpi_comm_mygroup);
-#endif
-            }
-            else
-            {
-                for (node = 0; (node < nnodes && atom+node < natoms); node++)
-                {
-                    if (node > 0)
-                    {
-#ifdef GMX_MPI
-                        MPI_Status stat;
-                        MPI_Recv(dfdx[0], natoms*DIM, mpi_type, node, node,
-                                 cr->mpi_comm_mygroup, &stat);
-#undef mpi_type
-#endif
-                    }
-
-                    row = (atom + node)*DIM + d;
-
-                    for (j = 0; j < natoms; j++)
-                    {
-                        for (k = 0; k < DIM; k++)
-                        {
-                            col = j*DIM + k;
-
-                            if (bSparse)
-                            {
-                                if (col >= row && dfdx[j][k] != 0.0)
-                                {
-                                    gmx_sparsematrix_increment_value(sparse_matrix,
-                                                                     row, col, dfdx[j][k]);
-                                }
-                            }
-                            else
-                            {
-                                full_matrix[row*sz+col] = dfdx[j][k];
-                            }
-                        }
-                    }
-                }
-            }
-
-            if (bVerbose && fplog)
-            {
-                fflush(fplog);
-            }
-        }
-        /* write progress */
-        if (bIsMaster && bVerbose)
-        {
-            fprintf(stderr, "\rFinished step %d out of %d",
-                    std::min(atom+nnodes, natoms), natoms);
-            fflush(stderr);
-        }
-    }
-
-    if (bIsMaster)
-    {
-        fprintf(stderr, "\n\nWriting Hessian...\n");
-        gmx_mtxio_write(ftp2fn(efMTX, nfile, fnm), sz, sz, full_matrix, sparse_matrix);
-    }
-
-    finish_em(cr, outf, walltime_accounting, wcycle);
-
-    walltime_accounting_set_nsteps_done(walltime_accounting, natoms*2);
-
-    return 0;
-}
diff --git a/patches/gromacs-5.1.4.diff/src/programs/mdrun/md.cpp b/patches/gromacs-5.1.4.diff/src/programs/mdrun/md.cpp
deleted file mode 100644
index d4bb5960fb564e760c7372ebb56cce5d280a3cd2..0000000000000000000000000000000000000000
--- a/patches/gromacs-5.1.4.diff/src/programs/mdrun/md.cpp
+++ /dev/null
@@ -1,2038 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2011,2012,2013,2014,2015,2016, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "thread_mpi/threads.h"
-
-#include "gromacs/domdec/domdec.h"
-#include "gromacs/domdec/domdec_network.h"
-#include "gromacs/ewald/pme-load-balancing.h"
-#include "gromacs/ewald/pme.h"
-#include "gromacs/fileio/filenm.h"
-#include "gromacs/fileio/mdoutf.h"
-#include "gromacs/fileio/trajectory_writing.h"
-#include "gromacs/fileio/trx.h"
-#include "gromacs/fileio/trxio.h"
-#include "gromacs/imd/imd.h"
-#include "gromacs/legacyheaders/constr.h"
-#include "gromacs/legacyheaders/ebin.h"
-#include "gromacs/legacyheaders/force.h"
-#include "gromacs/legacyheaders/md_logging.h"
-#include "gromacs/legacyheaders/md_support.h"
-#include "gromacs/legacyheaders/mdatoms.h"
-#include "gromacs/legacyheaders/mdebin.h"
-#include "gromacs/legacyheaders/mdrun.h"
-#include "gromacs/legacyheaders/network.h"
-#include "gromacs/legacyheaders/nrnb.h"
-#include "gromacs/legacyheaders/ns.h"
-#include "gromacs/legacyheaders/shellfc.h"
-#include "gromacs/legacyheaders/sighandler.h"
-#include "gromacs/legacyheaders/sim_util.h"
-#include "gromacs/legacyheaders/tgroup.h"
-#include "gromacs/legacyheaders/typedefs.h"
-#include "gromacs/legacyheaders/update.h"
-#include "gromacs/legacyheaders/vcm.h"
-#include "gromacs/legacyheaders/vsite.h"
-#include "gromacs/legacyheaders/types/commrec.h"
-#include "gromacs/legacyheaders/types/constr.h"
-#include "gromacs/legacyheaders/types/enums.h"
-#include "gromacs/legacyheaders/types/fcdata.h"
-#include "gromacs/legacyheaders/types/force_flags.h"
-#include "gromacs/legacyheaders/types/forcerec.h"
-#include "gromacs/legacyheaders/types/group.h"
-#include "gromacs/legacyheaders/types/inputrec.h"
-#include "gromacs/legacyheaders/types/interaction_const.h"
-#include "gromacs/legacyheaders/types/mdatom.h"
-#include "gromacs/legacyheaders/types/membedt.h"
-#include "gromacs/legacyheaders/types/nrnb.h"
-#include "gromacs/legacyheaders/types/oenv.h"
-#include "gromacs/legacyheaders/types/shellfc.h"
-#include "gromacs/legacyheaders/types/state.h"
-#include "gromacs/listed-forces/manage-threading.h"
-#include "gromacs/math/utilities.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/math/vectypes.h"
-#include "gromacs/mdlib/compute_io.h"
-#include "gromacs/mdlib/mdrun_signalling.h"
-#include "gromacs/mdlib/nb_verlet.h"
-#include "gromacs/mdlib/nbnxn_gpu_data_mgmt.h"
-#include "gromacs/pbcutil/mshift.h"
-#include "gromacs/pbcutil/pbc.h"
-#include "gromacs/pulling/pull.h"
-#include "gromacs/swap/swapcoords.h"
-#include "gromacs/timing/wallcycle.h"
-#include "gromacs/timing/walltime_accounting.h"
-#include "gromacs/topology/atoms.h"
-#include "gromacs/topology/idef.h"
-#include "gromacs/topology/mtop_util.h"
-#include "gromacs/topology/topology.h"
-#include "gromacs/utility/basedefinitions.h"
-#include "gromacs/utility/cstringutil.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/real.h"
-#include "gromacs/utility/smalloc.h"
-
-#include "deform.h"
-#include "membed.h"
-#include "repl_ex.h"
-
-/* PLUMED */
-#include "../../../Plumed.h"
-extern int    plumedswitch;
-extern plumed plumedmain;
-/* END PLUMED */
-
-/* PLUMED HREX */
-extern int plumed_hrex;
-/* END PLUMED HREX */
-
-#ifdef GMX_FAHCORE
-#include "corewrap.h"
-#endif
-
-static void reset_all_counters(FILE *fplog, t_commrec *cr,
-                               gmx_int64_t step,
-                               gmx_int64_t *step_rel, t_inputrec *ir,
-                               gmx_wallcycle_t wcycle, t_nrnb *nrnb,
-                               gmx_walltime_accounting_t walltime_accounting,
-                               struct nonbonded_verlet_t *nbv)
-{
-    char sbuf[STEPSTRSIZE];
-
-    /* Reset all the counters related to performance over the run */
-    md_print_warn(cr, fplog, "step %s: resetting all time and cycle counters\n",
-                  gmx_step_str(step, sbuf));
-
-    if (use_GPU(nbv))
-    {
-        nbnxn_gpu_reset_timings(nbv);
-    }
-
-    wallcycle_stop(wcycle, ewcRUN);
-    wallcycle_reset_all(wcycle);
-    if (DOMAINDECOMP(cr))
-    {
-        reset_dd_statistics_counters(cr->dd);
-    }
-    init_nrnb(nrnb);
-    ir->init_step += *step_rel;
-    ir->nsteps    -= *step_rel;
-    *step_rel      = 0;
-    wallcycle_start(wcycle, ewcRUN);
-    walltime_accounting_start(walltime_accounting);
-    print_date_and_time(fplog, cr->nodeid, "Restarted time", gmx_gettime());
-}
-
-double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[],
-             const output_env_t oenv, gmx_bool bVerbose, gmx_bool bCompact,
-             int nstglobalcomm,
-             gmx_vsite_t *vsite, gmx_constr_t constr,
-             int stepout, t_inputrec *ir,
-             gmx_mtop_t *top_global,
-             t_fcdata *fcd,
-             t_state *state_global,
-             t_mdatoms *mdatoms,
-             t_nrnb *nrnb, gmx_wallcycle_t wcycle,
-             gmx_edsam_t ed, t_forcerec *fr,
-             int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, gmx_membed_t membed,
-             real cpt_period, real max_hours,
-             int imdport,
-             unsigned long Flags,
-             gmx_walltime_accounting_t walltime_accounting)
-{
-    gmx_mdoutf_t    outf = NULL;
-    gmx_int64_t     step, step_rel;
-    double          elapsed_time;
-    double          t, t0, lam0[efptNR];
-    gmx_bool        bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner;
-    gmx_bool        bNS, bNStList, bSimAnn, bStopCM, bRerunMD, bNotLastFrame = FALSE,
-                    bFirstStep, bStateFromCP, bStateFromTPX, bInitStep, bLastStep,
-                    bBornRadii, bStartingFromCpt;
-    gmx_bool          bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
-    gmx_bool          do_ene, do_log, do_verbose, bRerunWarnNoV = TRUE,
-                      bForceUpdate = FALSE, bCPT;
-    gmx_bool          bMasterState;
-    int               force_flags, cglo_flags;
-    tensor            force_vir, shake_vir, total_vir, tmp_vir, pres;
-    int               i, m;
-    t_trxstatus      *status;
-    rvec              mu_tot;
-    t_vcm            *vcm;
-    matrix            pcoupl_mu, M;
-    t_trxframe        rerun_fr;
-    gmx_repl_ex_t     repl_ex = NULL;
-    int               nchkpt  = 1;
-    gmx_localtop_t   *top;
-    t_mdebin         *mdebin   = NULL;
-    t_state          *state    = NULL;
-    rvec             *f_global = NULL;
-    gmx_enerdata_t   *enerd;
-    rvec             *f = NULL;
-    gmx_global_stat_t gstat;
-    gmx_update_t      upd   = NULL;
-    t_graph          *graph = NULL;
-    gmx_signalling_t  gs;
-    gmx_groups_t     *groups;
-    gmx_ekindata_t   *ekind;
-    gmx_shellfc_t     shellfc;
-    int               count, nconverged = 0;
-    double            tcount                 = 0;
-    gmx_bool          bConverged             = TRUE, bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
-    gmx_bool          bResetCountersHalfMaxH = FALSE;
-    gmx_bool          bVV, bTemp, bPres, bTrotter;
-    gmx_bool          bUpdateDoLR;
-    real              dvdl_constr;
-    rvec             *cbuf        = NULL;
-    int               cbuf_nalloc = 0;
-    matrix            lastbox;
-    int               lamnew  = 0;
-    /* for FEP */
-    int               nstfep = 0;
-    double            cycles;
-    real              saved_conserved_quantity = 0;
-    real              last_ekin                = 0;
-    t_extmass         MassQ;
-    int             **trotter_seq;
-    char              sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
-    int               handled_stop_condition = gmx_stop_cond_none; /* compare to get_stop_condition*/
-    gmx_int64_t       multisim_nsteps        = -1;                 /* number of steps to do  before first multisim
-                                                                          simulation stops. If equal to zero, don't
-                                                                          communicate any more between multisims.*/
-    /* PME load balancing data for GPU kernels */
-    pme_load_balancing_t *pme_loadbal      = NULL;
-    gmx_bool              bPMETune         = FALSE;
-    gmx_bool              bPMETunePrinting = FALSE;
-
-    /* Interactive MD */
-    gmx_bool          bIMDstep = FALSE;
-
-    /* PLUMED */
-    int plumedNeedsEnergy=0;
-    int plumedWantsToStop=0;
-    matrix plumed_vir;
-    /* END PLUMED */
-
-#ifdef GMX_FAHCORE
-    /* Temporary addition for FAHCORE checkpointing */
-    int chkpt_ret;
-#endif
-
-    /* Check for special mdrun options */
-    bRerunMD = (Flags & MD_RERUN);
-    if (Flags & MD_RESETCOUNTERSHALFWAY)
-    {
-        if (ir->nsteps > 0)
-        {
-            /* Signal to reset the counters half the simulation steps. */
-            wcycle_set_reset_counters(wcycle, ir->nsteps/2);
-        }
-        /* Signal to reset the counters halfway the simulation time. */
-        bResetCountersHalfMaxH = (max_hours > 0);
-    }
-
-    /* md-vv uses averaged full step velocities for T-control
-       md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
-       md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
-    bVV      = EI_VV(ir->eI);
-    bTrotter = (bVV && (IR_NPT_TROTTER(ir) || IR_NPH_TROTTER(ir) || IR_NVT_TROTTER(ir)));
-
-    if (bRerunMD)
-    {
-        /* Since we don't know if the frames read are related in any way,
-         * rebuild the neighborlist at every step.
-         */
-        ir->nstlist       = 1;
-        ir->nstcalcenergy = 1;
-        nstglobalcomm     = 1;
-    }
-
-    check_ir_old_tpx_versions(cr, fplog, ir, top_global);
-
-    nstglobalcomm   = check_nstglobalcomm(fplog, cr, nstglobalcomm, ir);
-    bGStatEveryStep = (nstglobalcomm == 1);
-
-    if (bRerunMD)
-    {
-        ir->nstxout_compressed = 0;
-    }
-    groups = &top_global->groups;
-
-    /* Initial values */
-    init_md(fplog, cr, ir, oenv, &t, &t0, state_global->lambda,
-            &(state_global->fep_state), lam0,
-            nrnb, top_global, &upd,
-            nfile, fnm, &outf, &mdebin,
-            force_vir, shake_vir, mu_tot, &bSimAnn, &vcm, Flags, wcycle);
-
-    clear_mat(total_vir);
-    clear_mat(pres);
-    /* Energy terms and groups */
-    snew(enerd, 1);
-    init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda,
-                  enerd);
-    if (DOMAINDECOMP(cr))
-    {
-        f = NULL;
-    }
-    else
-    {
-        snew(f, top_global->natoms);
-    }
-
-    /* Kinetic energy data */
-    snew(ekind, 1);
-    init_ekindata(fplog, top_global, &(ir->opts), ekind);
-    /* Copy the cos acceleration to the groups struct */
-    ekind->cosacc.cos_accel = ir->cos_accel;
-
-    gstat = global_stat_init(ir);
-    debug_gmx();
-
-    /* Check for polarizable models and flexible constraints */
-    shellfc = init_shell_flexcon(fplog,
-                                 top_global, n_flexible_constraints(constr),
-                                 (ir->bContinuation ||
-                                  (DOMAINDECOMP(cr) && !MASTER(cr))) ?
-                                 NULL : state_global->x);
-    if (shellfc && ir->nstcalcenergy != 1)
-    {
-        gmx_fatal(FARGS, "You have nstcalcenergy set to a value (%d) that is different from 1.\nThis is not supported in combinations with shell particles.\nPlease make a new tpr file.", ir->nstcalcenergy);
-    }
-    if (shellfc && DOMAINDECOMP(cr))
-    {
-        gmx_fatal(FARGS, "Shell particles are not implemented with domain decomposition, use a single rank");
-    }
-    if (shellfc && ir->eI == eiNM)
-    {
-        /* Currently shells don't work with Normal Modes */
-        gmx_fatal(FARGS, "Normal Mode analysis is not supported with shells.\nIf you'd like to help with adding support, we have an open discussion at http://redmine.gromacs.org/issues/879\n");
-    }
-
-    if (vsite && ir->eI == eiNM)
-    {
-        /* Currently virtual sites don't work with Normal Modes */
-        gmx_fatal(FARGS, "Normal Mode analysis is not supported with virtual sites.\nIf you'd like to help with adding support, we have an open discussion at http://redmine.gromacs.org/issues/879\n");
-    }
-
-    if (DEFORM(*ir))
-    {
-        tMPI_Thread_mutex_lock(&deform_init_box_mutex);
-        set_deform_reference_box(upd,
-                                 deform_init_init_step_tpx,
-                                 deform_init_box_tpx);
-        tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
-    }
-
-    {
-        double io = compute_io(ir, top_global->natoms, groups, mdebin->ebin->nener, 1);
-        if ((io > 2000) && MASTER(cr))
-        {
-            fprintf(stderr,
-                    "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
-                    io);
-        }
-    }
-
-    if (DOMAINDECOMP(cr))
-    {
-        top = dd_init_local_top(top_global);
-
-        snew(state, 1);
-        dd_init_local_state(cr->dd, state_global, state);
-
-        if (DDMASTER(cr->dd) && ir->nstfout)
-        {
-            snew(f_global, state_global->natoms);
-        }
-    }
-    else
-    {
-        top = gmx_mtop_generate_local_top(top_global, ir);
-
-        state    = serial_init_local_state(state_global);
-        f_global = f;
-
-        atoms2md(top_global, ir, 0, NULL, top_global->natoms, mdatoms);
-
-        if (vsite)
-        {
-            set_vsite_top(vsite, top, mdatoms, cr);
-        }
-
-        if (ir->ePBC != epbcNONE && !fr->bMolPBC)
-        {
-            graph = mk_graph(fplog, &(top->idef), 0, top_global->natoms, FALSE, FALSE);
-        }
-
-        if (shellfc)
-        {
-            make_local_shells(cr, mdatoms, shellfc);
-        }
-
-        setup_bonded_threading(fr, &top->idef);
-    }
-
-    /* Set up interactive MD (IMD) */
-    init_IMD(ir, cr, top_global, fplog, ir->nstcalcenergy, state_global->x,
-             nfile, fnm, oenv, imdport, Flags);
-
-    if (DOMAINDECOMP(cr))
-    {
-        /* Distribute the charge groups over the nodes from the master node */
-        dd_partition_system(fplog, ir->init_step, cr, TRUE, 1,
-                            state_global, top_global, ir,
-                            state, &f, mdatoms, top, fr,
-                            vsite, shellfc, constr,
-                            nrnb, NULL, FALSE);
-
-    }
-
-    update_mdatoms(mdatoms, state->lambda[efptMASS]);
-
-    if (opt2bSet("-cpi", nfile, fnm))
-    {
-        bStateFromCP = gmx_fexist_master(opt2fn_master("-cpi", nfile, fnm, cr), cr);
-    }
-    else
-    {
-        bStateFromCP = FALSE;
-    }
-
-    if (ir->bExpanded)
-    {
-        init_expanded_ensemble(bStateFromCP, ir, &state->dfhist);
-    }
-
-    if (MASTER(cr))
-    {
-        if (bStateFromCP)
-        {
-            /* Update mdebin with energy history if appending to output files */
-            if (Flags & MD_APPENDFILES)
-            {
-                restore_energyhistory_from_state(mdebin, &state_global->enerhist);
-            }
-            else
-            {
-                /* We might have read an energy history from checkpoint,
-                 * free the allocated memory and reset the counts.
-                 */
-                done_energyhistory(&state_global->enerhist);
-                init_energyhistory(&state_global->enerhist);
-            }
-        }
-        /* Set the initial energy history in state by updating once */
-        update_energyhistory(&state_global->enerhist, mdebin);
-    }
-
-    /* Initialize constraints */
-    if (constr && !DOMAINDECOMP(cr))
-    {
-        set_constraints(constr, top, ir, mdatoms, cr);
-    }
-
-    if (repl_ex_nst > 0 && MASTER(cr))
-    {
-        repl_ex = init_replica_exchange(fplog, cr->ms, state_global, ir,
-                                        repl_ex_nst, repl_ex_nex, repl_ex_seed);
-    }
-
-    /* PME tuning is only supported with PME for Coulomb. Is is not supported
-     * with only LJ PME, or for reruns.
-     */
-    bPMETune = ((Flags & MD_TUNEPME) && EEL_PME(fr->eeltype) && !bRerunMD &&
-                !(Flags & MD_REPRODUCIBLE));
-    if (bPMETune)
-    {
-        pme_loadbal_init(&pme_loadbal, cr, fplog, ir, state->box,
-                         fr->ic, fr->pmedata, use_GPU(fr->nbv),
-                         &bPMETunePrinting);
-    }
-
-    if (!ir->bContinuation && !bRerunMD)
-    {
-        if (mdatoms->cFREEZE && (state->flags & (1<<estV)))
-        {
-            /* Set the velocities of frozen particles to zero */
-            for (i = 0; i < mdatoms->homenr; i++)
-            {
-                for (m = 0; m < DIM; m++)
-                {
-                    if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
-                    {
-                        state->v[i][m] = 0;
-                    }
-                }
-            }
-        }
-
-        if (constr)
-        {
-            /* Constrain the initial coordinates and velocities */
-            do_constrain_first(fplog, constr, ir, mdatoms, state,
-                               cr, nrnb, fr, top);
-        }
-        if (vsite)
-        {
-            /* Construct the virtual sites for the initial configuration */
-            construct_vsites(vsite, state->x, ir->delta_t, NULL,
-                             top->idef.iparams, top->idef.il,
-                             fr->ePBC, fr->bMolPBC, cr, state->box);
-        }
-    }
-
-    debug_gmx();
-
-    if (IR_TWINRANGE(*ir) && repl_ex_nst % ir->nstcalclr != 0)
-    {
-        /* We should exchange at nstcalclr steps to get correct integration */
-        gmx_fatal(FARGS, "The replica exchange period (%d) is not divisible by nstcalclr (%d)", repl_ex_nst, ir->nstcalclr);
-    }
-
-    if (ir->efep != efepNO)
-    {
-        /* Set free energy calculation frequency as the greatest common
-         * denominator of nstdhdl and repl_ex_nst.
-         * Check for nstcalclr with twin-range, since we need the long-range
-         * contribution to the free-energy at the correct (nstcalclr) steps.
-         */
-        nstfep = ir->fepvals->nstdhdl;
-        if (ir->bExpanded)
-        {
-            if (IR_TWINRANGE(*ir) &&
-                ir->expandedvals->nstexpanded % ir->nstcalclr != 0)
-            {
-                gmx_fatal(FARGS, "nstexpanded should be divisible by nstcalclr");
-            }
-            nstfep = gmx_greatest_common_divisor(ir->expandedvals->nstexpanded, nstfep);
-        }
-        if (repl_ex_nst > 0)
-        {
-            nstfep = gmx_greatest_common_divisor(repl_ex_nst, nstfep);
-        }
-        /* We checked divisibility of repl_ex_nst and nstcalclr above */
-        if (IR_TWINRANGE(*ir) && nstfep % ir->nstcalclr != 0)
-        {
-            gmx_incons("nstfep not divisible by nstcalclr");
-        }
-    }
-
-    /* Be REALLY careful about what flags you set here. You CANNOT assume
-     * this is the first step, since we might be restarting from a checkpoint,
-     * and in that case we should not do any modifications to the state.
-     */
-    bStopCM = (ir->comm_mode != ecmNO && !ir->bContinuation);
-
-    cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT
-                  | (bStopCM ? CGLO_STOPCM : 0)
-                  | (bVV ? CGLO_PRESSURE : 0)
-                  | (bVV ? CGLO_CONSTRAINT : 0)
-                  | (bRerunMD ? CGLO_RERUNMD : 0)
-                  | ((Flags & MD_READ_EKIN) ? CGLO_READEKIN : 0));
-
-    bSumEkinhOld = FALSE;
-    compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
-                    NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
-                    constr, NULL, FALSE, state->box,
-                    top_global, &bSumEkinhOld, cglo_flags);
-    if (ir->eI == eiVVAK)
-    {
-        /* a second call to get the half step temperature initialized as well */
-        /* we do the same call as above, but turn the pressure off -- internally to
-           compute_globals, this is recognized as a velocity verlet half-step
-           kinetic energy calculation.  This minimized excess variables, but
-           perhaps loses some logic?*/
-
-        compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
-                        NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
-                        constr, NULL, FALSE, state->box,
-                        top_global, &bSumEkinhOld,
-                        cglo_flags &~(CGLO_STOPCM | CGLO_PRESSURE));
-    }
-
-    /* Calculate the initial half step temperature, and save the ekinh_old */
-    if (!(Flags & MD_STARTFROMCPT))
-    {
-        for (i = 0; (i < ir->opts.ngtc); i++)
-        {
-            copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old);
-        }
-    }
-    if (ir->eI != eiVV)
-    {
-        enerd->term[F_TEMP] *= 2; /* result of averages being done over previous and current step,
-                                     and there is no previous step */
-    }
-
-    /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter
-       temperature control */
-    trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter);
-
-    if (MASTER(cr))
-    {
-        if (constr && !ir->bContinuation && ir->eConstrAlg == econtLINCS)
-        {
-            fprintf(fplog,
-                    "RMS relative constraint deviation after constraining: %.2e\n",
-                    constr_rmsd(constr, FALSE));
-        }
-        if (EI_STATE_VELOCITY(ir->eI))
-        {
-            fprintf(fplog, "Initial temperature: %g K\n", enerd->term[F_TEMP]);
-        }
-        if (bRerunMD)
-        {
-            fprintf(stderr, "starting md rerun '%s', reading coordinates from"
-                    " input trajectory '%s'\n\n",
-                    *(top_global->name), opt2fn("-rerun", nfile, fnm));
-            if (bVerbose)
-            {
-                fprintf(stderr, "Calculated time to finish depends on nsteps from "
-                        "run input file,\nwhich may not correspond to the time "
-                        "needed to process input trajectory.\n\n");
-            }
-        }
-        else
-        {
-            char tbuf[20];
-            fprintf(stderr, "starting mdrun '%s'\n",
-                    *(top_global->name));
-            if (ir->nsteps >= 0)
-            {
-                sprintf(tbuf, "%8.1f", (ir->init_step+ir->nsteps)*ir->delta_t);
-            }
-            else
-            {
-                sprintf(tbuf, "%s", "infinite");
-            }
-            if (ir->init_step > 0)
-            {
-                fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
-                        gmx_step_str(ir->init_step+ir->nsteps, sbuf), tbuf,
-                        gmx_step_str(ir->init_step, sbuf2),
-                        ir->init_step*ir->delta_t);
-            }
-            else
-            {
-                fprintf(stderr, "%s steps, %s ps.\n",
-                        gmx_step_str(ir->nsteps, sbuf), tbuf);
-            }
-        }
-        fprintf(fplog, "\n");
-    }
-
-    /* PLUMED */
-    if(plumedswitch){
-      /* detect plumed API version */
-      int pversion=0;
-      plumed_cmd(plumedmain,"getApiVersion",&pversion);
-      /* setting kbT is only implemented with api>1) */
-      real kbT=ir->opts.ref_t[0]*BOLTZ;
-      if(pversion>1) plumed_cmd(plumedmain,"setKbT",&kbT);
-      if(pversion>2){
-        int res=1;
-        if( (Flags & MD_STARTFROMCPT) ) plumed_cmd(plumedmain,"setRestart",&res);
-      }
-
-      if(cr->ms && cr->ms->nsim>1) {
-        if(MASTER(cr)) plumed_cmd(plumedmain,"GREX setMPIIntercomm",&cr->ms->mpi_comm_masters);
-        if(PAR(cr)){
-          if(DOMAINDECOMP(cr)) {
-            plumed_cmd(plumedmain,"GREX setMPIIntracomm",&cr->dd->mpi_comm_all);
-          }else{
-            plumed_cmd(plumedmain,"GREX setMPIIntracomm",&cr->mpi_comm_mysim);
-          }
-        }
-        plumed_cmd(plumedmain,"GREX init",NULL);
-      }
-      if(PAR(cr)){
-        if(DOMAINDECOMP(cr)) {
-          plumed_cmd(plumedmain,"setMPIComm",&cr->dd->mpi_comm_all);
-        }
-      }
-      plumed_cmd(plumedmain,"setNatoms",&top_global->natoms);
-      plumed_cmd(plumedmain,"setMDEngine","gromacs");
-      plumed_cmd(plumedmain,"setLog",fplog);
-      real real_delta_t;
-      real_delta_t=ir->delta_t;
-      plumed_cmd(plumedmain,"setTimestep",&real_delta_t);
-      plumed_cmd(plumedmain,"init",NULL);
-
-      if(PAR(cr)){
-        if(DOMAINDECOMP(cr)) {
-          plumed_cmd(plumedmain,"setAtomsNlocal",&cr->dd->nat_home);
-          plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->gatindex);
-        }
-      }
-    }
-    /* END PLUMED */
-
-    walltime_accounting_start(walltime_accounting);
-    wallcycle_start(wcycle, ewcRUN);
-    print_start(fplog, cr, walltime_accounting, "mdrun");
-
-    /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */
-#ifdef GMX_FAHCORE
-    chkpt_ret = fcCheckPointParallel( cr->nodeid,
-                                      NULL, 0);
-    if (chkpt_ret == 0)
-    {
-        gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", 0 );
-    }
-#endif
-
-    debug_gmx();
-    /***********************************************************
-     *
-     *             Loop over MD steps
-     *
-     ************************************************************/
-
-    /* if rerunMD then read coordinates and velocities from input trajectory */
-    if (bRerunMD)
-    {
-        if (getenv("GMX_FORCE_UPDATE"))
-        {
-            bForceUpdate = TRUE;
-        }
-
-        rerun_fr.natoms = 0;
-        if (MASTER(cr))
-        {
-            bNotLastFrame = read_first_frame(oenv, &status,
-                                             opt2fn("-rerun", nfile, fnm),
-                                             &rerun_fr, TRX_NEED_X | TRX_READ_V);
-            if (rerun_fr.natoms != top_global->natoms)
-            {
-                gmx_fatal(FARGS,
-                          "Number of atoms in trajectory (%d) does not match the "
-                          "run input file (%d)\n",
-                          rerun_fr.natoms, top_global->natoms);
-            }
-            if (ir->ePBC != epbcNONE)
-            {
-                if (!rerun_fr.bBox)
-                {
-                    gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f does not contain a box, while pbc is used", rerun_fr.step, rerun_fr.time);
-                }
-                if (max_cutoff2(ir->ePBC, rerun_fr.box) < sqr(fr->rlistlong))
-                {
-                    gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f has too small box dimensions", rerun_fr.step, rerun_fr.time);
-                }
-            }
-        }
-
-        if (PAR(cr))
-        {
-            rerun_parallel_comm(cr, &rerun_fr, &bNotLastFrame);
-        }
-
-        if (ir->ePBC != epbcNONE)
-        {
-            /* Set the shift vectors.
-             * Necessary here when have a static box different from the tpr box.
-             */
-            calc_shifts(rerun_fr.box, fr->shift_vec);
-        }
-    }
-
-    /* loop over MD steps or if rerunMD to end of input trajectory */
-    bFirstStep = TRUE;
-    /* Skip the first Nose-Hoover integration when we get the state from tpx */
-    bStateFromTPX    = !bStateFromCP;
-    bInitStep        = bFirstStep && (bStateFromTPX || bVV);
-    bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep;
-    bSumEkinhOld     = FALSE;
-    bExchanged       = FALSE;
-    bNeedRepartition = FALSE;
-
-    init_global_signals(&gs, cr, ir, repl_ex_nst);
-
-    step     = ir->init_step;
-    step_rel = 0;
-
-    if (MULTISIM(cr) && (repl_ex_nst <= 0 ))
-    {
-        /* check how many steps are left in other sims */
-        multisim_nsteps = get_multisim_nsteps(cr, ir->nsteps);
-    }
-    /* PLUMED */
-    // we reenable this because it is useful with Metainference
-    //if (MULTISIM(cr) && max_hours > 0)
-    //{
-    //    gmx_fatal(FARGS, "The combination of mdrun -maxh and mdrun -multi is not supported. Please use the nsteps .mdp field.");
-    //}
-    /* PLUMED */
-
-    /* and stop now if we should */
-    bLastStep = (bRerunMD || (ir->nsteps >= 0 && step_rel > ir->nsteps) ||
-                 ((multisim_nsteps >= 0) && (step_rel >= multisim_nsteps )));
-    while (!bLastStep || (bRerunMD && bNotLastFrame))
-    {
-
-        /* Determine if this is a neighbor search step */
-        bNStList = (ir->nstlist > 0  && step % ir->nstlist == 0);
-
-        if (bPMETune && bNStList)
-        {
-            /* PME grid + cut-off optimization with GPUs or PME nodes */
-            pme_loadbal_do(pme_loadbal, cr,
-                           (bVerbose && MASTER(cr)) ? stderr : NULL,
-                           fplog,
-                           ir, fr, state, wcycle,
-                           step, step_rel,
-                           &bPMETunePrinting);
-        }
-
-        wallcycle_start(wcycle, ewcSTEP);
-
-        if (bRerunMD)
-        {
-            if (rerun_fr.bStep)
-            {
-                step     = rerun_fr.step;
-                step_rel = step - ir->init_step;
-            }
-            if (rerun_fr.bTime)
-            {
-                t = rerun_fr.time;
-            }
-            else
-            {
-                t = step;
-            }
-        }
-        else
-        {
-            bLastStep = (step_rel == ir->nsteps);
-            t         = t0 + step*ir->delta_t;
-        }
-
-        if (ir->efep != efepNO || ir->bSimTemp)
-        {
-            /* find and set the current lambdas.  If rerunning, we either read in a state, or a lambda value,
-               requiring different logic. */
-
-            set_current_lambdas(step, ir->fepvals, bRerunMD, &rerun_fr, state_global, state, lam0);
-            bDoDHDL      = do_per_step(step, ir->fepvals->nstdhdl);
-            bDoFEP       = ((ir->efep != efepNO) && do_per_step(step, nstfep));
-            bDoExpanded  = (do_per_step(step, ir->expandedvals->nstexpanded)
-                            && (ir->bExpanded) && (step > 0) && (!bStartingFromCpt));
-        }
-
-        bDoReplEx = ((repl_ex_nst > 0) && (step > 0) && !bLastStep &&
-                     do_per_step(step, repl_ex_nst));
-
-        if (bSimAnn)
-        {
-            update_annealing_target_temp(&(ir->opts), t);
-        }
-
-        if (bRerunMD)
-        {
-            if (!DOMAINDECOMP(cr) || MASTER(cr))
-            {
-                for (i = 0; i < state_global->natoms; i++)
-                {
-                    copy_rvec(rerun_fr.x[i], state_global->x[i]);
-                }
-                if (rerun_fr.bV)
-                {
-                    for (i = 0; i < state_global->natoms; i++)
-                    {
-                        copy_rvec(rerun_fr.v[i], state_global->v[i]);
-                    }
-                }
-                else
-                {
-                    for (i = 0; i < state_global->natoms; i++)
-                    {
-                        clear_rvec(state_global->v[i]);
-                    }
-                    if (bRerunWarnNoV)
-                    {
-                        fprintf(stderr, "\nWARNING: Some frames do not contain velocities.\n"
-                                "         Ekin, temperature and pressure are incorrect,\n"
-                                "         the virial will be incorrect when constraints are present.\n"
-                                "\n");
-                        bRerunWarnNoV = FALSE;
-                    }
-                }
-            }
-            copy_mat(rerun_fr.box, state_global->box);
-            copy_mat(state_global->box, state->box);
-
-            if (vsite && (Flags & MD_RERUN_VSITE))
-            {
-                if (DOMAINDECOMP(cr))
-                {
-                    gmx_fatal(FARGS, "Vsite recalculation with -rerun is not implemented with domain decomposition, use a single rank");
-                }
-                if (graph)
-                {
-                    /* Following is necessary because the graph may get out of sync
-                     * with the coordinates if we only have every N'th coordinate set
-                     */
-                    mk_mshift(fplog, graph, fr->ePBC, state->box, state->x);
-                    shift_self(graph, state->box, state->x);
-                }
-                construct_vsites(vsite, state->x, ir->delta_t, state->v,
-                                 top->idef.iparams, top->idef.il,
-                                 fr->ePBC, fr->bMolPBC, cr, state->box);
-                if (graph)
-                {
-                    unshift_self(graph, state->box, state->x);
-                }
-            }
-        }
-
-        /* Stop Center of Mass motion */
-        bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm));
-
-        if (bRerunMD)
-        {
-            /* for rerun MD always do Neighbour Searching */
-            bNS      = (bFirstStep || ir->nstlist != 0);
-            bNStList = bNS;
-        }
-        else
-        {
-            /* Determine whether or not to do Neighbour Searching and LR */
-            bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition);
-        }
-
-        /* check whether we should stop because another simulation has
-           stopped. */
-        if (MULTISIM(cr))
-        {
-            if ( (multisim_nsteps >= 0) &&  (step_rel >= multisim_nsteps)  &&
-                 (multisim_nsteps != ir->nsteps) )
-            {
-                if (bNS)
-                {
-                    if (MASTER(cr))
-                    {
-                        fprintf(stderr,
-                                "Stopping simulation %d because another one has finished\n",
-                                cr->ms->sim);
-                    }
-                    bLastStep         = TRUE;
-                    gs.sig[eglsCHKPT] = 1;
-                }
-            }
-        }
-
-        /* < 0 means stop at next step, > 0 means stop at next NS step */
-        if ( (gs.set[eglsSTOPCOND] < 0) ||
-             ( (gs.set[eglsSTOPCOND] > 0) && (bNStList || ir->nstlist == 0) ) )
-        {
-            bLastStep = TRUE;
-        }
-
-        /* Determine whether or not to update the Born radii if doing GB */
-        bBornRadii = bFirstStep;
-        if (ir->implicit_solvent && (step % ir->nstgbradii == 0))
-        {
-            bBornRadii = TRUE;
-        }
-
-        /* do_log triggers energy and virial calculation. Because this leads
-         * to different code paths, forces can be different. Thus for exact
-         * continuation we should avoid extra log output.
-         * Note that the || bLastStep can result in non-exact continuation
-         * beyond the last step. But we don't consider that to be an issue.
-         */
-        do_log     = do_per_step(step, ir->nstlog) || (bFirstStep && !bStateFromCP) || bLastStep;
-        do_verbose = bVerbose &&
-            (step % stepout == 0 || bFirstStep || bLastStep);
-
-        if (bNS && !(bFirstStep && ir->bContinuation && !bRerunMD))
-        {
-            if (bRerunMD)
-            {
-                bMasterState = TRUE;
-            }
-            else
-            {
-                bMasterState = FALSE;
-                /* Correct the new box if it is too skewed */
-                if (DYNAMIC_BOX(*ir))
-                {
-                    if (correct_box(fplog, step, state->box, graph))
-                    {
-                        bMasterState = TRUE;
-                    }
-                }
-                if (DOMAINDECOMP(cr) && bMasterState)
-                {
-                    dd_collect_state(cr->dd, state, state_global);
-                }
-            }
-
-            if (DOMAINDECOMP(cr))
-            {
-                /* Repartition the domain decomposition */
-                dd_partition_system(fplog, step, cr,
-                                    bMasterState, nstglobalcomm,
-                                    state_global, top_global, ir,
-                                    state, &f, mdatoms, top, fr,
-                                    vsite, shellfc, constr,
-                                    nrnb, wcycle,
-                                    do_verbose && !bPMETunePrinting);
-
-                /* PLUMED */
-                if(plumedswitch){
-                  plumed_cmd(plumedmain,"setAtomsNlocal",&cr->dd->nat_home);
-                  plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->gatindex);
-                }
-                /* END PLUMED */
-            }
-        }
-
-        if (MASTER(cr) && do_log)
-        {
-            print_ebin_header(fplog, step, t, state->lambda[efptFEP]); /* can we improve the information printed here? */
-        }
-
-        if (ir->efep != efepNO)
-        {
-            update_mdatoms(mdatoms, state->lambda[efptMASS]);
-        }
-
-        if ((bRerunMD && rerun_fr.bV) || bExchanged)
-        {
-
-            /* We need the kinetic energy at minus the half step for determining
-             * the full step kinetic energy and possibly for T-coupling.*/
-            /* This may not be quite working correctly yet . . . . */
-            compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
-                            wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot,
-                            constr, NULL, FALSE, state->box,
-                            top_global, &bSumEkinhOld,
-                            CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
-        }
-        clear_mat(force_vir);
-
-/* PLUMED HREX */
-        gmx_bool bHREX;
-        bHREX = bDoReplEx && plumed_hrex;
-
-        if(plumedswitch) if(bHREX){
-          gmx_enerdata_t *hrex_enerd;
-          snew(hrex_enerd,1);
-          init_enerdata(top_global->groups.grps[egcENER].nr,ir->fepvals->n_lambda,hrex_enerd);
-          int repl=-1;
-          int nrepl=-1;
-          if(MASTER(cr)){
-            repl=replica_exchange_get_repl(repl_ex);
-            nrepl=replica_exchange_get_nrepl(repl_ex);
-          }
-          if (DOMAINDECOMP(cr))
-            dd_collect_state(cr->dd,state,state_global);
-          else
-            copy_state_nonatomdata(state, state_global);
-          if(MASTER(cr)){
-            if(repl%2==step/repl_ex_nst%2){
-              if(repl-1>=0) exchange_state(cr->ms,repl-1,state_global);
-            }else{
-              if(repl+1<nrepl) exchange_state(cr->ms,repl+1,state_global);
-            }
-          }
-          if (!DOMAINDECOMP(cr))
-            copy_state_nonatomdata(state_global, state);
-          if(PAR(cr)){
-            if (DOMAINDECOMP(cr)) {
-              dd_partition_system(fplog,step,cr,TRUE,1,
-                              state_global,top_global,ir,
-                              state,&f,mdatoms,top,fr,vsite,shellfc,constr,
-                              nrnb,wcycle,FALSE);
-            }
-          }
-          do_force(fplog, cr, ir, step, nrnb, wcycle, top, groups,
-                     state->box, state->x, &state->hist,
-                     f, force_vir, mdatoms, hrex_enerd, fcd,
-                     state->lambda, graph,
-                     fr, vsite, mu_tot, t, mdoutf_get_fp_field(outf), ed, bBornRadii,
-                    GMX_FORCE_STATECHANGED |
-                       ((DYNAMIC_BOX(*ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) |
-                       GMX_FORCE_ALLFORCES |
-                       GMX_FORCE_SEPLRF |
-                       GMX_FORCE_VIRIAL |
-                       GMX_FORCE_ENERGY |
-                       GMX_FORCE_DHDL |
-                       GMX_FORCE_NS |
-                       ( ( fr->bTwinRange && do_per_step(step, ir->nstcalclr) ) ? GMX_FORCE_DO_LR :0) );
-          plumed_cmd(plumedmain,"GREX cacheLocalUSwap",&hrex_enerd->term[F_EPOT]);
-          sfree(hrex_enerd);
-
-/* exchange back */
-          if (DOMAINDECOMP(cr))
-            dd_collect_state(cr->dd,state,state_global);
-          else
-            copy_state_nonatomdata(state, state_global);
-          if(MASTER(cr)){
-            if(repl%2==step/repl_ex_nst%2){
-              if(repl-1>=0) exchange_state(cr->ms,repl-1,state_global);
-            }else{
-              if(repl+1<nrepl) exchange_state(cr->ms,repl+1,state_global);
-            }
-          }
-          if (!DOMAINDECOMP(cr))
-            copy_state_nonatomdata(state_global, state);
-          if(PAR(cr)){
-            if (DOMAINDECOMP(cr)) {
-              dd_partition_system(fplog,step,cr,TRUE,1,
-                              state_global,top_global,ir,
-                              state,&f,mdatoms,top,fr,vsite,shellfc,constr,
-                              nrnb,wcycle,FALSE);
-              if(plumedswitch){
-                plumed_cmd(plumedmain,"setAtomsNlocal",&cr->dd->nat_home);
-                plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->gatindex);
-              }
-            }
-          }
-
-        }
-/* END PLUMED HREX */
-
-        /* We write a checkpoint at this MD step when:
-         * either at an NS step when we signalled through gs,
-         * or at the last step (but not when we do not want confout),
-         * but never at the first step or with rerun.
-         */
-        bCPT = (((gs.set[eglsCHKPT] && (bNS || ir->nstlist == 0)) ||
-                 (bLastStep && (Flags & MD_CONFOUT))) &&
-                step > ir->init_step && !bRerunMD);
-        if (bCPT)
-        {
-            gs.set[eglsCHKPT] = 0;
-        }
-
-        /* Determine the energy and pressure:
-         * at nstcalcenergy steps and at energy output steps (set below).
-         */
-        if (EI_VV(ir->eI) && (!bInitStep))
-        {
-            /* for vv, the first half of the integration actually corresponds
-               to the previous step.  bCalcEner is only required to be evaluated on the 'next' step,
-               but the virial needs to be calculated on both the current step and the 'next' step. Future
-               reorganization may be able to get rid of one of the bCalcVir=TRUE steps. */
-
-            /* TODO: This is probably not what we want, we will write to energy file one step after nstcalcenergy steps. */
-            bCalcEnerStep = do_per_step(step - 1, ir->nstcalcenergy);
-            bCalcVir      = bCalcEnerStep ||
-                (ir->epc != epcNO && (do_per_step(step, ir->nstpcouple) || do_per_step(step-1, ir->nstpcouple)));
-        }
-        else
-        {
-            bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
-            bCalcVir      = bCalcEnerStep ||
-                (ir->epc != epcNO && do_per_step(step, ir->nstpcouple));
-        }
-        bCalcEner = bCalcEnerStep;
-
-        do_ene = (do_per_step(step, ir->nstenergy) || bLastStep);
-
-        if (do_ene || do_log || bDoReplEx)
-        {
-            bCalcVir  = TRUE;
-            bCalcEner = TRUE;
-        }
-
-        /* Do we need global communication ? */
-        bGStat = (bCalcVir || bCalcEner || bStopCM ||
-                  do_per_step(step, nstglobalcomm) ||
-                  (bVV && IR_NVT_TROTTER(ir) && do_per_step(step-1, nstglobalcomm)));
-
-        /* these CGLO_ options remain the same throughout the iteration */
-        cglo_flags = ((bRerunMD ? CGLO_RERUNMD : 0) |
-                      (bGStat ? CGLO_GSTAT : 0)
-                      );
-
-        force_flags = (GMX_FORCE_STATECHANGED |
-                       ((DYNAMIC_BOX(*ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) |
-                       GMX_FORCE_ALLFORCES |
-                       GMX_FORCE_SEPLRF |
-                       (bCalcVir ? GMX_FORCE_VIRIAL : 0) |
-                       (bCalcEner ? GMX_FORCE_ENERGY : 0) |
-                       (bDoFEP ? GMX_FORCE_DHDL : 0)
-                       );
-
-        if (fr->bTwinRange)
-        {
-            if (do_per_step(step, ir->nstcalclr))
-            {
-                force_flags |= GMX_FORCE_DO_LR;
-            }
-        }
-
-        if (shellfc)
-        {
-            /* Now is the time to relax the shells */
-            count = relax_shell_flexcon(fplog, cr, bVerbose, step,
-                                        ir, bNS, force_flags,
-                                        top,
-                                        constr, enerd, fcd,
-                                        state, f, force_vir, mdatoms,
-                                        nrnb, wcycle, graph, groups,
-                                        shellfc, fr, bBornRadii, t, mu_tot,
-                                        &bConverged, vsite,
-                                        mdoutf_get_fp_field(outf));
-            tcount += count;
-
-            if (bConverged)
-            {
-                nconverged++;
-            }
-        }
-        else
-        {
-            /* The coordinates (x) are shifted (to get whole molecules)
-             * in do_force.
-             * This is parallellized as well, and does communication too.
-             * Check comments in sim_util.c
-             */
-
-            /* PLUMED */
-            plumedNeedsEnergy=0;
-            if(plumedswitch){
-              int pversion=0;
-              plumed_cmd(plumedmain,"getApiVersion",&pversion);
-              long int lstep=step; plumed_cmd(plumedmain,"setStepLong",&lstep);
-              plumed_cmd(plumedmain,"setPositions",&state->x[0][0]);
-              plumed_cmd(plumedmain,"setMasses",&mdatoms->massT[0]);
-              plumed_cmd(plumedmain,"setCharges",&mdatoms->chargeA[0]);
-              plumed_cmd(plumedmain,"setBox",&state->box[0][0]);
-              plumed_cmd(plumedmain,"prepareCalc",NULL);
-              plumed_cmd(plumedmain,"setStopFlag",&plumedWantsToStop);
-              int checkp=0; if(bCPT) checkp=1;
-              if(pversion>3) plumed_cmd(plumedmain,"doCheckPoint",&checkp);
-              plumed_cmd(plumedmain,"setForces",&f[0][0]);
-              plumed_cmd(plumedmain,"isEnergyNeeded",&plumedNeedsEnergy);
-              if(plumedNeedsEnergy) force_flags |= (GMX_FORCE_ENERGY | GMX_FORCE_VIRIAL);
-              clear_mat(plumed_vir);
-              plumed_cmd(plumedmain,"setVirial",&plumed_vir[0][0]);
-            }
-            /* END PLUMED */
-            do_force(fplog, cr, ir, step, nrnb, wcycle, top, groups,
-                     state->box, state->x, &state->hist,
-                     f, force_vir, mdatoms, enerd, fcd,
-                     state->lambda, graph,
-                     fr, vsite, mu_tot, t, mdoutf_get_fp_field(outf), ed, bBornRadii,
-                     (bNS ? GMX_FORCE_NS : 0) | force_flags);
-            /* PLUMED */
-            if(plumedswitch){
-              if(plumedNeedsEnergy){
-                msmul(force_vir,2.0,plumed_vir);
-                plumed_cmd(plumedmain,"setEnergy",&enerd->term[F_EPOT]);
-                plumed_cmd(plumedmain,"performCalc",NULL);
-                msmul(plumed_vir,0.5,force_vir);
-              } else {
-                msmul(plumed_vir,0.5,plumed_vir);
-                m_add(force_vir,plumed_vir,force_vir);
-              }
-              if(bDoReplEx) plumed_cmd(plumedmain,"GREX savePositions",NULL);
-              if(plumedWantsToStop) ir->nsteps=step_rel+1;
-              if(bHREX) plumed_cmd(plumedmain,"GREX cacheLocalUNow",&enerd->term[F_EPOT]);
-            }
-            /* END PLUMED */
-        }
-
-        if (bVV && !bStartingFromCpt && !bRerunMD)
-        /*  ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
-        {
-            rvec *vbuf = NULL;
-
-            wallcycle_start(wcycle, ewcUPDATE);
-            if (ir->eI == eiVV && bInitStep)
-            {
-                /* if using velocity verlet with full time step Ekin,
-                 * take the first half step only to compute the
-                 * virial for the first step. From there,
-                 * revert back to the initial coordinates
-                 * so that the input is actually the initial step.
-                 */
-                snew(vbuf, state->natoms);
-                copy_rvecn(state->v, vbuf, 0, state->natoms); /* should make this better for parallelizing? */
-            }
-            else
-            {
-                /* this is for NHC in the Ekin(t+dt/2) version of vv */
-                trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ1);
-            }
-
-            /* If we are using twin-range interactions where the long-range component
-             * is only evaluated every nstcalclr>1 steps, we should do a special update
-             * step to combine the long-range forces on these steps.
-             * For nstcalclr=1 this is not done, since the forces would have been added
-             * directly to the short-range forces already.
-             *
-             * TODO Remove various aspects of VV+twin-range in master
-             * branch, because VV integrators did not ever support
-             * twin-range multiple time stepping with constraints.
-             */
-            bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr));
-
-            update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC,
-                          f, bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd,
-                          ekind, M, upd, bInitStep, etrtVELOCITY1,
-                          cr, nrnb, constr, &top->idef);
-
-            if (!bRerunMD || rerun_fr.bV || bForceUpdate)         /* Why is rerun_fr.bV here?  Unclear. */
-            {
-                wallcycle_stop(wcycle, ewcUPDATE);
-                update_constraints(fplog, step, NULL, ir, mdatoms,
-                                   state, fr->bMolPBC, graph, f,
-                                   &top->idef, shake_vir,
-                                   cr, nrnb, wcycle, upd, constr,
-                                   TRUE, bCalcVir);
-                wallcycle_start(wcycle, ewcUPDATE);
-                if (bCalcVir && bUpdateDoLR && ir->nstcalclr > 1)
-                {
-                    /* Correct the virial for multiple time stepping */
-                    m_sub(shake_vir, fr->vir_twin_constr, shake_vir);
-                }
-            }
-            else if (graph)
-            {
-                /* Need to unshift here if a do_force has been
-                   called in the previous step */
-                unshift_self(graph, state->box, state->x);
-            }
-            /* if VV, compute the pressure and constraints */
-            /* For VV2, we strictly only need this if using pressure
-             * control, but we really would like to have accurate pressures
-             * printed out.
-             * Think about ways around this in the future?
-             * For now, keep this choice in comments.
-             */
-            /*bPres = (ir->eI==eiVV || IR_NPT_TROTTER(ir)); */
-            /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && IR_NPT_TROTTER(ir)));*/
-            bPres = TRUE;
-            bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK));
-            if (bCalcEner && ir->eI == eiVVAK)
-            {
-                bSumEkinhOld = TRUE;
-            }
-            /* for vv, the first half of the integration actually corresponds to the previous step.
-               So we need information from the last step in the first half of the integration */
-            if (bGStat || do_per_step(step-1, nstglobalcomm))
-            {
-                wallcycle_stop(wcycle, ewcUPDATE);
-                compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
-                                wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
-                                constr, NULL, FALSE, state->box,
-                                top_global, &bSumEkinhOld,
-                                cglo_flags
-                                | CGLO_ENERGY
-                                | (bTemp ? CGLO_TEMPERATURE : 0)
-                                | (bPres ? CGLO_PRESSURE : 0)
-                                | (bPres ? CGLO_CONSTRAINT : 0)
-                                | (bStopCM ? CGLO_STOPCM : 0)
-                                | CGLO_SCALEEKIN
-                                );
-                /* explanation of above:
-                   a) We compute Ekin at the full time step
-                   if 1) we are using the AveVel Ekin, and it's not the
-                   initial step, or 2) if we are using AveEkin, but need the full
-                   time step kinetic energy for the pressure (always true now, since we want accurate statistics).
-                   b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in
-                   EkinAveVel because it's needed for the pressure */
-                wallcycle_start(wcycle, ewcUPDATE);
-            }
-            /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
-            if (!bInitStep)
-            {
-                if (bTrotter)
-                {
-                    m_add(force_vir, shake_vir, total_vir);     /* we need the un-dispersion corrected total vir here */
-                    trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ2);
-                }
-                else
-                {
-                    if (bExchanged)
-                    {
-                        wallcycle_stop(wcycle, ewcUPDATE);
-                        /* We need the kinetic energy at minus the half step for determining
-                         * the full step kinetic energy and possibly for T-coupling.*/
-                        /* This may not be quite working correctly yet . . . . */
-                        compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
-                                        wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot,
-                                        constr, NULL, FALSE, state->box,
-                                        top_global, &bSumEkinhOld,
-                                        CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
-                        wallcycle_start(wcycle, ewcUPDATE);
-                    }
-                }
-            }
-            if (bTrotter && !bInitStep)
-            {
-                copy_mat(shake_vir, state->svir_prev);
-                copy_mat(force_vir, state->fvir_prev);
-                if (IR_NVT_TROTTER(ir) && ir->eI == eiVV)
-                {
-                    /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
-                    enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, NULL, (ir->eI == eiVV), FALSE);
-                    enerd->term[F_EKIN] = trace(ekind->ekin);
-                }
-            }
-            /* if it's the initial step, we performed this first step just to get the constraint virial */
-            if (ir->eI == eiVV && bInitStep)
-            {
-                copy_rvecn(vbuf, state->v, 0, state->natoms);
-                sfree(vbuf);
-            }
-            wallcycle_stop(wcycle, ewcUPDATE);
-        }
-
-        /* compute the conserved quantity */
-        if (bVV)
-        {
-            saved_conserved_quantity = compute_conserved_from_auxiliary(ir, state, &MassQ);
-            if (ir->eI == eiVV)
-            {
-                last_ekin = enerd->term[F_EKIN];
-            }
-            if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres))
-            {
-                saved_conserved_quantity -= enerd->term[F_DISPCORR];
-            }
-            /* sum up the foreign energy and dhdl terms for vv.  currently done every step so that dhdl is correct in the .edr */
-            if (ir->efep != efepNO && !bRerunMD)
-            {
-                sum_dhdl(enerd, state->lambda, ir->fepvals);
-            }
-        }
-
-        /* ########  END FIRST UPDATE STEP  ############## */
-        /* ########  If doing VV, we now have v(dt) ###### */
-        if (bDoExpanded)
-        {
-            /* perform extended ensemble sampling in lambda - we don't
-               actually move to the new state before outputting
-               statistics, but if performing simulated tempering, we
-               do update the velocities and the tau_t. */
-
-            lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state, &state->dfhist, step, state->v, mdatoms);
-            /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */
-            copy_df_history(&state_global->dfhist, &state->dfhist);
-        }
-
-        /* Now we have the energies and forces corresponding to the
-         * coordinates at time t. We must output all of this before
-         * the update.
-         */
-        do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t,
-                                 ir, state, state_global, top_global, fr,
-                                 outf, mdebin, ekind, f, f_global,
-                                 &nchkpt,
-                                 bCPT, bRerunMD, bLastStep, (Flags & MD_CONFOUT),
-                                 bSumEkinhOld);
-        /* Check if IMD step and do IMD communication, if bIMD is TRUE. */
-        bIMDstep = do_IMD(ir->bIMD, step, cr, bNS, state->box, state->x, ir, t, wcycle);
-
-        /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */
-        if (bStartingFromCpt && bTrotter)
-        {
-            copy_mat(state->svir_prev, shake_vir);
-            copy_mat(state->fvir_prev, force_vir);
-        }
-
-        elapsed_time = walltime_accounting_get_current_elapsed_time(walltime_accounting);
-
-        /* Check whether everything is still allright */
-        if (((int)gmx_get_stop_condition() > handled_stop_condition)
-#ifdef GMX_THREAD_MPI
-            && MASTER(cr)
-#endif
-            )
-        {
-            /* this is just make gs.sig compatible with the hack
-               of sending signals around by MPI_Reduce with together with
-               other floats */
-            if (gmx_get_stop_condition() == gmx_stop_cond_next_ns)
-            {
-                gs.sig[eglsSTOPCOND] = 1;
-            }
-            if (gmx_get_stop_condition() == gmx_stop_cond_next)
-            {
-                gs.sig[eglsSTOPCOND] = -1;
-            }
-            /* < 0 means stop at next step, > 0 means stop at next NS step */
-            if (fplog)
-            {
-                fprintf(fplog,
-                        "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
-                        gmx_get_signal_name(),
-                        gs.sig[eglsSTOPCOND] == 1 ? "NS " : "");
-                fflush(fplog);
-            }
-            fprintf(stderr,
-                    "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
-                    gmx_get_signal_name(),
-                    gs.sig[eglsSTOPCOND] == 1 ? "NS " : "");
-            fflush(stderr);
-            handled_stop_condition = (int)gmx_get_stop_condition();
-        }
-        else if (MASTER(cr) && (bNS || ir->nstlist <= 0) &&
-                 (max_hours > 0 && elapsed_time > max_hours*60.0*60.0*0.99) &&
-                 gs.sig[eglsSTOPCOND] == 0 && gs.set[eglsSTOPCOND] == 0)
-        {
-            /* Signal to terminate the run */
-            gs.sig[eglsSTOPCOND] = 1;
-            if (fplog)
-            {
-                fprintf(fplog, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99);
-            }
-            fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99);
-        }
-
-        if (bResetCountersHalfMaxH && MASTER(cr) &&
-            elapsed_time > max_hours*60.0*60.0*0.495)
-        {
-            /* Set flag that will communicate the signal to all ranks in the simulation */
-            gs.sig[eglsRESETCOUNTERS] = 1;
-        }
-
-        /* In parallel we only have to check for checkpointing in steps
-         * where we do global communication,
-         *  otherwise the other nodes don't know.
-         */
-        if (MASTER(cr) && ((bGStat || !PAR(cr)) &&
-                           cpt_period >= 0 &&
-                           (cpt_period == 0 ||
-                            elapsed_time >= nchkpt*cpt_period*60.0)) &&
-            gs.set[eglsCHKPT] == 0)
-        {
-            gs.sig[eglsCHKPT] = 1;
-        }
-
-        /* at the start of step, randomize or scale the velocities ((if vv. Restriction of Andersen controlled
-           in preprocessing */
-
-        if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
-        {
-            gmx_bool bIfRandomize;
-            bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state, upd, constr);
-            /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
-            if (constr && bIfRandomize)
-            {
-                update_constraints(fplog, step, NULL, ir, mdatoms,
-                                   state, fr->bMolPBC, graph, f,
-                                   &top->idef, tmp_vir,
-                                   cr, nrnb, wcycle, upd, constr,
-                                   TRUE, bCalcVir);
-            }
-        }
-        /* #########   START SECOND UPDATE STEP ################# */
-        /* Box is changed in update() when we do pressure coupling,
-         * but we should still use the old box for energy corrections and when
-         * writing it to the energy file, so it matches the trajectory files for
-         * the same timestep above. Make a copy in a separate array.
-         */
-        copy_mat(state->box, lastbox);
-
-        dvdl_constr = 0;
-
-        if (!bRerunMD || rerun_fr.bV || bForceUpdate)
-        {
-            wallcycle_start(wcycle, ewcUPDATE);
-            /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
-            if (bTrotter)
-            {
-                trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3);
-                /* We can only do Berendsen coupling after we have summed
-                 * the kinetic energy or virial. Since the happens
-                 * in global_state after update, we should only do it at
-                 * step % nstlist = 1 with bGStatEveryStep=FALSE.
-                 */
-            }
-            else
-            {
-                update_tcouple(step, ir, state, ekind, &MassQ, mdatoms);
-                update_pcouple(fplog, step, ir, state, pcoupl_mu, M, bInitStep);
-            }
-
-            if (bVV)
-            {
-                bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr));
-
-                /* velocity half-step update */
-                update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f,
-                              bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd,
-                              ekind, M, upd, FALSE, etrtVELOCITY2,
-                              cr, nrnb, constr, &top->idef);
-            }
-
-            /* Above, initialize just copies ekinh into ekin,
-             * it doesn't copy position (for VV),
-             * and entire integrator for MD.
-             */
-
-            if (ir->eI == eiVVAK)
-            {
-                /* We probably only need md->homenr, not state->natoms */
-                if (state->natoms > cbuf_nalloc)
-                {
-                    cbuf_nalloc = state->natoms;
-                    srenew(cbuf, cbuf_nalloc);
-                }
-                copy_rvecn(state->x, cbuf, 0, state->natoms);
-            }
-            bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr));
-
-            update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f,
-                          bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd,
-                          ekind, M, upd, bInitStep, etrtPOSITION, cr, nrnb, constr, &top->idef);
-            wallcycle_stop(wcycle, ewcUPDATE);
-
-            update_constraints(fplog, step, &dvdl_constr, ir, mdatoms, state,
-                               fr->bMolPBC, graph, f,
-                               &top->idef, shake_vir,
-                               cr, nrnb, wcycle, upd, constr,
-                               FALSE, bCalcVir);
-
-            if (bCalcVir && bUpdateDoLR && ir->nstcalclr > 1)
-            {
-                /* Correct the virial for multiple time stepping */
-                m_sub(shake_vir, fr->vir_twin_constr, shake_vir);
-            }
-
-            if (ir->eI == eiVVAK)
-            {
-                /* erase F_EKIN and F_TEMP here? */
-                /* just compute the kinetic energy at the half step to perform a trotter step */
-                compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
-                                wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
-                                constr, NULL, FALSE, lastbox,
-                                top_global, &bSumEkinhOld,
-                                cglo_flags | CGLO_TEMPERATURE
-                                );
-                wallcycle_start(wcycle, ewcUPDATE);
-                trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4);
-                /* now we know the scaling, we can compute the positions again again */
-                copy_rvecn(cbuf, state->x, 0, state->natoms);
-
-                bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr));
-
-                update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f,
-                              bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd,
-                              ekind, M, upd, bInitStep, etrtPOSITION, cr, nrnb, constr, &top->idef);
-                wallcycle_stop(wcycle, ewcUPDATE);
-
-                /* do we need an extra constraint here? just need to copy out of state->v to upd->xp? */
-                /* are the small terms in the shake_vir here due
-                 * to numerical errors, or are they important
-                 * physically? I'm thinking they are just errors, but not completely sure.
-                 * For now, will call without actually constraining, constr=NULL*/
-                update_constraints(fplog, step, NULL, ir, mdatoms,
-                                   state, fr->bMolPBC, graph, f,
-                                   &top->idef, tmp_vir,
-                                   cr, nrnb, wcycle, upd, NULL,
-                                   FALSE, bCalcVir);
-            }
-            if (bVV)
-            {
-                /* this factor or 2 correction is necessary
-                   because half of the constraint force is removed
-                   in the vv step, so we have to double it.  See
-                   the Redmine issue #1255.  It is not yet clear
-                   if the factor of 2 is exact, or just a very
-                   good approximation, and this will be
-                   investigated.  The next step is to see if this
-                   can be done adding a dhdl contribution from the
-                   rattle step, but this is somewhat more
-                   complicated with the current code. Will be
-                   investigated, hopefully for 4.6.3. However,
-                   this current solution is much better than
-                   having it completely wrong.
-                 */
-                enerd->term[F_DVDL_CONSTR] += 2*dvdl_constr;
-            }
-            else
-            {
-                enerd->term[F_DVDL_CONSTR] += dvdl_constr;
-            }
-        }
-        else if (graph)
-        {
-            /* Need to unshift here */
-            unshift_self(graph, state->box, state->x);
-        }
-
-        if (vsite != NULL)
-        {
-            wallcycle_start(wcycle, ewcVSITECONSTR);
-            if (graph != NULL)
-            {
-                shift_self(graph, state->box, state->x);
-            }
-            construct_vsites(vsite, state->x, ir->delta_t, state->v,
-                             top->idef.iparams, top->idef.il,
-                             fr->ePBC, fr->bMolPBC, cr, state->box);
-
-            if (graph != NULL)
-            {
-                unshift_self(graph, state->box, state->x);
-            }
-            wallcycle_stop(wcycle, ewcVSITECONSTR);
-        }
-
-        /* ############## IF NOT VV, Calculate globals HERE  ############ */
-        /* With Leap-Frog we can skip compute_globals at
-         * non-communication steps, but we need to calculate
-         * the kinetic energy one step before communication.
-         */
-        if (bGStat || (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm)))
-        {
-            compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
-                            wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
-                            constr, &gs,
-                            (step_rel % gs.nstms == 0) &&
-                            (multisim_nsteps < 0 || (step_rel < multisim_nsteps)),
-                            lastbox,
-                            top_global, &bSumEkinhOld,
-                            cglo_flags
-                            | (!EI_VV(ir->eI) || bRerunMD ? CGLO_ENERGY : 0)
-                            | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
-                            | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
-                            | (!EI_VV(ir->eI) || bRerunMD ? CGLO_PRESSURE : 0)
-                            | CGLO_CONSTRAINT
-                            );
-        }
-
-        /* #############  END CALC EKIN AND PRESSURE ################# */
-
-        /* Note: this is OK, but there are some numerical precision issues with using the convergence of
-           the virial that should probably be addressed eventually. state->veta has better properies,
-           but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
-           generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
-
-        if (ir->efep != efepNO && (!bVV || bRerunMD))
-        {
-            /* Sum up the foreign energy and dhdl terms for md and sd.
-               Currently done every step so that dhdl is correct in the .edr */
-            sum_dhdl(enerd, state->lambda, ir->fepvals);
-        }
-        update_box(fplog, step, ir, mdatoms, state, f,
-                   pcoupl_mu, nrnb, upd);
-
-        /* ################# END UPDATE STEP 2 ################# */
-        /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
-
-        /* The coordinates (x) were unshifted in update */
-        if (!bGStat)
-        {
-            /* We will not sum ekinh_old,
-             * so signal that we still have to do it.
-             */
-            bSumEkinhOld = TRUE;
-        }
-
-        /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
-
-        /* use the directly determined last velocity, not actually the averaged half steps */
-        if (bTrotter && ir->eI == eiVV)
-        {
-            enerd->term[F_EKIN] = last_ekin;
-        }
-        enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
-
-        if (bVV)
-        {
-            enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
-        }
-        else
-        {
-            enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + compute_conserved_from_auxiliary(ir, state, &MassQ);
-        }
-        /* #########  END PREPARING EDR OUTPUT  ###########  */
-
-        /* Output stuff */
-        if (MASTER(cr))
-        {
-            if (fplog && do_log && bDoExpanded)
-            {
-                /* only needed if doing expanded ensemble */
-                PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals, ir->bSimTemp ? ir->simtempvals : NULL,
-                                          &state_global->dfhist, state->fep_state, ir->nstlog, step);
-            }
-            if (bCalcEner)
-            {
-                upd_mdebin(mdebin, bDoDHDL, bCalcEnerStep,
-                           t, mdatoms->tmass, enerd, state,
-                           ir->fepvals, ir->expandedvals, lastbox,
-                           shake_vir, force_vir, total_vir, pres,
-                           ekind, mu_tot, constr);
-            }
-            else
-            {
-                upd_mdebin_step(mdebin);
-            }
-
-            gmx_bool do_dr  = do_per_step(step, ir->nstdisreout);
-            gmx_bool do_or  = do_per_step(step, ir->nstorireout);
-
-            print_ebin(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or, do_log ? fplog : NULL,
-                       step, t,
-                       eprNORMAL, bCompact, mdebin, fcd, groups, &(ir->opts));
-
-            if (ir->bPull)
-            {
-                pull_print_output(ir->pull_work, step, t);
-            }
-
-            if (do_per_step(step, ir->nstlog))
-            {
-                if (fflush(fplog) != 0)
-                {
-                    gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?");
-                }
-            }
-        }
-        if (bDoExpanded)
-        {
-            /* Have to do this part _after_ outputting the logfile and the edr file */
-            /* Gets written into the state at the beginning of next loop*/
-            state->fep_state = lamnew;
-        }
-        /* Print the remaining wall clock time for the run */
-        if (MULTIMASTER(cr) &&
-            (do_verbose || gmx_got_usr_signal()) &&
-            !bPMETunePrinting)
-        {
-            if (shellfc)
-            {
-                fprintf(stderr, "\n");
-            }
-            print_time(stderr, walltime_accounting, step, ir, cr);
-        }
-
-        /* Ion/water position swapping.
-         * Not done in last step since trajectory writing happens before this call
-         * in the MD loop and exchanges would be lost anyway. */
-        bNeedRepartition = FALSE;
-        if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep &&
-            do_per_step(step, ir->swap->nstswap))
-        {
-            bNeedRepartition = do_swapcoords(cr, step, t, ir, wcycle,
-                                             bRerunMD ? rerun_fr.x   : state->x,
-                                             bRerunMD ? rerun_fr.box : state->box,
-                                             top_global, MASTER(cr) && bVerbose, bRerunMD);
-
-            if (bNeedRepartition && DOMAINDECOMP(cr))
-            {
-                dd_collect_state(cr->dd, state, state_global);
-            }
-        }
-
-        /* Replica exchange */
-        bExchanged = FALSE;
-        if (bDoReplEx)
-        {
-            bExchanged = replica_exchange(fplog, cr, repl_ex,
-                                          state_global, enerd,
-                                          state, step, t);
-        }
-
-        if ( (bExchanged || bNeedRepartition) && DOMAINDECOMP(cr) )
-        {
-            dd_partition_system(fplog, step, cr, TRUE, 1,
-                                state_global, top_global, ir,
-                                state, &f, mdatoms, top, fr,
-                                vsite, shellfc, constr,
-                                nrnb, wcycle, FALSE);
-        }
-
-        bFirstStep       = FALSE;
-        bInitStep        = FALSE;
-        bStartingFromCpt = FALSE;
-
-        /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
-        /* With all integrators, except VV, we need to retain the pressure
-         * at the current step for coupling at the next step.
-         */
-        if ((state->flags & (1<<estPRES_PREV)) &&
-            (bGStatEveryStep ||
-             (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
-        {
-            /* Store the pressure in t_state for pressure coupling
-             * at the next MD step.
-             */
-            copy_mat(pres, state->pres_prev);
-        }
-
-        /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
-
-        if ( (membed != NULL) && (!bLastStep) )
-        {
-            rescale_membed(step_rel, membed, state_global->x);
-        }
-
-        if (bRerunMD)
-        {
-            if (MASTER(cr))
-            {
-                /* read next frame from input trajectory */
-                bNotLastFrame = read_next_frame(oenv, status, &rerun_fr);
-            }
-
-            if (PAR(cr))
-            {
-                rerun_parallel_comm(cr, &rerun_fr, &bNotLastFrame);
-            }
-        }
-
-        cycles = wallcycle_stop(wcycle, ewcSTEP);
-        if (DOMAINDECOMP(cr) && wcycle)
-        {
-            dd_cycles_add(cr->dd, cycles, ddCyclStep);
-        }
-
-        if (!bRerunMD || !rerun_fr.bStep)
-        {
-            /* increase the MD step number */
-            step++;
-            step_rel++;
-        }
-
-        /* TODO make a counter-reset module */
-        /* If it is time to reset counters, set a flag that remains
-           true until counters actually get reset */
-        if (step_rel == wcycle_get_reset_counters(wcycle) ||
-            gs.set[eglsRESETCOUNTERS] != 0)
-        {
-            if (pme_loadbal_is_active(pme_loadbal))
-            {
-                /* Do not permit counter reset while PME load
-                 * balancing is active. The only purpose for resetting
-                 * counters is to measure reliable performance data,
-                 * and that can't be done before balancing
-                 * completes.
-                 *
-                 * TODO consider fixing this by delaying the reset
-                 * until after load balancing completes,
-                 * e.g. https://gerrit.gromacs.org/#/c/4964/2 */
-                gmx_fatal(FARGS, "PME tuning was still active when attempting to "
-                          "reset mdrun counters at step %" GMX_PRId64 ". Try "
-                          "resetting counters later in the run, e.g. with gmx "
-                          "mdrun -resetstep.", step);
-            }
-            reset_all_counters(fplog, cr, step, &step_rel, ir, wcycle, nrnb, walltime_accounting,
-                               use_GPU(fr->nbv) ? fr->nbv : NULL);
-            wcycle_set_reset_counters(wcycle, -1);
-            if (!(cr->duty & DUTY_PME))
-            {
-                /* Tell our PME node to reset its counters */
-                gmx_pme_send_resetcounters(cr, step);
-            }
-            /* Correct max_hours for the elapsed time */
-            max_hours                -= elapsed_time/(60.0*60.0);
-            /* If mdrun -maxh -resethway was active, it can only trigger once */
-            bResetCountersHalfMaxH    = FALSE; /* TODO move this to where gs.sig[eglsRESETCOUNTERS] is set */
-            /* Reset can only happen once, so clear the triggering flag. */
-            gs.set[eglsRESETCOUNTERS] = 0;
-        }
-
-        /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */
-        IMD_prep_energies_send_positions(ir->bIMD && MASTER(cr), bIMDstep, ir->imd, enerd, step, bCalcEner, wcycle);
-
-    }
-    /* End of main MD loop */
-    debug_gmx();
-
-    /* Closing TNG files can include compressing data. Therefore it is good to do that
-     * before stopping the time measurements. */
-    mdoutf_tng_close(outf);
-
-    /* Stop measuring walltime */
-    walltime_accounting_end(walltime_accounting);
-
-    if (bRerunMD && MASTER(cr))
-    {
-        close_trj(status);
-    }
-
-    if (!(cr->duty & DUTY_PME))
-    {
-        /* Tell the PME only node to finish */
-        gmx_pme_send_finish(cr);
-    }
-
-    if (MASTER(cr))
-    {
-        if (ir->nstcalcenergy > 0 && !bRerunMD)
-        {
-            print_ebin(mdoutf_get_fp_ene(outf), FALSE, FALSE, FALSE, fplog, step, t,
-                       eprAVER, FALSE, mdebin, fcd, groups, &(ir->opts));
-        }
-    }
-
-    done_mdoutf(outf);
-    debug_gmx();
-
-    if (bPMETune)
-    {
-        pme_loadbal_done(pme_loadbal, cr, fplog, use_GPU(fr->nbv));
-    }
-
-    if (shellfc && fplog)
-    {
-        fprintf(fplog, "Fraction of iterations that converged:           %.2f %%\n",
-                (nconverged*100.0)/step_rel);
-        fprintf(fplog, "Average number of force evaluations per MD step: %.2f\n\n",
-                tcount/step_rel);
-    }
-
-    if (repl_ex_nst > 0 && MASTER(cr))
-    {
-        print_replica_exchange_statistics(fplog, repl_ex);
-    }
-
-    /* IMD cleanup, if bIMD is TRUE. */
-    IMD_finalize(ir->bIMD, ir->imd);
-
-    walltime_accounting_set_nsteps_done(walltime_accounting, step_rel);
-
-    return 0;
-}
diff --git a/patches/gromacs-5.1.4.diff/src/programs/mdrun/md.cpp.preplumed b/patches/gromacs-5.1.4.diff/src/programs/mdrun/md.cpp.preplumed
deleted file mode 100644
index afa3d62a75a4e225841c05283444184851e8fa20..0000000000000000000000000000000000000000
--- a/patches/gromacs-5.1.4.diff/src/programs/mdrun/md.cpp.preplumed
+++ /dev/null
@@ -1,1846 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2011,2012,2013,2014,2015,2016, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "thread_mpi/threads.h"
-
-#include "gromacs/domdec/domdec.h"
-#include "gromacs/domdec/domdec_network.h"
-#include "gromacs/ewald/pme-load-balancing.h"
-#include "gromacs/ewald/pme.h"
-#include "gromacs/fileio/filenm.h"
-#include "gromacs/fileio/mdoutf.h"
-#include "gromacs/fileio/trajectory_writing.h"
-#include "gromacs/fileio/trx.h"
-#include "gromacs/fileio/trxio.h"
-#include "gromacs/imd/imd.h"
-#include "gromacs/legacyheaders/constr.h"
-#include "gromacs/legacyheaders/ebin.h"
-#include "gromacs/legacyheaders/force.h"
-#include "gromacs/legacyheaders/md_logging.h"
-#include "gromacs/legacyheaders/md_support.h"
-#include "gromacs/legacyheaders/mdatoms.h"
-#include "gromacs/legacyheaders/mdebin.h"
-#include "gromacs/legacyheaders/mdrun.h"
-#include "gromacs/legacyheaders/network.h"
-#include "gromacs/legacyheaders/nrnb.h"
-#include "gromacs/legacyheaders/ns.h"
-#include "gromacs/legacyheaders/shellfc.h"
-#include "gromacs/legacyheaders/sighandler.h"
-#include "gromacs/legacyheaders/sim_util.h"
-#include "gromacs/legacyheaders/tgroup.h"
-#include "gromacs/legacyheaders/typedefs.h"
-#include "gromacs/legacyheaders/update.h"
-#include "gromacs/legacyheaders/vcm.h"
-#include "gromacs/legacyheaders/vsite.h"
-#include "gromacs/legacyheaders/types/commrec.h"
-#include "gromacs/legacyheaders/types/constr.h"
-#include "gromacs/legacyheaders/types/enums.h"
-#include "gromacs/legacyheaders/types/fcdata.h"
-#include "gromacs/legacyheaders/types/force_flags.h"
-#include "gromacs/legacyheaders/types/forcerec.h"
-#include "gromacs/legacyheaders/types/group.h"
-#include "gromacs/legacyheaders/types/inputrec.h"
-#include "gromacs/legacyheaders/types/interaction_const.h"
-#include "gromacs/legacyheaders/types/mdatom.h"
-#include "gromacs/legacyheaders/types/membedt.h"
-#include "gromacs/legacyheaders/types/nrnb.h"
-#include "gromacs/legacyheaders/types/oenv.h"
-#include "gromacs/legacyheaders/types/shellfc.h"
-#include "gromacs/legacyheaders/types/state.h"
-#include "gromacs/listed-forces/manage-threading.h"
-#include "gromacs/math/utilities.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/math/vectypes.h"
-#include "gromacs/mdlib/compute_io.h"
-#include "gromacs/mdlib/mdrun_signalling.h"
-#include "gromacs/mdlib/nb_verlet.h"
-#include "gromacs/mdlib/nbnxn_gpu_data_mgmt.h"
-#include "gromacs/pbcutil/mshift.h"
-#include "gromacs/pbcutil/pbc.h"
-#include "gromacs/pulling/pull.h"
-#include "gromacs/swap/swapcoords.h"
-#include "gromacs/timing/wallcycle.h"
-#include "gromacs/timing/walltime_accounting.h"
-#include "gromacs/topology/atoms.h"
-#include "gromacs/topology/idef.h"
-#include "gromacs/topology/mtop_util.h"
-#include "gromacs/topology/topology.h"
-#include "gromacs/utility/basedefinitions.h"
-#include "gromacs/utility/cstringutil.h"
-#include "gromacs/utility/fatalerror.h"
-#include "gromacs/utility/real.h"
-#include "gromacs/utility/smalloc.h"
-
-#include "deform.h"
-#include "membed.h"
-#include "repl_ex.h"
-
-#ifdef GMX_FAHCORE
-#include "corewrap.h"
-#endif
-
-static void reset_all_counters(FILE *fplog, t_commrec *cr,
-                               gmx_int64_t step,
-                               gmx_int64_t *step_rel, t_inputrec *ir,
-                               gmx_wallcycle_t wcycle, t_nrnb *nrnb,
-                               gmx_walltime_accounting_t walltime_accounting,
-                               struct nonbonded_verlet_t *nbv)
-{
-    char sbuf[STEPSTRSIZE];
-
-    /* Reset all the counters related to performance over the run */
-    md_print_warn(cr, fplog, "step %s: resetting all time and cycle counters\n",
-                  gmx_step_str(step, sbuf));
-
-    if (use_GPU(nbv))
-    {
-        nbnxn_gpu_reset_timings(nbv);
-    }
-
-    wallcycle_stop(wcycle, ewcRUN);
-    wallcycle_reset_all(wcycle);
-    if (DOMAINDECOMP(cr))
-    {
-        reset_dd_statistics_counters(cr->dd);
-    }
-    init_nrnb(nrnb);
-    ir->init_step += *step_rel;
-    ir->nsteps    -= *step_rel;
-    *step_rel      = 0;
-    wallcycle_start(wcycle, ewcRUN);
-    walltime_accounting_start(walltime_accounting);
-    print_date_and_time(fplog, cr->nodeid, "Restarted time", gmx_gettime());
-}
-
-double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[],
-             const output_env_t oenv, gmx_bool bVerbose, gmx_bool bCompact,
-             int nstglobalcomm,
-             gmx_vsite_t *vsite, gmx_constr_t constr,
-             int stepout, t_inputrec *ir,
-             gmx_mtop_t *top_global,
-             t_fcdata *fcd,
-             t_state *state_global,
-             t_mdatoms *mdatoms,
-             t_nrnb *nrnb, gmx_wallcycle_t wcycle,
-             gmx_edsam_t ed, t_forcerec *fr,
-             int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, gmx_membed_t membed,
-             real cpt_period, real max_hours,
-             int imdport,
-             unsigned long Flags,
-             gmx_walltime_accounting_t walltime_accounting)
-{
-    gmx_mdoutf_t    outf = NULL;
-    gmx_int64_t     step, step_rel;
-    double          elapsed_time;
-    double          t, t0, lam0[efptNR];
-    gmx_bool        bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner;
-    gmx_bool        bNS, bNStList, bSimAnn, bStopCM, bRerunMD, bNotLastFrame = FALSE,
-                    bFirstStep, bStateFromCP, bStateFromTPX, bInitStep, bLastStep,
-                    bBornRadii, bStartingFromCpt;
-    gmx_bool          bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
-    gmx_bool          do_ene, do_log, do_verbose, bRerunWarnNoV = TRUE,
-                      bForceUpdate = FALSE, bCPT;
-    gmx_bool          bMasterState;
-    int               force_flags, cglo_flags;
-    tensor            force_vir, shake_vir, total_vir, tmp_vir, pres;
-    int               i, m;
-    t_trxstatus      *status;
-    rvec              mu_tot;
-    t_vcm            *vcm;
-    matrix            pcoupl_mu, M;
-    t_trxframe        rerun_fr;
-    gmx_repl_ex_t     repl_ex = NULL;
-    int               nchkpt  = 1;
-    gmx_localtop_t   *top;
-    t_mdebin         *mdebin   = NULL;
-    t_state          *state    = NULL;
-    rvec             *f_global = NULL;
-    gmx_enerdata_t   *enerd;
-    rvec             *f = NULL;
-    gmx_global_stat_t gstat;
-    gmx_update_t      upd   = NULL;
-    t_graph          *graph = NULL;
-    gmx_signalling_t  gs;
-    gmx_groups_t     *groups;
-    gmx_ekindata_t   *ekind;
-    gmx_shellfc_t     shellfc;
-    int               count, nconverged = 0;
-    double            tcount                 = 0;
-    gmx_bool          bConverged             = TRUE, bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
-    gmx_bool          bResetCountersHalfMaxH = FALSE;
-    gmx_bool          bVV, bTemp, bPres, bTrotter;
-    gmx_bool          bUpdateDoLR;
-    real              dvdl_constr;
-    rvec             *cbuf        = NULL;
-    int               cbuf_nalloc = 0;
-    matrix            lastbox;
-    int               lamnew  = 0;
-    /* for FEP */
-    int               nstfep = 0;
-    double            cycles;
-    real              saved_conserved_quantity = 0;
-    real              last_ekin                = 0;
-    t_extmass         MassQ;
-    int             **trotter_seq;
-    char              sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
-    int               handled_stop_condition = gmx_stop_cond_none; /* compare to get_stop_condition*/
-    gmx_int64_t       multisim_nsteps        = -1;                 /* number of steps to do  before first multisim
-                                                                          simulation stops. If equal to zero, don't
-                                                                          communicate any more between multisims.*/
-    /* PME load balancing data for GPU kernels */
-    pme_load_balancing_t *pme_loadbal      = NULL;
-    gmx_bool              bPMETune         = FALSE;
-    gmx_bool              bPMETunePrinting = FALSE;
-
-    /* Interactive MD */
-    gmx_bool          bIMDstep = FALSE;
-
-#ifdef GMX_FAHCORE
-    /* Temporary addition for FAHCORE checkpointing */
-    int chkpt_ret;
-#endif
-
-    /* Check for special mdrun options */
-    bRerunMD = (Flags & MD_RERUN);
-    if (Flags & MD_RESETCOUNTERSHALFWAY)
-    {
-        if (ir->nsteps > 0)
-        {
-            /* Signal to reset the counters half the simulation steps. */
-            wcycle_set_reset_counters(wcycle, ir->nsteps/2);
-        }
-        /* Signal to reset the counters halfway the simulation time. */
-        bResetCountersHalfMaxH = (max_hours > 0);
-    }
-
-    /* md-vv uses averaged full step velocities for T-control
-       md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
-       md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
-    bVV      = EI_VV(ir->eI);
-    bTrotter = (bVV && (IR_NPT_TROTTER(ir) || IR_NPH_TROTTER(ir) || IR_NVT_TROTTER(ir)));
-
-    if (bRerunMD)
-    {
-        /* Since we don't know if the frames read are related in any way,
-         * rebuild the neighborlist at every step.
-         */
-        ir->nstlist       = 1;
-        ir->nstcalcenergy = 1;
-        nstglobalcomm     = 1;
-    }
-
-    check_ir_old_tpx_versions(cr, fplog, ir, top_global);
-
-    nstglobalcomm   = check_nstglobalcomm(fplog, cr, nstglobalcomm, ir);
-    bGStatEveryStep = (nstglobalcomm == 1);
-
-    if (bRerunMD)
-    {
-        ir->nstxout_compressed = 0;
-    }
-    groups = &top_global->groups;
-
-    /* Initial values */
-    init_md(fplog, cr, ir, oenv, &t, &t0, state_global->lambda,
-            &(state_global->fep_state), lam0,
-            nrnb, top_global, &upd,
-            nfile, fnm, &outf, &mdebin,
-            force_vir, shake_vir, mu_tot, &bSimAnn, &vcm, Flags, wcycle);
-
-    clear_mat(total_vir);
-    clear_mat(pres);
-    /* Energy terms and groups */
-    snew(enerd, 1);
-    init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda,
-                  enerd);
-    if (DOMAINDECOMP(cr))
-    {
-        f = NULL;
-    }
-    else
-    {
-        snew(f, top_global->natoms);
-    }
-
-    /* Kinetic energy data */
-    snew(ekind, 1);
-    init_ekindata(fplog, top_global, &(ir->opts), ekind);
-    /* Copy the cos acceleration to the groups struct */
-    ekind->cosacc.cos_accel = ir->cos_accel;
-
-    gstat = global_stat_init(ir);
-    debug_gmx();
-
-    /* Check for polarizable models and flexible constraints */
-    shellfc = init_shell_flexcon(fplog,
-                                 top_global, n_flexible_constraints(constr),
-                                 (ir->bContinuation ||
-                                  (DOMAINDECOMP(cr) && !MASTER(cr))) ?
-                                 NULL : state_global->x);
-    if (shellfc && ir->nstcalcenergy != 1)
-    {
-        gmx_fatal(FARGS, "You have nstcalcenergy set to a value (%d) that is different from 1.\nThis is not supported in combinations with shell particles.\nPlease make a new tpr file.", ir->nstcalcenergy);
-    }
-    if (shellfc && DOMAINDECOMP(cr))
-    {
-        gmx_fatal(FARGS, "Shell particles are not implemented with domain decomposition, use a single rank");
-    }
-    if (shellfc && ir->eI == eiNM)
-    {
-        /* Currently shells don't work with Normal Modes */
-        gmx_fatal(FARGS, "Normal Mode analysis is not supported with shells.\nIf you'd like to help with adding support, we have an open discussion at http://redmine.gromacs.org/issues/879\n");
-    }
-
-    if (vsite && ir->eI == eiNM)
-    {
-        /* Currently virtual sites don't work with Normal Modes */
-        gmx_fatal(FARGS, "Normal Mode analysis is not supported with virtual sites.\nIf you'd like to help with adding support, we have an open discussion at http://redmine.gromacs.org/issues/879\n");
-    }
-
-    if (DEFORM(*ir))
-    {
-        tMPI_Thread_mutex_lock(&deform_init_box_mutex);
-        set_deform_reference_box(upd,
-                                 deform_init_init_step_tpx,
-                                 deform_init_box_tpx);
-        tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
-    }
-
-    {
-        double io = compute_io(ir, top_global->natoms, groups, mdebin->ebin->nener, 1);
-        if ((io > 2000) && MASTER(cr))
-        {
-            fprintf(stderr,
-                    "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
-                    io);
-        }
-    }
-
-    if (DOMAINDECOMP(cr))
-    {
-        top = dd_init_local_top(top_global);
-
-        snew(state, 1);
-        dd_init_local_state(cr->dd, state_global, state);
-
-        if (DDMASTER(cr->dd) && ir->nstfout)
-        {
-            snew(f_global, state_global->natoms);
-        }
-    }
-    else
-    {
-        top = gmx_mtop_generate_local_top(top_global, ir);
-
-        state    = serial_init_local_state(state_global);
-        f_global = f;
-
-        atoms2md(top_global, ir, 0, NULL, top_global->natoms, mdatoms);
-
-        if (vsite)
-        {
-            set_vsite_top(vsite, top, mdatoms, cr);
-        }
-
-        if (ir->ePBC != epbcNONE && !fr->bMolPBC)
-        {
-            graph = mk_graph(fplog, &(top->idef), 0, top_global->natoms, FALSE, FALSE);
-        }
-
-        if (shellfc)
-        {
-            make_local_shells(cr, mdatoms, shellfc);
-        }
-
-        setup_bonded_threading(fr, &top->idef);
-    }
-
-    /* Set up interactive MD (IMD) */
-    init_IMD(ir, cr, top_global, fplog, ir->nstcalcenergy, state_global->x,
-             nfile, fnm, oenv, imdport, Flags);
-
-    if (DOMAINDECOMP(cr))
-    {
-        /* Distribute the charge groups over the nodes from the master node */
-        dd_partition_system(fplog, ir->init_step, cr, TRUE, 1,
-                            state_global, top_global, ir,
-                            state, &f, mdatoms, top, fr,
-                            vsite, shellfc, constr,
-                            nrnb, NULL, FALSE);
-
-    }
-
-    update_mdatoms(mdatoms, state->lambda[efptMASS]);
-
-    if (opt2bSet("-cpi", nfile, fnm))
-    {
-        bStateFromCP = gmx_fexist_master(opt2fn_master("-cpi", nfile, fnm, cr), cr);
-    }
-    else
-    {
-        bStateFromCP = FALSE;
-    }
-
-    if (ir->bExpanded)
-    {
-        init_expanded_ensemble(bStateFromCP, ir, &state->dfhist);
-    }
-
-    if (MASTER(cr))
-    {
-        if (bStateFromCP)
-        {
-            /* Update mdebin with energy history if appending to output files */
-            if (Flags & MD_APPENDFILES)
-            {
-                restore_energyhistory_from_state(mdebin, &state_global->enerhist);
-            }
-            else
-            {
-                /* We might have read an energy history from checkpoint,
-                 * free the allocated memory and reset the counts.
-                 */
-                done_energyhistory(&state_global->enerhist);
-                init_energyhistory(&state_global->enerhist);
-            }
-        }
-        /* Set the initial energy history in state by updating once */
-        update_energyhistory(&state_global->enerhist, mdebin);
-    }
-
-    /* Initialize constraints */
-    if (constr && !DOMAINDECOMP(cr))
-    {
-        set_constraints(constr, top, ir, mdatoms, cr);
-    }
-
-    if (repl_ex_nst > 0 && MASTER(cr))
-    {
-        repl_ex = init_replica_exchange(fplog, cr->ms, state_global, ir,
-                                        repl_ex_nst, repl_ex_nex, repl_ex_seed);
-    }
-
-    /* PME tuning is only supported with PME for Coulomb. Is is not supported
-     * with only LJ PME, or for reruns.
-     */
-    bPMETune = ((Flags & MD_TUNEPME) && EEL_PME(fr->eeltype) && !bRerunMD &&
-                !(Flags & MD_REPRODUCIBLE));
-    if (bPMETune)
-    {
-        pme_loadbal_init(&pme_loadbal, cr, fplog, ir, state->box,
-                         fr->ic, fr->pmedata, use_GPU(fr->nbv),
-                         &bPMETunePrinting);
-    }
-
-    if (!ir->bContinuation && !bRerunMD)
-    {
-        if (mdatoms->cFREEZE && (state->flags & (1<<estV)))
-        {
-            /* Set the velocities of frozen particles to zero */
-            for (i = 0; i < mdatoms->homenr; i++)
-            {
-                for (m = 0; m < DIM; m++)
-                {
-                    if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
-                    {
-                        state->v[i][m] = 0;
-                    }
-                }
-            }
-        }
-
-        if (constr)
-        {
-            /* Constrain the initial coordinates and velocities */
-            do_constrain_first(fplog, constr, ir, mdatoms, state,
-                               cr, nrnb, fr, top);
-        }
-        if (vsite)
-        {
-            /* Construct the virtual sites for the initial configuration */
-            construct_vsites(vsite, state->x, ir->delta_t, NULL,
-                             top->idef.iparams, top->idef.il,
-                             fr->ePBC, fr->bMolPBC, cr, state->box);
-        }
-    }
-
-    debug_gmx();
-
-    if (IR_TWINRANGE(*ir) && repl_ex_nst % ir->nstcalclr != 0)
-    {
-        /* We should exchange at nstcalclr steps to get correct integration */
-        gmx_fatal(FARGS, "The replica exchange period (%d) is not divisible by nstcalclr (%d)", repl_ex_nst, ir->nstcalclr);
-    }
-
-    if (ir->efep != efepNO)
-    {
-        /* Set free energy calculation frequency as the greatest common
-         * denominator of nstdhdl and repl_ex_nst.
-         * Check for nstcalclr with twin-range, since we need the long-range
-         * contribution to the free-energy at the correct (nstcalclr) steps.
-         */
-        nstfep = ir->fepvals->nstdhdl;
-        if (ir->bExpanded)
-        {
-            if (IR_TWINRANGE(*ir) &&
-                ir->expandedvals->nstexpanded % ir->nstcalclr != 0)
-            {
-                gmx_fatal(FARGS, "nstexpanded should be divisible by nstcalclr");
-            }
-            nstfep = gmx_greatest_common_divisor(ir->expandedvals->nstexpanded, nstfep);
-        }
-        if (repl_ex_nst > 0)
-        {
-            nstfep = gmx_greatest_common_divisor(repl_ex_nst, nstfep);
-        }
-        /* We checked divisibility of repl_ex_nst and nstcalclr above */
-        if (IR_TWINRANGE(*ir) && nstfep % ir->nstcalclr != 0)
-        {
-            gmx_incons("nstfep not divisible by nstcalclr");
-        }
-    }
-
-    /* Be REALLY careful about what flags you set here. You CANNOT assume
-     * this is the first step, since we might be restarting from a checkpoint,
-     * and in that case we should not do any modifications to the state.
-     */
-    bStopCM = (ir->comm_mode != ecmNO && !ir->bContinuation);
-
-    cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT
-                  | (bStopCM ? CGLO_STOPCM : 0)
-                  | (bVV ? CGLO_PRESSURE : 0)
-                  | (bVV ? CGLO_CONSTRAINT : 0)
-                  | (bRerunMD ? CGLO_RERUNMD : 0)
-                  | ((Flags & MD_READ_EKIN) ? CGLO_READEKIN : 0));
-
-    bSumEkinhOld = FALSE;
-    compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
-                    NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
-                    constr, NULL, FALSE, state->box,
-                    top_global, &bSumEkinhOld, cglo_flags);
-    if (ir->eI == eiVVAK)
-    {
-        /* a second call to get the half step temperature initialized as well */
-        /* we do the same call as above, but turn the pressure off -- internally to
-           compute_globals, this is recognized as a velocity verlet half-step
-           kinetic energy calculation.  This minimized excess variables, but
-           perhaps loses some logic?*/
-
-        compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
-                        NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
-                        constr, NULL, FALSE, state->box,
-                        top_global, &bSumEkinhOld,
-                        cglo_flags &~(CGLO_STOPCM | CGLO_PRESSURE));
-    }
-
-    /* Calculate the initial half step temperature, and save the ekinh_old */
-    if (!(Flags & MD_STARTFROMCPT))
-    {
-        for (i = 0; (i < ir->opts.ngtc); i++)
-        {
-            copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old);
-        }
-    }
-    if (ir->eI != eiVV)
-    {
-        enerd->term[F_TEMP] *= 2; /* result of averages being done over previous and current step,
-                                     and there is no previous step */
-    }
-
-    /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter
-       temperature control */
-    trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter);
-
-    if (MASTER(cr))
-    {
-        if (constr && !ir->bContinuation && ir->eConstrAlg == econtLINCS)
-        {
-            fprintf(fplog,
-                    "RMS relative constraint deviation after constraining: %.2e\n",
-                    constr_rmsd(constr, FALSE));
-        }
-        if (EI_STATE_VELOCITY(ir->eI))
-        {
-            fprintf(fplog, "Initial temperature: %g K\n", enerd->term[F_TEMP]);
-        }
-        if (bRerunMD)
-        {
-            fprintf(stderr, "starting md rerun '%s', reading coordinates from"
-                    " input trajectory '%s'\n\n",
-                    *(top_global->name), opt2fn("-rerun", nfile, fnm));
-            if (bVerbose)
-            {
-                fprintf(stderr, "Calculated time to finish depends on nsteps from "
-                        "run input file,\nwhich may not correspond to the time "
-                        "needed to process input trajectory.\n\n");
-            }
-        }
-        else
-        {
-            char tbuf[20];
-            fprintf(stderr, "starting mdrun '%s'\n",
-                    *(top_global->name));
-            if (ir->nsteps >= 0)
-            {
-                sprintf(tbuf, "%8.1f", (ir->init_step+ir->nsteps)*ir->delta_t);
-            }
-            else
-            {
-                sprintf(tbuf, "%s", "infinite");
-            }
-            if (ir->init_step > 0)
-            {
-                fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
-                        gmx_step_str(ir->init_step+ir->nsteps, sbuf), tbuf,
-                        gmx_step_str(ir->init_step, sbuf2),
-                        ir->init_step*ir->delta_t);
-            }
-            else
-            {
-                fprintf(stderr, "%s steps, %s ps.\n",
-                        gmx_step_str(ir->nsteps, sbuf), tbuf);
-            }
-        }
-        fprintf(fplog, "\n");
-    }
-
-    walltime_accounting_start(walltime_accounting);
-    wallcycle_start(wcycle, ewcRUN);
-    print_start(fplog, cr, walltime_accounting, "mdrun");
-
-    /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */
-#ifdef GMX_FAHCORE
-    chkpt_ret = fcCheckPointParallel( cr->nodeid,
-                                      NULL, 0);
-    if (chkpt_ret == 0)
-    {
-        gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", 0 );
-    }
-#endif
-
-    debug_gmx();
-    /***********************************************************
-     *
-     *             Loop over MD steps
-     *
-     ************************************************************/
-
-    /* if rerunMD then read coordinates and velocities from input trajectory */
-    if (bRerunMD)
-    {
-        if (getenv("GMX_FORCE_UPDATE"))
-        {
-            bForceUpdate = TRUE;
-        }
-
-        rerun_fr.natoms = 0;
-        if (MASTER(cr))
-        {
-            bNotLastFrame = read_first_frame(oenv, &status,
-                                             opt2fn("-rerun", nfile, fnm),
-                                             &rerun_fr, TRX_NEED_X | TRX_READ_V);
-            if (rerun_fr.natoms != top_global->natoms)
-            {
-                gmx_fatal(FARGS,
-                          "Number of atoms in trajectory (%d) does not match the "
-                          "run input file (%d)\n",
-                          rerun_fr.natoms, top_global->natoms);
-            }
-            if (ir->ePBC != epbcNONE)
-            {
-                if (!rerun_fr.bBox)
-                {
-                    gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f does not contain a box, while pbc is used", rerun_fr.step, rerun_fr.time);
-                }
-                if (max_cutoff2(ir->ePBC, rerun_fr.box) < sqr(fr->rlistlong))
-                {
-                    gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f has too small box dimensions", rerun_fr.step, rerun_fr.time);
-                }
-            }
-        }
-
-        if (PAR(cr))
-        {
-            rerun_parallel_comm(cr, &rerun_fr, &bNotLastFrame);
-        }
-
-        if (ir->ePBC != epbcNONE)
-        {
-            /* Set the shift vectors.
-             * Necessary here when have a static box different from the tpr box.
-             */
-            calc_shifts(rerun_fr.box, fr->shift_vec);
-        }
-    }
-
-    /* loop over MD steps or if rerunMD to end of input trajectory */
-    bFirstStep = TRUE;
-    /* Skip the first Nose-Hoover integration when we get the state from tpx */
-    bStateFromTPX    = !bStateFromCP;
-    bInitStep        = bFirstStep && (bStateFromTPX || bVV);
-    bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep;
-    bSumEkinhOld     = FALSE;
-    bExchanged       = FALSE;
-    bNeedRepartition = FALSE;
-
-    init_global_signals(&gs, cr, ir, repl_ex_nst);
-
-    step     = ir->init_step;
-    step_rel = 0;
-
-    if (MULTISIM(cr) && (repl_ex_nst <= 0 ))
-    {
-        /* check how many steps are left in other sims */
-        multisim_nsteps = get_multisim_nsteps(cr, ir->nsteps);
-    }
-    if (MULTISIM(cr) && max_hours > 0)
-    {
-        gmx_fatal(FARGS, "The combination of mdrun -maxh and mdrun -multi is not supported. Please use the nsteps .mdp field.");
-    }
-
-    /* and stop now if we should */
-    bLastStep = (bRerunMD || (ir->nsteps >= 0 && step_rel > ir->nsteps) ||
-                 ((multisim_nsteps >= 0) && (step_rel >= multisim_nsteps )));
-    while (!bLastStep || (bRerunMD && bNotLastFrame))
-    {
-
-        /* Determine if this is a neighbor search step */
-        bNStList = (ir->nstlist > 0  && step % ir->nstlist == 0);
-
-        if (bPMETune && bNStList)
-        {
-            /* PME grid + cut-off optimization with GPUs or PME nodes */
-            pme_loadbal_do(pme_loadbal, cr,
-                           (bVerbose && MASTER(cr)) ? stderr : NULL,
-                           fplog,
-                           ir, fr, state, wcycle,
-                           step, step_rel,
-                           &bPMETunePrinting);
-        }
-
-        wallcycle_start(wcycle, ewcSTEP);
-
-        if (bRerunMD)
-        {
-            if (rerun_fr.bStep)
-            {
-                step     = rerun_fr.step;
-                step_rel = step - ir->init_step;
-            }
-            if (rerun_fr.bTime)
-            {
-                t = rerun_fr.time;
-            }
-            else
-            {
-                t = step;
-            }
-        }
-        else
-        {
-            bLastStep = (step_rel == ir->nsteps);
-            t         = t0 + step*ir->delta_t;
-        }
-
-        if (ir->efep != efepNO || ir->bSimTemp)
-        {
-            /* find and set the current lambdas.  If rerunning, we either read in a state, or a lambda value,
-               requiring different logic. */
-
-            set_current_lambdas(step, ir->fepvals, bRerunMD, &rerun_fr, state_global, state, lam0);
-            bDoDHDL      = do_per_step(step, ir->fepvals->nstdhdl);
-            bDoFEP       = ((ir->efep != efepNO) && do_per_step(step, nstfep));
-            bDoExpanded  = (do_per_step(step, ir->expandedvals->nstexpanded)
-                            && (ir->bExpanded) && (step > 0) && (!bStartingFromCpt));
-        }
-
-        bDoReplEx = ((repl_ex_nst > 0) && (step > 0) && !bLastStep &&
-                     do_per_step(step, repl_ex_nst));
-
-        if (bSimAnn)
-        {
-            update_annealing_target_temp(&(ir->opts), t);
-        }
-
-        if (bRerunMD)
-        {
-            if (!DOMAINDECOMP(cr) || MASTER(cr))
-            {
-                for (i = 0; i < state_global->natoms; i++)
-                {
-                    copy_rvec(rerun_fr.x[i], state_global->x[i]);
-                }
-                if (rerun_fr.bV)
-                {
-                    for (i = 0; i < state_global->natoms; i++)
-                    {
-                        copy_rvec(rerun_fr.v[i], state_global->v[i]);
-                    }
-                }
-                else
-                {
-                    for (i = 0; i < state_global->natoms; i++)
-                    {
-                        clear_rvec(state_global->v[i]);
-                    }
-                    if (bRerunWarnNoV)
-                    {
-                        fprintf(stderr, "\nWARNING: Some frames do not contain velocities.\n"
-                                "         Ekin, temperature and pressure are incorrect,\n"
-                                "         the virial will be incorrect when constraints are present.\n"
-                                "\n");
-                        bRerunWarnNoV = FALSE;
-                    }
-                }
-            }
-            copy_mat(rerun_fr.box, state_global->box);
-            copy_mat(state_global->box, state->box);
-
-            if (vsite && (Flags & MD_RERUN_VSITE))
-            {
-                if (DOMAINDECOMP(cr))
-                {
-                    gmx_fatal(FARGS, "Vsite recalculation with -rerun is not implemented with domain decomposition, use a single rank");
-                }
-                if (graph)
-                {
-                    /* Following is necessary because the graph may get out of sync
-                     * with the coordinates if we only have every N'th coordinate set
-                     */
-                    mk_mshift(fplog, graph, fr->ePBC, state->box, state->x);
-                    shift_self(graph, state->box, state->x);
-                }
-                construct_vsites(vsite, state->x, ir->delta_t, state->v,
-                                 top->idef.iparams, top->idef.il,
-                                 fr->ePBC, fr->bMolPBC, cr, state->box);
-                if (graph)
-                {
-                    unshift_self(graph, state->box, state->x);
-                }
-            }
-        }
-
-        /* Stop Center of Mass motion */
-        bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm));
-
-        if (bRerunMD)
-        {
-            /* for rerun MD always do Neighbour Searching */
-            bNS      = (bFirstStep || ir->nstlist != 0);
-            bNStList = bNS;
-        }
-        else
-        {
-            /* Determine whether or not to do Neighbour Searching and LR */
-            bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition);
-        }
-
-        /* check whether we should stop because another simulation has
-           stopped. */
-        if (MULTISIM(cr))
-        {
-            if ( (multisim_nsteps >= 0) &&  (step_rel >= multisim_nsteps)  &&
-                 (multisim_nsteps != ir->nsteps) )
-            {
-                if (bNS)
-                {
-                    if (MASTER(cr))
-                    {
-                        fprintf(stderr,
-                                "Stopping simulation %d because another one has finished\n",
-                                cr->ms->sim);
-                    }
-                    bLastStep         = TRUE;
-                    gs.sig[eglsCHKPT] = 1;
-                }
-            }
-        }
-
-        /* < 0 means stop at next step, > 0 means stop at next NS step */
-        if ( (gs.set[eglsSTOPCOND] < 0) ||
-             ( (gs.set[eglsSTOPCOND] > 0) && (bNStList || ir->nstlist == 0) ) )
-        {
-            bLastStep = TRUE;
-        }
-
-        /* Determine whether or not to update the Born radii if doing GB */
-        bBornRadii = bFirstStep;
-        if (ir->implicit_solvent && (step % ir->nstgbradii == 0))
-        {
-            bBornRadii = TRUE;
-        }
-
-        /* do_log triggers energy and virial calculation. Because this leads
-         * to different code paths, forces can be different. Thus for exact
-         * continuation we should avoid extra log output.
-         * Note that the || bLastStep can result in non-exact continuation
-         * beyond the last step. But we don't consider that to be an issue.
-         */
-        do_log     = do_per_step(step, ir->nstlog) || (bFirstStep && !bStateFromCP) || bLastStep;
-        do_verbose = bVerbose &&
-            (step % stepout == 0 || bFirstStep || bLastStep);
-
-        if (bNS && !(bFirstStep && ir->bContinuation && !bRerunMD))
-        {
-            if (bRerunMD)
-            {
-                bMasterState = TRUE;
-            }
-            else
-            {
-                bMasterState = FALSE;
-                /* Correct the new box if it is too skewed */
-                if (DYNAMIC_BOX(*ir))
-                {
-                    if (correct_box(fplog, step, state->box, graph))
-                    {
-                        bMasterState = TRUE;
-                    }
-                }
-                if (DOMAINDECOMP(cr) && bMasterState)
-                {
-                    dd_collect_state(cr->dd, state, state_global);
-                }
-            }
-
-            if (DOMAINDECOMP(cr))
-            {
-                /* Repartition the domain decomposition */
-                dd_partition_system(fplog, step, cr,
-                                    bMasterState, nstglobalcomm,
-                                    state_global, top_global, ir,
-                                    state, &f, mdatoms, top, fr,
-                                    vsite, shellfc, constr,
-                                    nrnb, wcycle,
-                                    do_verbose && !bPMETunePrinting);
-            }
-        }
-
-        if (MASTER(cr) && do_log)
-        {
-            print_ebin_header(fplog, step, t, state->lambda[efptFEP]); /* can we improve the information printed here? */
-        }
-
-        if (ir->efep != efepNO)
-        {
-            update_mdatoms(mdatoms, state->lambda[efptMASS]);
-        }
-
-        if ((bRerunMD && rerun_fr.bV) || bExchanged)
-        {
-
-            /* We need the kinetic energy at minus the half step for determining
-             * the full step kinetic energy and possibly for T-coupling.*/
-            /* This may not be quite working correctly yet . . . . */
-            compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
-                            wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot,
-                            constr, NULL, FALSE, state->box,
-                            top_global, &bSumEkinhOld,
-                            CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
-        }
-        clear_mat(force_vir);
-
-        /* We write a checkpoint at this MD step when:
-         * either at an NS step when we signalled through gs,
-         * or at the last step (but not when we do not want confout),
-         * but never at the first step or with rerun.
-         */
-        bCPT = (((gs.set[eglsCHKPT] && (bNS || ir->nstlist == 0)) ||
-                 (bLastStep && (Flags & MD_CONFOUT))) &&
-                step > ir->init_step && !bRerunMD);
-        if (bCPT)
-        {
-            gs.set[eglsCHKPT] = 0;
-        }
-
-        /* Determine the energy and pressure:
-         * at nstcalcenergy steps and at energy output steps (set below).
-         */
-        if (EI_VV(ir->eI) && (!bInitStep))
-        {
-            /* for vv, the first half of the integration actually corresponds
-               to the previous step.  bCalcEner is only required to be evaluated on the 'next' step,
-               but the virial needs to be calculated on both the current step and the 'next' step. Future
-               reorganization may be able to get rid of one of the bCalcVir=TRUE steps. */
-
-            /* TODO: This is probably not what we want, we will write to energy file one step after nstcalcenergy steps. */
-            bCalcEnerStep = do_per_step(step - 1, ir->nstcalcenergy);
-            bCalcVir      = bCalcEnerStep ||
-                (ir->epc != epcNO && (do_per_step(step, ir->nstpcouple) || do_per_step(step-1, ir->nstpcouple)));
-        }
-        else
-        {
-            bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
-            bCalcVir      = bCalcEnerStep ||
-                (ir->epc != epcNO && do_per_step(step, ir->nstpcouple));
-        }
-        bCalcEner = bCalcEnerStep;
-
-        do_ene = (do_per_step(step, ir->nstenergy) || bLastStep);
-
-        if (do_ene || do_log || bDoReplEx)
-        {
-            bCalcVir  = TRUE;
-            bCalcEner = TRUE;
-        }
-
-        /* Do we need global communication ? */
-        bGStat = (bCalcVir || bCalcEner || bStopCM ||
-                  do_per_step(step, nstglobalcomm) ||
-                  (bVV && IR_NVT_TROTTER(ir) && do_per_step(step-1, nstglobalcomm)));
-
-        /* these CGLO_ options remain the same throughout the iteration */
-        cglo_flags = ((bRerunMD ? CGLO_RERUNMD : 0) |
-                      (bGStat ? CGLO_GSTAT : 0)
-                      );
-
-        force_flags = (GMX_FORCE_STATECHANGED |
-                       ((DYNAMIC_BOX(*ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) |
-                       GMX_FORCE_ALLFORCES |
-                       GMX_FORCE_SEPLRF |
-                       (bCalcVir ? GMX_FORCE_VIRIAL : 0) |
-                       (bCalcEner ? GMX_FORCE_ENERGY : 0) |
-                       (bDoFEP ? GMX_FORCE_DHDL : 0)
-                       );
-
-        if (fr->bTwinRange)
-        {
-            if (do_per_step(step, ir->nstcalclr))
-            {
-                force_flags |= GMX_FORCE_DO_LR;
-            }
-        }
-
-        if (shellfc)
-        {
-            /* Now is the time to relax the shells */
-            count = relax_shell_flexcon(fplog, cr, bVerbose, step,
-                                        ir, bNS, force_flags,
-                                        top,
-                                        constr, enerd, fcd,
-                                        state, f, force_vir, mdatoms,
-                                        nrnb, wcycle, graph, groups,
-                                        shellfc, fr, bBornRadii, t, mu_tot,
-                                        &bConverged, vsite,
-                                        mdoutf_get_fp_field(outf));
-            tcount += count;
-
-            if (bConverged)
-            {
-                nconverged++;
-            }
-        }
-        else
-        {
-            /* The coordinates (x) are shifted (to get whole molecules)
-             * in do_force.
-             * This is parallellized as well, and does communication too.
-             * Check comments in sim_util.c
-             */
-            do_force(fplog, cr, ir, step, nrnb, wcycle, top, groups,
-                     state->box, state->x, &state->hist,
-                     f, force_vir, mdatoms, enerd, fcd,
-                     state->lambda, graph,
-                     fr, vsite, mu_tot, t, mdoutf_get_fp_field(outf), ed, bBornRadii,
-                     (bNS ? GMX_FORCE_NS : 0) | force_flags);
-        }
-
-        if (bVV && !bStartingFromCpt && !bRerunMD)
-        /*  ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
-        {
-            rvec *vbuf = NULL;
-
-            wallcycle_start(wcycle, ewcUPDATE);
-            if (ir->eI == eiVV && bInitStep)
-            {
-                /* if using velocity verlet with full time step Ekin,
-                 * take the first half step only to compute the
-                 * virial for the first step. From there,
-                 * revert back to the initial coordinates
-                 * so that the input is actually the initial step.
-                 */
-                snew(vbuf, state->natoms);
-                copy_rvecn(state->v, vbuf, 0, state->natoms); /* should make this better for parallelizing? */
-            }
-            else
-            {
-                /* this is for NHC in the Ekin(t+dt/2) version of vv */
-                trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ1);
-            }
-
-            /* If we are using twin-range interactions where the long-range component
-             * is only evaluated every nstcalclr>1 steps, we should do a special update
-             * step to combine the long-range forces on these steps.
-             * For nstcalclr=1 this is not done, since the forces would have been added
-             * directly to the short-range forces already.
-             *
-             * TODO Remove various aspects of VV+twin-range in master
-             * branch, because VV integrators did not ever support
-             * twin-range multiple time stepping with constraints.
-             */
-            bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr));
-
-            update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC,
-                          f, bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd,
-                          ekind, M, upd, bInitStep, etrtVELOCITY1,
-                          cr, nrnb, constr, &top->idef);
-
-            if (!bRerunMD || rerun_fr.bV || bForceUpdate)         /* Why is rerun_fr.bV here?  Unclear. */
-            {
-                wallcycle_stop(wcycle, ewcUPDATE);
-                update_constraints(fplog, step, NULL, ir, mdatoms,
-                                   state, fr->bMolPBC, graph, f,
-                                   &top->idef, shake_vir,
-                                   cr, nrnb, wcycle, upd, constr,
-                                   TRUE, bCalcVir);
-                wallcycle_start(wcycle, ewcUPDATE);
-                if (bCalcVir && bUpdateDoLR && ir->nstcalclr > 1)
-                {
-                    /* Correct the virial for multiple time stepping */
-                    m_sub(shake_vir, fr->vir_twin_constr, shake_vir);
-                }
-            }
-            else if (graph)
-            {
-                /* Need to unshift here if a do_force has been
-                   called in the previous step */
-                unshift_self(graph, state->box, state->x);
-            }
-            /* if VV, compute the pressure and constraints */
-            /* For VV2, we strictly only need this if using pressure
-             * control, but we really would like to have accurate pressures
-             * printed out.
-             * Think about ways around this in the future?
-             * For now, keep this choice in comments.
-             */
-            /*bPres = (ir->eI==eiVV || IR_NPT_TROTTER(ir)); */
-            /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && IR_NPT_TROTTER(ir)));*/
-            bPres = TRUE;
-            bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK));
-            if (bCalcEner && ir->eI == eiVVAK)
-            {
-                bSumEkinhOld = TRUE;
-            }
-            /* for vv, the first half of the integration actually corresponds to the previous step.
-               So we need information from the last step in the first half of the integration */
-            if (bGStat || do_per_step(step-1, nstglobalcomm))
-            {
-                wallcycle_stop(wcycle, ewcUPDATE);
-                compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
-                                wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
-                                constr, NULL, FALSE, state->box,
-                                top_global, &bSumEkinhOld,
-                                cglo_flags
-                                | CGLO_ENERGY
-                                | (bTemp ? CGLO_TEMPERATURE : 0)
-                                | (bPres ? CGLO_PRESSURE : 0)
-                                | (bPres ? CGLO_CONSTRAINT : 0)
-                                | (bStopCM ? CGLO_STOPCM : 0)
-                                | CGLO_SCALEEKIN
-                                );
-                /* explanation of above:
-                   a) We compute Ekin at the full time step
-                   if 1) we are using the AveVel Ekin, and it's not the
-                   initial step, or 2) if we are using AveEkin, but need the full
-                   time step kinetic energy for the pressure (always true now, since we want accurate statistics).
-                   b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in
-                   EkinAveVel because it's needed for the pressure */
-                wallcycle_start(wcycle, ewcUPDATE);
-            }
-            /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
-            if (!bInitStep)
-            {
-                if (bTrotter)
-                {
-                    m_add(force_vir, shake_vir, total_vir);     /* we need the un-dispersion corrected total vir here */
-                    trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ2);
-                }
-                else
-                {
-                    if (bExchanged)
-                    {
-                        wallcycle_stop(wcycle, ewcUPDATE);
-                        /* We need the kinetic energy at minus the half step for determining
-                         * the full step kinetic energy and possibly for T-coupling.*/
-                        /* This may not be quite working correctly yet . . . . */
-                        compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
-                                        wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot,
-                                        constr, NULL, FALSE, state->box,
-                                        top_global, &bSumEkinhOld,
-                                        CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
-                        wallcycle_start(wcycle, ewcUPDATE);
-                    }
-                }
-            }
-            if (bTrotter && !bInitStep)
-            {
-                copy_mat(shake_vir, state->svir_prev);
-                copy_mat(force_vir, state->fvir_prev);
-                if (IR_NVT_TROTTER(ir) && ir->eI == eiVV)
-                {
-                    /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
-                    enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, NULL, (ir->eI == eiVV), FALSE);
-                    enerd->term[F_EKIN] = trace(ekind->ekin);
-                }
-            }
-            /* if it's the initial step, we performed this first step just to get the constraint virial */
-            if (ir->eI == eiVV && bInitStep)
-            {
-                copy_rvecn(vbuf, state->v, 0, state->natoms);
-                sfree(vbuf);
-            }
-            wallcycle_stop(wcycle, ewcUPDATE);
-        }
-
-        /* compute the conserved quantity */
-        if (bVV)
-        {
-            saved_conserved_quantity = compute_conserved_from_auxiliary(ir, state, &MassQ);
-            if (ir->eI == eiVV)
-            {
-                last_ekin = enerd->term[F_EKIN];
-            }
-            if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres))
-            {
-                saved_conserved_quantity -= enerd->term[F_DISPCORR];
-            }
-            /* sum up the foreign energy and dhdl terms for vv.  currently done every step so that dhdl is correct in the .edr */
-            if (ir->efep != efepNO && !bRerunMD)
-            {
-                sum_dhdl(enerd, state->lambda, ir->fepvals);
-            }
-        }
-
-        /* ########  END FIRST UPDATE STEP  ############## */
-        /* ########  If doing VV, we now have v(dt) ###### */
-        if (bDoExpanded)
-        {
-            /* perform extended ensemble sampling in lambda - we don't
-               actually move to the new state before outputting
-               statistics, but if performing simulated tempering, we
-               do update the velocities and the tau_t. */
-
-            lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state, &state->dfhist, step, state->v, mdatoms);
-            /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */
-            copy_df_history(&state_global->dfhist, &state->dfhist);
-        }
-
-        /* Now we have the energies and forces corresponding to the
-         * coordinates at time t. We must output all of this before
-         * the update.
-         */
-        do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t,
-                                 ir, state, state_global, top_global, fr,
-                                 outf, mdebin, ekind, f, f_global,
-                                 &nchkpt,
-                                 bCPT, bRerunMD, bLastStep, (Flags & MD_CONFOUT),
-                                 bSumEkinhOld);
-        /* Check if IMD step and do IMD communication, if bIMD is TRUE. */
-        bIMDstep = do_IMD(ir->bIMD, step, cr, bNS, state->box, state->x, ir, t, wcycle);
-
-        /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */
-        if (bStartingFromCpt && bTrotter)
-        {
-            copy_mat(state->svir_prev, shake_vir);
-            copy_mat(state->fvir_prev, force_vir);
-        }
-
-        elapsed_time = walltime_accounting_get_current_elapsed_time(walltime_accounting);
-
-        /* Check whether everything is still allright */
-        if (((int)gmx_get_stop_condition() > handled_stop_condition)
-#ifdef GMX_THREAD_MPI
-            && MASTER(cr)
-#endif
-            )
-        {
-            /* this is just make gs.sig compatible with the hack
-               of sending signals around by MPI_Reduce with together with
-               other floats */
-            if (gmx_get_stop_condition() == gmx_stop_cond_next_ns)
-            {
-                gs.sig[eglsSTOPCOND] = 1;
-            }
-            if (gmx_get_stop_condition() == gmx_stop_cond_next)
-            {
-                gs.sig[eglsSTOPCOND] = -1;
-            }
-            /* < 0 means stop at next step, > 0 means stop at next NS step */
-            if (fplog)
-            {
-                fprintf(fplog,
-                        "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
-                        gmx_get_signal_name(),
-                        gs.sig[eglsSTOPCOND] == 1 ? "NS " : "");
-                fflush(fplog);
-            }
-            fprintf(stderr,
-                    "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
-                    gmx_get_signal_name(),
-                    gs.sig[eglsSTOPCOND] == 1 ? "NS " : "");
-            fflush(stderr);
-            handled_stop_condition = (int)gmx_get_stop_condition();
-        }
-        else if (MASTER(cr) && (bNS || ir->nstlist <= 0) &&
-                 (max_hours > 0 && elapsed_time > max_hours*60.0*60.0*0.99) &&
-                 gs.sig[eglsSTOPCOND] == 0 && gs.set[eglsSTOPCOND] == 0)
-        {
-            /* Signal to terminate the run */
-            gs.sig[eglsSTOPCOND] = 1;
-            if (fplog)
-            {
-                fprintf(fplog, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99);
-            }
-            fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99);
-        }
-
-        if (bResetCountersHalfMaxH && MASTER(cr) &&
-            elapsed_time > max_hours*60.0*60.0*0.495)
-        {
-            /* Set flag that will communicate the signal to all ranks in the simulation */
-            gs.sig[eglsRESETCOUNTERS] = 1;
-        }
-
-        /* In parallel we only have to check for checkpointing in steps
-         * where we do global communication,
-         *  otherwise the other nodes don't know.
-         */
-        if (MASTER(cr) && ((bGStat || !PAR(cr)) &&
-                           cpt_period >= 0 &&
-                           (cpt_period == 0 ||
-                            elapsed_time >= nchkpt*cpt_period*60.0)) &&
-            gs.set[eglsCHKPT] == 0)
-        {
-            gs.sig[eglsCHKPT] = 1;
-        }
-
-        /* at the start of step, randomize or scale the velocities ((if vv. Restriction of Andersen controlled
-           in preprocessing */
-
-        if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
-        {
-            gmx_bool bIfRandomize;
-            bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state, upd, constr);
-            /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
-            if (constr && bIfRandomize)
-            {
-                update_constraints(fplog, step, NULL, ir, mdatoms,
-                                   state, fr->bMolPBC, graph, f,
-                                   &top->idef, tmp_vir,
-                                   cr, nrnb, wcycle, upd, constr,
-                                   TRUE, bCalcVir);
-            }
-        }
-        /* #########   START SECOND UPDATE STEP ################# */
-        /* Box is changed in update() when we do pressure coupling,
-         * but we should still use the old box for energy corrections and when
-         * writing it to the energy file, so it matches the trajectory files for
-         * the same timestep above. Make a copy in a separate array.
-         */
-        copy_mat(state->box, lastbox);
-
-        dvdl_constr = 0;
-
-        if (!bRerunMD || rerun_fr.bV || bForceUpdate)
-        {
-            wallcycle_start(wcycle, ewcUPDATE);
-            /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
-            if (bTrotter)
-            {
-                trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3);
-                /* We can only do Berendsen coupling after we have summed
-                 * the kinetic energy or virial. Since the happens
-                 * in global_state after update, we should only do it at
-                 * step % nstlist = 1 with bGStatEveryStep=FALSE.
-                 */
-            }
-            else
-            {
-                update_tcouple(step, ir, state, ekind, &MassQ, mdatoms);
-                update_pcouple(fplog, step, ir, state, pcoupl_mu, M, bInitStep);
-            }
-
-            if (bVV)
-            {
-                bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr));
-
-                /* velocity half-step update */
-                update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f,
-                              bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd,
-                              ekind, M, upd, FALSE, etrtVELOCITY2,
-                              cr, nrnb, constr, &top->idef);
-            }
-
-            /* Above, initialize just copies ekinh into ekin,
-             * it doesn't copy position (for VV),
-             * and entire integrator for MD.
-             */
-
-            if (ir->eI == eiVVAK)
-            {
-                /* We probably only need md->homenr, not state->natoms */
-                if (state->natoms > cbuf_nalloc)
-                {
-                    cbuf_nalloc = state->natoms;
-                    srenew(cbuf, cbuf_nalloc);
-                }
-                copy_rvecn(state->x, cbuf, 0, state->natoms);
-            }
-            bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr));
-
-            update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f,
-                          bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd,
-                          ekind, M, upd, bInitStep, etrtPOSITION, cr, nrnb, constr, &top->idef);
-            wallcycle_stop(wcycle, ewcUPDATE);
-
-            update_constraints(fplog, step, &dvdl_constr, ir, mdatoms, state,
-                               fr->bMolPBC, graph, f,
-                               &top->idef, shake_vir,
-                               cr, nrnb, wcycle, upd, constr,
-                               FALSE, bCalcVir);
-
-            if (bCalcVir && bUpdateDoLR && ir->nstcalclr > 1)
-            {
-                /* Correct the virial for multiple time stepping */
-                m_sub(shake_vir, fr->vir_twin_constr, shake_vir);
-            }
-
-            if (ir->eI == eiVVAK)
-            {
-                /* erase F_EKIN and F_TEMP here? */
-                /* just compute the kinetic energy at the half step to perform a trotter step */
-                compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
-                                wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
-                                constr, NULL, FALSE, lastbox,
-                                top_global, &bSumEkinhOld,
-                                cglo_flags | CGLO_TEMPERATURE
-                                );
-                wallcycle_start(wcycle, ewcUPDATE);
-                trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4);
-                /* now we know the scaling, we can compute the positions again again */
-                copy_rvecn(cbuf, state->x, 0, state->natoms);
-
-                bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr));
-
-                update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f,
-                              bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd,
-                              ekind, M, upd, bInitStep, etrtPOSITION, cr, nrnb, constr, &top->idef);
-                wallcycle_stop(wcycle, ewcUPDATE);
-
-                /* do we need an extra constraint here? just need to copy out of state->v to upd->xp? */
-                /* are the small terms in the shake_vir here due
-                 * to numerical errors, or are they important
-                 * physically? I'm thinking they are just errors, but not completely sure.
-                 * For now, will call without actually constraining, constr=NULL*/
-                update_constraints(fplog, step, NULL, ir, mdatoms,
-                                   state, fr->bMolPBC, graph, f,
-                                   &top->idef, tmp_vir,
-                                   cr, nrnb, wcycle, upd, NULL,
-                                   FALSE, bCalcVir);
-            }
-            if (bVV)
-            {
-                /* this factor or 2 correction is necessary
-                   because half of the constraint force is removed
-                   in the vv step, so we have to double it.  See
-                   the Redmine issue #1255.  It is not yet clear
-                   if the factor of 2 is exact, or just a very
-                   good approximation, and this will be
-                   investigated.  The next step is to see if this
-                   can be done adding a dhdl contribution from the
-                   rattle step, but this is somewhat more
-                   complicated with the current code. Will be
-                   investigated, hopefully for 4.6.3. However,
-                   this current solution is much better than
-                   having it completely wrong.
-                 */
-                enerd->term[F_DVDL_CONSTR] += 2*dvdl_constr;
-            }
-            else
-            {
-                enerd->term[F_DVDL_CONSTR] += dvdl_constr;
-            }
-        }
-        else if (graph)
-        {
-            /* Need to unshift here */
-            unshift_self(graph, state->box, state->x);
-        }
-
-        if (vsite != NULL)
-        {
-            wallcycle_start(wcycle, ewcVSITECONSTR);
-            if (graph != NULL)
-            {
-                shift_self(graph, state->box, state->x);
-            }
-            construct_vsites(vsite, state->x, ir->delta_t, state->v,
-                             top->idef.iparams, top->idef.il,
-                             fr->ePBC, fr->bMolPBC, cr, state->box);
-
-            if (graph != NULL)
-            {
-                unshift_self(graph, state->box, state->x);
-            }
-            wallcycle_stop(wcycle, ewcVSITECONSTR);
-        }
-
-        /* ############## IF NOT VV, Calculate globals HERE  ############ */
-        /* With Leap-Frog we can skip compute_globals at
-         * non-communication steps, but we need to calculate
-         * the kinetic energy one step before communication.
-         */
-        if (bGStat || (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm)))
-        {
-            compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
-                            wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
-                            constr, &gs,
-                            (step_rel % gs.nstms == 0) &&
-                            (multisim_nsteps < 0 || (step_rel < multisim_nsteps)),
-                            lastbox,
-                            top_global, &bSumEkinhOld,
-                            cglo_flags
-                            | (!EI_VV(ir->eI) || bRerunMD ? CGLO_ENERGY : 0)
-                            | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
-                            | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
-                            | (!EI_VV(ir->eI) || bRerunMD ? CGLO_PRESSURE : 0)
-                            | CGLO_CONSTRAINT
-                            );
-        }
-
-        /* #############  END CALC EKIN AND PRESSURE ################# */
-
-        /* Note: this is OK, but there are some numerical precision issues with using the convergence of
-           the virial that should probably be addressed eventually. state->veta has better properies,
-           but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
-           generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
-
-        if (ir->efep != efepNO && (!bVV || bRerunMD))
-        {
-            /* Sum up the foreign energy and dhdl terms for md and sd.
-               Currently done every step so that dhdl is correct in the .edr */
-            sum_dhdl(enerd, state->lambda, ir->fepvals);
-        }
-        update_box(fplog, step, ir, mdatoms, state, f,
-                   pcoupl_mu, nrnb, upd);
-
-        /* ################# END UPDATE STEP 2 ################# */
-        /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
-
-        /* The coordinates (x) were unshifted in update */
-        if (!bGStat)
-        {
-            /* We will not sum ekinh_old,
-             * so signal that we still have to do it.
-             */
-            bSumEkinhOld = TRUE;
-        }
-
-        /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
-
-        /* use the directly determined last velocity, not actually the averaged half steps */
-        if (bTrotter && ir->eI == eiVV)
-        {
-            enerd->term[F_EKIN] = last_ekin;
-        }
-        enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
-
-        if (bVV)
-        {
-            enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
-        }
-        else
-        {
-            enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + compute_conserved_from_auxiliary(ir, state, &MassQ);
-        }
-        /* #########  END PREPARING EDR OUTPUT  ###########  */
-
-        /* Output stuff */
-        if (MASTER(cr))
-        {
-            if (fplog && do_log && bDoExpanded)
-            {
-                /* only needed if doing expanded ensemble */
-                PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals, ir->bSimTemp ? ir->simtempvals : NULL,
-                                          &state_global->dfhist, state->fep_state, ir->nstlog, step);
-            }
-            if (bCalcEner)
-            {
-                upd_mdebin(mdebin, bDoDHDL, bCalcEnerStep,
-                           t, mdatoms->tmass, enerd, state,
-                           ir->fepvals, ir->expandedvals, lastbox,
-                           shake_vir, force_vir, total_vir, pres,
-                           ekind, mu_tot, constr);
-            }
-            else
-            {
-                upd_mdebin_step(mdebin);
-            }
-
-            gmx_bool do_dr  = do_per_step(step, ir->nstdisreout);
-            gmx_bool do_or  = do_per_step(step, ir->nstorireout);
-
-            print_ebin(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or, do_log ? fplog : NULL,
-                       step, t,
-                       eprNORMAL, bCompact, mdebin, fcd, groups, &(ir->opts));
-
-            if (ir->bPull)
-            {
-                pull_print_output(ir->pull_work, step, t);
-            }
-
-            if (do_per_step(step, ir->nstlog))
-            {
-                if (fflush(fplog) != 0)
-                {
-                    gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?");
-                }
-            }
-        }
-        if (bDoExpanded)
-        {
-            /* Have to do this part _after_ outputting the logfile and the edr file */
-            /* Gets written into the state at the beginning of next loop*/
-            state->fep_state = lamnew;
-        }
-        /* Print the remaining wall clock time for the run */
-        if (MULTIMASTER(cr) &&
-            (do_verbose || gmx_got_usr_signal()) &&
-            !bPMETunePrinting)
-        {
-            if (shellfc)
-            {
-                fprintf(stderr, "\n");
-            }
-            print_time(stderr, walltime_accounting, step, ir, cr);
-        }
-
-        /* Ion/water position swapping.
-         * Not done in last step since trajectory writing happens before this call
-         * in the MD loop and exchanges would be lost anyway. */
-        bNeedRepartition = FALSE;
-        if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep &&
-            do_per_step(step, ir->swap->nstswap))
-        {
-            bNeedRepartition = do_swapcoords(cr, step, t, ir, wcycle,
-                                             bRerunMD ? rerun_fr.x   : state->x,
-                                             bRerunMD ? rerun_fr.box : state->box,
-                                             top_global, MASTER(cr) && bVerbose, bRerunMD);
-
-            if (bNeedRepartition && DOMAINDECOMP(cr))
-            {
-                dd_collect_state(cr->dd, state, state_global);
-            }
-        }
-
-        /* Replica exchange */
-        bExchanged = FALSE;
-        if (bDoReplEx)
-        {
-            bExchanged = replica_exchange(fplog, cr, repl_ex,
-                                          state_global, enerd,
-                                          state, step, t);
-        }
-
-        if ( (bExchanged || bNeedRepartition) && DOMAINDECOMP(cr) )
-        {
-            dd_partition_system(fplog, step, cr, TRUE, 1,
-                                state_global, top_global, ir,
-                                state, &f, mdatoms, top, fr,
-                                vsite, shellfc, constr,
-                                nrnb, wcycle, FALSE);
-        }
-
-        bFirstStep       = FALSE;
-        bInitStep        = FALSE;
-        bStartingFromCpt = FALSE;
-
-        /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
-        /* With all integrators, except VV, we need to retain the pressure
-         * at the current step for coupling at the next step.
-         */
-        if ((state->flags & (1<<estPRES_PREV)) &&
-            (bGStatEveryStep ||
-             (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
-        {
-            /* Store the pressure in t_state for pressure coupling
-             * at the next MD step.
-             */
-            copy_mat(pres, state->pres_prev);
-        }
-
-        /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
-
-        if ( (membed != NULL) && (!bLastStep) )
-        {
-            rescale_membed(step_rel, membed, state_global->x);
-        }
-
-        if (bRerunMD)
-        {
-            if (MASTER(cr))
-            {
-                /* read next frame from input trajectory */
-                bNotLastFrame = read_next_frame(oenv, status, &rerun_fr);
-            }
-
-            if (PAR(cr))
-            {
-                rerun_parallel_comm(cr, &rerun_fr, &bNotLastFrame);
-            }
-        }
-
-        cycles = wallcycle_stop(wcycle, ewcSTEP);
-        if (DOMAINDECOMP(cr) && wcycle)
-        {
-            dd_cycles_add(cr->dd, cycles, ddCyclStep);
-        }
-
-        if (!bRerunMD || !rerun_fr.bStep)
-        {
-            /* increase the MD step number */
-            step++;
-            step_rel++;
-        }
-
-        /* TODO make a counter-reset module */
-        /* If it is time to reset counters, set a flag that remains
-           true until counters actually get reset */
-        if (step_rel == wcycle_get_reset_counters(wcycle) ||
-            gs.set[eglsRESETCOUNTERS] != 0)
-        {
-            if (pme_loadbal_is_active(pme_loadbal))
-            {
-                /* Do not permit counter reset while PME load
-                 * balancing is active. The only purpose for resetting
-                 * counters is to measure reliable performance data,
-                 * and that can't be done before balancing
-                 * completes.
-                 *
-                 * TODO consider fixing this by delaying the reset
-                 * until after load balancing completes,
-                 * e.g. https://gerrit.gromacs.org/#/c/4964/2 */
-                gmx_fatal(FARGS, "PME tuning was still active when attempting to "
-                          "reset mdrun counters at step %" GMX_PRId64 ". Try "
-                          "resetting counters later in the run, e.g. with gmx "
-                          "mdrun -resetstep.", step);
-            }
-            reset_all_counters(fplog, cr, step, &step_rel, ir, wcycle, nrnb, walltime_accounting,
-                               use_GPU(fr->nbv) ? fr->nbv : NULL);
-            wcycle_set_reset_counters(wcycle, -1);
-            if (!(cr->duty & DUTY_PME))
-            {
-                /* Tell our PME node to reset its counters */
-                gmx_pme_send_resetcounters(cr, step);
-            }
-            /* Correct max_hours for the elapsed time */
-            max_hours                -= elapsed_time/(60.0*60.0);
-            /* If mdrun -maxh -resethway was active, it can only trigger once */
-            bResetCountersHalfMaxH    = FALSE; /* TODO move this to where gs.sig[eglsRESETCOUNTERS] is set */
-            /* Reset can only happen once, so clear the triggering flag. */
-            gs.set[eglsRESETCOUNTERS] = 0;
-        }
-
-        /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */
-        IMD_prep_energies_send_positions(ir->bIMD && MASTER(cr), bIMDstep, ir->imd, enerd, step, bCalcEner, wcycle);
-
-    }
-    /* End of main MD loop */
-    debug_gmx();
-
-    /* Closing TNG files can include compressing data. Therefore it is good to do that
-     * before stopping the time measurements. */
-    mdoutf_tng_close(outf);
-
-    /* Stop measuring walltime */
-    walltime_accounting_end(walltime_accounting);
-
-    if (bRerunMD && MASTER(cr))
-    {
-        close_trj(status);
-    }
-
-    if (!(cr->duty & DUTY_PME))
-    {
-        /* Tell the PME only node to finish */
-        gmx_pme_send_finish(cr);
-    }
-
-    if (MASTER(cr))
-    {
-        if (ir->nstcalcenergy > 0 && !bRerunMD)
-        {
-            print_ebin(mdoutf_get_fp_ene(outf), FALSE, FALSE, FALSE, fplog, step, t,
-                       eprAVER, FALSE, mdebin, fcd, groups, &(ir->opts));
-        }
-    }
-
-    done_mdoutf(outf);
-    debug_gmx();
-
-    if (bPMETune)
-    {
-        pme_loadbal_done(pme_loadbal, cr, fplog, use_GPU(fr->nbv));
-    }
-
-    if (shellfc && fplog)
-    {
-        fprintf(fplog, "Fraction of iterations that converged:           %.2f %%\n",
-                (nconverged*100.0)/step_rel);
-        fprintf(fplog, "Average number of force evaluations per MD step: %.2f\n\n",
-                tcount/step_rel);
-    }
-
-    if (repl_ex_nst > 0 && MASTER(cr))
-    {
-        print_replica_exchange_statistics(fplog, repl_ex);
-    }
-
-    /* IMD cleanup, if bIMD is TRUE. */
-    IMD_finalize(ir->bIMD, ir->imd);
-
-    walltime_accounting_set_nsteps_done(walltime_accounting, step_rel);
-
-    return 0;
-}
diff --git a/patches/gromacs-5.1.4.diff/src/programs/mdrun/mdrun.cpp b/patches/gromacs-5.1.4.diff/src/programs/mdrun/mdrun.cpp
deleted file mode 100644
index 1ef58aedc6e29dbcccf927e349d8597b2293c3ec..0000000000000000000000000000000000000000
--- a/patches/gromacs-5.1.4.diff/src/programs/mdrun/mdrun.cpp
+++ /dev/null
@@ -1,602 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2011,2012,2013,2014,2015,2016, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*! \defgroup module_mdrun Implementation of mdrun
- * \ingroup group_mdrun
- *
- * \brief This module contains code that implements mdrun.
- */
-/*! \internal \file
- *
- * \brief This file implements mdrun
- *
- * \author Berk Hess <hess@kth.se>
- * \author David van der Spoel <david.vanderspoel@icm.uu.se>
- * \author Erik Lindahl <erik@kth.se>
- * \author Mark Abraham <mark.j.abraham@gmail.com>
- *
- * \ingroup module_mdrun
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <stdio.h>
-#include <string.h>
-
-#include "gromacs/commandline/pargs.h"
-#include "gromacs/fileio/filenm.h"
-#include "gromacs/legacyheaders/macros.h"
-#include "gromacs/legacyheaders/main.h"
-#include "gromacs/legacyheaders/mdrun.h"
-#include "gromacs/legacyheaders/network.h"
-#include "gromacs/legacyheaders/readinp.h"
-#include "gromacs/legacyheaders/typedefs.h"
-#include "gromacs/legacyheaders/types/commrec.h"
-#include "gromacs/mdrunutility/handlerestart.h"
-#include "gromacs/utility/fatalerror.h"
-
-#include "mdrun_main.h"
-
-/* PLUMED */
-#include "../../../Plumed.h"
-extern int    plumedswitch;
-extern plumed plumedmain; 
-extern void(*plumedcmd)(plumed,const char*,const void*);
-/* END PLUMED */
-
-/* PLUMED HREX */
-int plumed_hrex;
-/* END PLUMED HREX */
-
-/*! \brief Return whether either of the command-line parameters that
- *  will trigger a multi-simulation is set */
-static bool is_multisim_option_set(int argc, const char *const argv[])
-{
-    for (int i = 0; i < argc; ++i)
-    {
-        if (strcmp(argv[i], "-multi") == 0 || strcmp(argv[i], "-multidir") == 0)
-        {
-            return true;
-        }
-    }
-    return false;
-}
-
-//! Implements C-style main function for mdrun
-int gmx_mdrun(int argc, char *argv[])
-{
-    const char   *desc[] = {
-        "[THISMODULE] is the main computational chemistry engine",
-        "within GROMACS. Obviously, it performs Molecular Dynamics simulations,",
-        "but it can also perform Stochastic Dynamics, Energy Minimization,",
-        "test particle insertion or (re)calculation of energies.",
-        "Normal mode analysis is another option. In this case [TT]mdrun[tt]",
-        "builds a Hessian matrix from single conformation.",
-        "For usual Normal Modes-like calculations, make sure that",
-        "the structure provided is properly energy-minimized.",
-        "The generated matrix can be diagonalized by [gmx-nmeig].[PAR]",
-        "The [TT]mdrun[tt] program reads the run input file ([TT]-s[tt])",
-        "and distributes the topology over ranks if needed.",
-        "[TT]mdrun[tt] produces at least four output files.",
-        "A single log file ([TT]-g[tt]) is written.",
-        "The trajectory file ([TT]-o[tt]), contains coordinates, velocities and",
-        "optionally forces.",
-        "The structure file ([TT]-c[tt]) contains the coordinates and",
-        "velocities of the last step.",
-        "The energy file ([TT]-e[tt]) contains energies, the temperature,",
-        "pressure, etc, a lot of these things are also printed in the log file.",
-        "Optionally coordinates can be written to a compressed trajectory file",
-        "([TT]-x[tt]).[PAR]",
-        "The option [TT]-dhdl[tt] is only used when free energy calculation is",
-        "turned on.[PAR]",
-        "Running mdrun efficiently in parallel is a complex topic topic,",
-        "many aspects of which are covered in the online User Guide. You",
-        "should look there for practical advice on using many of the options",
-        "available in mdrun.[PAR]",
-        "ED (essential dynamics) sampling and/or additional flooding potentials",
-        "are switched on by using the [TT]-ei[tt] flag followed by an [REF].edi[ref]",
-        "file. The [REF].edi[ref] file can be produced with the [TT]make_edi[tt] tool",
-        "or by using options in the essdyn menu of the WHAT IF program.",
-        "[TT]mdrun[tt] produces a [REF].xvg[ref] output file that",
-        "contains projections of positions, velocities and forces onto selected",
-        "eigenvectors.[PAR]",
-        "When user-defined potential functions have been selected in the",
-        "[REF].mdp[ref] file the [TT]-table[tt] option is used to pass [TT]mdrun[tt]",
-        "a formatted table with potential functions. The file is read from",
-        "either the current directory or from the [TT]GMXLIB[tt] directory.",
-        "A number of pre-formatted tables are presented in the [TT]GMXLIB[tt] dir,",
-        "for 6-8, 6-9, 6-10, 6-11, 6-12 Lennard-Jones potentials with",
-        "normal Coulomb.",
-        "When pair interactions are present, a separate table for pair interaction",
-        "functions is read using the [TT]-tablep[tt] option.[PAR]",
-        "When tabulated bonded functions are present in the topology,",
-        "interaction functions are read using the [TT]-tableb[tt] option.",
-        "For each different tabulated interaction type used, a table file name must",
-        "be given. For the topology to work, a file name given here must match a",
-        "character sequence before the file extension. That sequence is: an underscore,",
-        "then a 'b' for bonds, an 'a' for angles or a 'd' for dihedrals,",
-        "and finally the matching table number index used in the topology.[PAR]",
-        "The options [TT]-px[tt] and [TT]-pf[tt] are used for writing pull COM",
-        "coordinates and forces when pulling is selected",
-        "in the [REF].mdp[ref] file.[PAR]",
-        "Finally some experimental algorithms can be tested when the",
-        "appropriate options have been given. Currently under",
-        "investigation are: polarizability.",
-        "[PAR]",
-        "The option [TT]-membed[tt] does what used to be g_membed, i.e. embed",
-        "a protein into a membrane. This module requires a number of settings",
-        "that are provided in a data file that is the argument of this option.",
-        "For more details in membrane embedding, see the documentation in the",
-        "user guide. The options [TT]-mn[tt] and [TT]-mp[tt] are used to provide",
-        "the index and topology files used for the embedding.",
-        "[PAR]",
-        "The option [TT]-pforce[tt] is useful when you suspect a simulation",
-        "crashes due to too large forces. With this option coordinates and",
-        "forces of atoms with a force larger than a certain value will",
-        "be printed to stderr.",
-        "[PAR]",
-        "Checkpoints containing the complete state of the system are written",
-        "at regular intervals (option [TT]-cpt[tt]) to the file [TT]-cpo[tt],",
-        "unless option [TT]-cpt[tt] is set to -1.",
-        "The previous checkpoint is backed up to [TT]state_prev.cpt[tt] to",
-        "make sure that a recent state of the system is always available,",
-        "even when the simulation is terminated while writing a checkpoint.",
-        "With [TT]-cpnum[tt] all checkpoint files are kept and appended",
-        "with the step number.",
-        "A simulation can be continued by reading the full state from file",
-        "with option [TT]-cpi[tt]. This option is intelligent in the way that",
-        "if no checkpoint file is found, GROMACS just assumes a normal run and",
-        "starts from the first step of the [REF].tpr[ref] file. By default the output",
-        "will be appending to the existing output files. The checkpoint file",
-        "contains checksums of all output files, such that you will never",
-        "loose data when some output files are modified, corrupt or removed.",
-        "There are three scenarios with [TT]-cpi[tt]:[PAR]",
-        "[TT]*[tt] no files with matching names are present: new output files are written[PAR]",
-        "[TT]*[tt] all files are present with names and checksums matching those stored",
-        "in the checkpoint file: files are appended[PAR]",
-        "[TT]*[tt] otherwise no files are modified and a fatal error is generated[PAR]",
-        "With [TT]-noappend[tt] new output files are opened and the simulation",
-        "part number is added to all output file names.",
-        "Note that in all cases the checkpoint file itself is not renamed",
-        "and will be overwritten, unless its name does not match",
-        "the [TT]-cpo[tt] option.",
-        "[PAR]",
-        "With checkpointing the output is appended to previously written",
-        "output files, unless [TT]-noappend[tt] is used or none of the previous",
-        "output files are present (except for the checkpoint file).",
-        "The integrity of the files to be appended is verified using checksums",
-        "which are stored in the checkpoint file. This ensures that output can",
-        "not be mixed up or corrupted due to file appending. When only some",
-        "of the previous output files are present, a fatal error is generated",
-        "and no old output files are modified and no new output files are opened.",
-        "The result with appending will be the same as from a single run.",
-        "The contents will be binary identical, unless you use a different number",
-        "of ranks or dynamic load balancing or the FFT library uses optimizations",
-        "through timing.",
-        "[PAR]",
-        "With option [TT]-maxh[tt] a simulation is terminated and a checkpoint",
-        "file is written at the first neighbor search step where the run time",
-        "exceeds [TT]-maxh[tt]\\*0.99 hours. This option is particularly useful in",
-        "combination with setting [TT]nsteps[tt] to -1 either in the mdp or using the",
-        "similarly named command line option. This results in an infinite run,",
-        "terminated only when the time limit set by [TT]-maxh[tt] is reached (if any)"
-        "or upon receiving a signal."
-        "[PAR]",
-        "When [TT]mdrun[tt] receives a TERM signal, it will set nsteps to the current",
-        "step plus one. When [TT]mdrun[tt] receives an INT signal (e.g. when ctrl+C is",
-        "pressed), it will stop after the next neighbor search step ",
-        "(with nstlist=0 at the next step).",
-        "In both cases all the usual output will be written to file.",
-        "When running with MPI, a signal to one of the [TT]mdrun[tt] ranks",
-        "is sufficient, this signal should not be sent to mpirun or",
-        "the [TT]mdrun[tt] process that is the parent of the others.",
-        "[PAR]",
-        "Interactive molecular dynamics (IMD) can be activated by using at least one",
-        "of the three IMD switches: The [TT]-imdterm[tt] switch allows to terminate",
-        "the simulation from the molecular viewer (e.g. VMD). With [TT]-imdwait[tt],",
-        "[TT]mdrun[tt] pauses whenever no IMD client is connected. Pulling from the",
-        "IMD remote can be turned on by [TT]-imdpull[tt].",
-        "The port [TT]mdrun[tt] listens to can be altered by [TT]-imdport[tt].The",
-        "file pointed to by [TT]-if[tt] contains atom indices and forces if IMD",
-        "pulling is used."
-        "[PAR]",
-        "When [TT]mdrun[tt] is started with MPI, it does not run niced by default."
-    };
-    t_commrec    *cr;
-    t_filenm      fnm[] = {
-        { efTPR, NULL,      NULL,       ffREAD },
-        { efTRN, "-o",      NULL,       ffWRITE },
-        { efCOMPRESSED, "-x", NULL,     ffOPTWR },
-        { efCPT, "-cpi",    NULL,       ffOPTRD | ffALLOW_MISSING },
-        { efCPT, "-cpo",    NULL,       ffOPTWR },
-        { efSTO, "-c",      "confout",  ffWRITE },
-        { efEDR, "-e",      "ener",     ffWRITE },
-        { efLOG, "-g",      "md",       ffWRITE },
-        { efXVG, "-dhdl",   "dhdl",     ffOPTWR },
-        { efXVG, "-field",  "field",    ffOPTWR },
-        { efXVG, "-table",  "table",    ffOPTRD },
-        { efXVG, "-tabletf", "tabletf",    ffOPTRD },
-        { efXVG, "-tablep", "tablep",   ffOPTRD },
-        { efXVG, "-tableb", "table",    ffOPTRDMULT },
-        { efTRX, "-rerun",  "rerun",    ffOPTRD },
-        { efXVG, "-tpi",    "tpi",      ffOPTWR },
-        { efXVG, "-tpid",   "tpidist",  ffOPTWR },
-        { efEDI, "-ei",     "sam",      ffOPTRD },
-        { efXVG, "-eo",     "edsam",    ffOPTWR },
-        { efXVG, "-devout", "deviatie", ffOPTWR },
-        { efXVG, "-runav",  "runaver",  ffOPTWR },
-        { efXVG, "-px",     "pullx",    ffOPTWR },
-        { efXVG, "-pf",     "pullf",    ffOPTWR },
-        { efXVG, "-ro",     "rotation", ffOPTWR },
-        { efLOG, "-ra",     "rotangles", ffOPTWR },
-        { efLOG, "-rs",     "rotslabs", ffOPTWR },
-        { efLOG, "-rt",     "rottorque", ffOPTWR },
-        { efMTX, "-mtx",    "nm",       ffOPTWR },
-        { efNDX, "-dn",     "dipole",   ffOPTWR },
-        { efRND, "-multidir", NULL,      ffOPTRDMULT},
-        { efDAT, "-plumed", "plumed",   ffOPTRD },   /* PLUMED */
-        { efDAT, "-membed", "membed",   ffOPTRD },
-        { efTOP, "-mp",     "membed",   ffOPTRD },
-        { efNDX, "-mn",     "membed",   ffOPTRD },
-        { efXVG, "-if",     "imdforces", ffOPTWR },
-        { efXVG, "-swap",   "swapions", ffOPTWR }
-    };
-    const int     NFILE = asize(fnm);
-
-    /* Command line options ! */
-    gmx_bool        bDDBondCheck  = TRUE;
-    gmx_bool        bDDBondComm   = TRUE;
-    gmx_bool        bTunePME      = TRUE;
-    gmx_bool        bVerbose      = FALSE;
-    gmx_bool        bCompact      = TRUE;
-    gmx_bool        bRerunVSite   = FALSE;
-    gmx_bool        bConfout      = TRUE;
-    gmx_bool        bReproducible = FALSE;
-    gmx_bool        bIMDwait      = FALSE;
-    gmx_bool        bIMDterm      = FALSE;
-    gmx_bool        bIMDpull      = FALSE;
-
-    int             npme          = -1;
-    int             nstlist       = 0;
-    int             nmultisim     = 0;
-    int             nstglobalcomm = -1;
-    int             repl_ex_nst   = 0;
-    int             repl_ex_seed  = -1;
-    int             repl_ex_nex   = 0;
-    int             nstepout      = 100;
-    int             resetstep     = -1;
-    gmx_int64_t     nsteps        = -2;   /* the value -2 means that the mdp option will be used */
-    int             imdport       = 8888; /* can be almost anything, 8888 is easy to remember */
-
-    rvec            realddxyz          = {0, 0, 0};
-    const char     *ddno_opt[ddnoNR+1] =
-    { NULL, "interleave", "pp_pme", "cartesian", NULL };
-    const char     *dddlb_opt[] =
-    { NULL, "auto", "no", "yes", NULL };
-    const char     *thread_aff_opt[threadaffNR+1] =
-    { NULL, "auto", "on", "off", NULL };
-    const char     *nbpu_opt[] =
-    { NULL, "auto", "cpu", "gpu", "gpu_cpu", NULL };
-    real            rdd                   = 0.0, rconstr = 0.0, dlb_scale = 0.8, pforce = -1;
-    char           *ddcsx                 = NULL, *ddcsy = NULL, *ddcsz = NULL;
-    real            cpt_period            = 15.0, max_hours = -1;
-    gmx_bool        bTryToAppendFiles     = TRUE;
-    gmx_bool        bKeepAndNumCPT        = FALSE;
-    gmx_bool        bResetCountersHalfWay = FALSE;
-    output_env_t    oenv                  = NULL;
-
-    /* Non transparent initialization of a complex gmx_hw_opt_t struct.
-     * But unfortunately we are not allowed to call a function here,
-     * since declarations follow below.
-     */
-    gmx_hw_opt_t    hw_opt = {
-        0, 0, 0, 0, threadaffSEL, 0, 0,
-        { NULL, FALSE, 0, NULL }
-    };
-
-    t_pargs         pa[] = {
-
-        { "-dd",      FALSE, etRVEC, {&realddxyz},
-          "Domain decomposition grid, 0 is optimize" },
-        { "-ddorder", FALSE, etENUM, {ddno_opt},
-          "DD rank order" },
-        { "-npme",    FALSE, etINT, {&npme},
-          "Number of separate ranks to be used for PME, -1 is guess" },
-        { "-nt",      FALSE, etINT, {&hw_opt.nthreads_tot},
-          "Total number of threads to start (0 is guess)" },
-        { "-ntmpi",   FALSE, etINT, {&hw_opt.nthreads_tmpi},
-          "Number of thread-MPI threads to start (0 is guess)" },
-        { "-ntomp",   FALSE, etINT, {&hw_opt.nthreads_omp},
-          "Number of OpenMP threads per MPI rank to start (0 is guess)" },
-        { "-ntomp_pme", FALSE, etINT, {&hw_opt.nthreads_omp_pme},
-          "Number of OpenMP threads per MPI rank to start (0 is -ntomp)" },
-        { "-pin",     FALSE, etENUM, {thread_aff_opt},
-          "Whether mdrun should try to set thread affinities" },
-        { "-pinoffset", FALSE, etINT, {&hw_opt.core_pinning_offset},
-          "The lowest logical core number to which mdrun should pin the first thread" },
-        { "-pinstride", FALSE, etINT, {&hw_opt.core_pinning_stride},
-          "Pinning distance in logical cores for threads, use 0 to minimize the number of threads per physical core" },
-        { "-gpu_id",  FALSE, etSTR, {&hw_opt.gpu_opt.gpu_id},
-          "List of GPU device id-s to use, specifies the per-node PP rank to GPU mapping" },
-        { "-ddcheck", FALSE, etBOOL, {&bDDBondCheck},
-          "Check for all bonded interactions with DD" },
-        { "-ddbondcomm", FALSE, etBOOL, {&bDDBondComm},
-          "HIDDENUse special bonded atom communication when [TT]-rdd[tt] > cut-off" },
-        { "-rdd",     FALSE, etREAL, {&rdd},
-          "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial coordinates" },
-        { "-rcon",    FALSE, etREAL, {&rconstr},
-          "Maximum distance for P-LINCS (nm), 0 is estimate" },
-        { "-dlb",     FALSE, etENUM, {dddlb_opt},
-          "Dynamic load balancing (with DD)" },
-        { "-dds",     FALSE, etREAL, {&dlb_scale},
-          "Fraction in (0,1) by whose reciprocal the initial DD cell size will be increased in order to "
-          "provide a margin in which dynamic load balancing can act while preserving the minimum cell size." },
-        { "-ddcsx",   FALSE, etSTR, {&ddcsx},
-          "HIDDENA string containing a vector of the relative sizes in the x "
-          "direction of the corresponding DD cells. Only effective with static "
-          "load balancing." },
-        { "-ddcsy",   FALSE, etSTR, {&ddcsy},
-          "HIDDENA string containing a vector of the relative sizes in the y "
-          "direction of the corresponding DD cells. Only effective with static "
-          "load balancing." },
-        { "-ddcsz",   FALSE, etSTR, {&ddcsz},
-          "HIDDENA string containing a vector of the relative sizes in the z "
-          "direction of the corresponding DD cells. Only effective with static "
-          "load balancing." },
-        { "-gcom",    FALSE, etINT, {&nstglobalcomm},
-          "Global communication frequency" },
-        { "-nb",      FALSE, etENUM, {&nbpu_opt},
-          "Calculate non-bonded interactions on" },
-        { "-nstlist", FALSE, etINT, {&nstlist},
-          "Set nstlist when using a Verlet buffer tolerance (0 is guess)" },
-        { "-tunepme", FALSE, etBOOL, {&bTunePME},
-          "Optimize PME load between PP/PME ranks or GPU/CPU" },
-        { "-v",       FALSE, etBOOL, {&bVerbose},
-          "Be loud and noisy" },
-        { "-compact", FALSE, etBOOL, {&bCompact},
-          "Write a compact log file" },
-        { "-pforce",  FALSE, etREAL, {&pforce},
-          "Print all forces larger than this (kJ/mol nm)" },
-        { "-reprod",  FALSE, etBOOL, {&bReproducible},
-          "Try to avoid optimizations that affect binary reproducibility" },
-        { "-cpt",     FALSE, etREAL, {&cpt_period},
-          "Checkpoint interval (minutes)" },
-        { "-cpnum",   FALSE, etBOOL, {&bKeepAndNumCPT},
-          "Keep and number checkpoint files" },
-        { "-append",  FALSE, etBOOL, {&bTryToAppendFiles},
-          "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names" },
-        { "-nsteps",  FALSE, etINT64, {&nsteps},
-          "Run this number of steps, overrides .mdp file option (-1 means infinite, -2 means use mdp option, smaller is invalid)" },
-        { "-maxh",   FALSE, etREAL, {&max_hours},
-          "Terminate after 0.99 times this time (hours)" },
-        { "-multi",   FALSE, etINT, {&nmultisim},
-          "Do multiple simulations in parallel" },
-        { "-replex",  FALSE, etINT, {&repl_ex_nst},
-          "Attempt replica exchange periodically with this period (steps)" },
-        { "-nex",  FALSE, etINT, {&repl_ex_nex},
-          "Number of random exchanges to carry out each exchange interval (N^3 is one suggestion).  -nex zero or not specified gives neighbor replica exchange." },
-        { "-reseed",  FALSE, etINT, {&repl_ex_seed},
-          "Seed for replica exchange, -1 is generate a seed" },
-        { "-hrex",  FALSE, etBOOL, {&plumed_hrex},
-          "Enable hamiltonian replica exchange" },
-        { "-imdport",    FALSE, etINT, {&imdport},
-          "HIDDENIMD listening port" },
-        { "-imdwait",  FALSE, etBOOL, {&bIMDwait},
-          "HIDDENPause the simulation while no IMD client is connected" },
-        { "-imdterm",  FALSE, etBOOL, {&bIMDterm},
-          "HIDDENAllow termination of the simulation from IMD client" },
-        { "-imdpull",  FALSE, etBOOL, {&bIMDpull},
-          "HIDDENAllow pulling in the simulation from IMD client" },
-        { "-rerunvsite", FALSE, etBOOL, {&bRerunVSite},
-          "HIDDENRecalculate virtual site coordinates with [TT]-rerun[tt]" },
-        { "-confout", FALSE, etBOOL, {&bConfout},
-          "HIDDENWrite the last configuration with [TT]-c[tt] and force checkpointing at the last step" },
-        { "-stepout", FALSE, etINT, {&nstepout},
-          "HIDDENFrequency of writing the remaining wall clock time for the run" },
-        { "-resetstep", FALSE, etINT, {&resetstep},
-          "HIDDENReset cycle counters after these many time steps" },
-        { "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay},
-          "HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt]" }
-    };
-    unsigned long   Flags;
-    ivec            ddxyz;
-    int             dd_node_order;
-    gmx_bool        bDoAppendFiles, bStartFromCpt;
-    FILE           *fplog;
-    int             rc;
-    char          **multidir = NULL;
-
-    cr = init_commrec();
-
-    unsigned long PCA_Flags = PCA_CAN_SET_DEFFNM;
-    // With -multi or -multidir, the file names are going to get processed
-    // further (or the working directory changed), so we can't check for their
-    // existence during parsing.  It isn't useful to do any completion based on
-    // file system contents, either.
-    if (is_multisim_option_set(argc, argv))
-    {
-        PCA_Flags |= PCA_DISABLE_INPUT_FILE_CHECKING;
-    }
-
-    /* Comment this in to do fexist calls only on master
-     * works not with rerun or tables at the moment
-     * also comment out the version of init_forcerec in md.c
-     * with NULL instead of opt2fn
-     */
-    /*
-       if (!MASTER(cr))
-       {
-       PCA_Flags |= PCA_NOT_READ_NODE;
-       }
-     */
-
-    if (!parse_common_args(&argc, argv, PCA_Flags, NFILE, fnm, asize(pa), pa,
-                           asize(desc), desc, 0, NULL, &oenv))
-    {
-        return 0;
-    }
-
-
-    /* we set these early because they might be used in init_multisystem()
-       Note that there is the potential for npme>nnodes until the number of
-       threads is set later on, if there's thread parallelization. That shouldn't
-       lead to problems. */
-    dd_node_order = nenum(ddno_opt);
-    cr->npmenodes = npme;
-
-    hw_opt.thread_affinity = nenum(thread_aff_opt);
-
-    /* now check the -multi and -multidir option */
-    if (opt2bSet("-multidir", NFILE, fnm))
-    {
-        if (nmultisim > 0)
-        {
-            gmx_fatal(FARGS, "mdrun -multi and -multidir options are mutually exclusive.");
-        }
-        nmultisim = opt2fns(&multidir, "-multidir", NFILE, fnm);
-    }
-
-
-    if (repl_ex_nst != 0 && nmultisim < 2)
-    {
-        gmx_fatal(FARGS, "Need at least two replicas for replica exchange (option -multi)");
-    }
-
-    if (repl_ex_nex < 0)
-    {
-        gmx_fatal(FARGS, "Replica exchange number of exchanges needs to be positive");
-    }
-
-    if (nmultisim >= 1)
-    {
-#ifndef GMX_THREAD_MPI
-        gmx_bool bParFn = (multidir == NULL);
-        init_multisystem(cr, nmultisim, multidir, NFILE, fnm, bParFn);
-#else
-        gmx_fatal(FARGS, "mdrun -multi or -multidir are not supported with the thread-MPI library. "
-                  "Please compile GROMACS with a proper external MPI library.");
-#endif
-    }
-
-    handleRestart(cr, bTryToAppendFiles, NFILE, fnm,
-                  &bDoAppendFiles, &bStartFromCpt);
-
-    Flags = opt2bSet("-rerun", NFILE, fnm) ? MD_RERUN : 0;
-    Flags = Flags | (bDDBondCheck  ? MD_DDBONDCHECK  : 0);
-    Flags = Flags | (bDDBondComm   ? MD_DDBONDCOMM   : 0);
-    Flags = Flags | (bTunePME      ? MD_TUNEPME      : 0);
-    Flags = Flags | (bConfout      ? MD_CONFOUT      : 0);
-    Flags = Flags | (bRerunVSite   ? MD_RERUN_VSITE  : 0);
-    Flags = Flags | (bReproducible ? MD_REPRODUCIBLE : 0);
-    Flags = Flags | (bDoAppendFiles ? MD_APPENDFILES  : 0);
-    Flags = Flags | (opt2parg_bSet("-append", asize(pa), pa) ? MD_APPENDFILESSET : 0);
-    Flags = Flags | (bKeepAndNumCPT ? MD_KEEPANDNUMCPT : 0);
-    Flags = Flags | (bStartFromCpt ? MD_STARTFROMCPT : 0);
-    Flags = Flags | (bResetCountersHalfWay ? MD_RESETCOUNTERSHALFWAY : 0);
-    Flags = Flags | (opt2parg_bSet("-ntomp", asize(pa), pa) ? MD_NTOMPSET : 0);
-    Flags = Flags | (bIMDwait      ? MD_IMDWAIT      : 0);
-    Flags = Flags | (bIMDterm      ? MD_IMDTERM      : 0);
-    Flags = Flags | (bIMDpull      ? MD_IMDPULL      : 0);
-
-    /* We postpone opening the log file if we are appending, so we can
-       first truncate the old log file and append to the correct position
-       there instead.  */
-    if (MASTER(cr) && !bDoAppendFiles)
-    {
-        gmx_log_open(ftp2fn(efLOG, NFILE, fnm), cr,
-                     Flags & MD_APPENDFILES, &fplog);
-    }
-    else
-    {
-        fplog = NULL;
-    }
-
-    ddxyz[XX] = (int)(realddxyz[XX] + 0.5);
-    ddxyz[YY] = (int)(realddxyz[YY] + 0.5);
-    ddxyz[ZZ] = (int)(realddxyz[ZZ] + 0.5);
-
-    /* PLUMED */
-    plumedswitch=0;
-    if (opt2bSet("-plumed",NFILE,fnm)) plumedswitch=1;
-    if(plumedswitch){
-      plumedcmd=plumed_cmd;
-      int real_precision=sizeof(real);
-      real energyUnits=1.0;
-      real lengthUnits=1.0;
-      real timeUnits=1.0;
-  
-      if(!plumed_installed()){
-        gmx_fatal(FARGS,"Plumed is not available. Check your PLUMED_KERNEL variable.");
-      }
-      plumedmain=plumed_create();
-      plumed_cmd(plumedmain,"setRealPrecision",&real_precision);
-      // this is not necessary for gromacs units:
-      plumed_cmd(plumedmain,"setMDEnergyUnits",&energyUnits);
-      plumed_cmd(plumedmain,"setMDLengthUnits",&lengthUnits);
-      plumed_cmd(plumedmain,"setMDTimeUnits",&timeUnits);
-      //
-      plumed_cmd(plumedmain,"setPlumedDat",ftp2fn(efDAT,NFILE,fnm));
-      plumedswitch=1;
-    }
-    /* PLUMED HREX*/
-    if(getenv("PLUMED_HREX")) plumed_hrex=1;
-    if(plumed_hrex){
-      if(!plumedswitch)  gmx_fatal(FARGS,"-hrex (or PLUMED_HREX) requires -plumed");
-      if(repl_ex_nst==0) gmx_fatal(FARGS,"-hrex (or PLUMED_HREX) replica exchange");
-      if(repl_ex_nex!=0) gmx_fatal(FARGS,"-hrex (or PLUMED_HREX) not compatible with -nex");
-    }
-    /* END PLUMED HREX */
-
-    /* END PLUMED */
-
-    rc = mdrunner(&hw_opt, fplog, cr, NFILE, fnm, oenv, bVerbose, bCompact,
-                  nstglobalcomm, ddxyz, dd_node_order, rdd, rconstr,
-                  dddlb_opt[0], dlb_scale, ddcsx, ddcsy, ddcsz,
-                  nbpu_opt[0], nstlist,
-                  nsteps, nstepout, resetstep,
-                  nmultisim, repl_ex_nst, repl_ex_nex, repl_ex_seed,
-                  pforce, cpt_period, max_hours, imdport, Flags);
-
-    /* Log file has to be closed in mdrunner if we are appending to it
-       (fplog not set here) */
-    if (MASTER(cr) && !bDoAppendFiles)
-    {
-        gmx_log_close(fplog);
-    }
-
-    return rc;
-}
diff --git a/patches/gromacs-5.1.4.diff/src/programs/mdrun/mdrun.cpp.preplumed b/patches/gromacs-5.1.4.diff/src/programs/mdrun/mdrun.cpp.preplumed
deleted file mode 100644
index fcdc36952adcd0d2e1ad0dcf25c5c997e1a2dd72..0000000000000000000000000000000000000000
--- a/patches/gromacs-5.1.4.diff/src/programs/mdrun/mdrun.cpp.preplumed
+++ /dev/null
@@ -1,554 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2011,2012,2013,2014,2015,2016, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-/*! \defgroup module_mdrun Implementation of mdrun
- * \ingroup group_mdrun
- *
- * \brief This module contains code that implements mdrun.
- */
-/*! \internal \file
- *
- * \brief This file implements mdrun
- *
- * \author Berk Hess <hess@kth.se>
- * \author David van der Spoel <david.vanderspoel@icm.uu.se>
- * \author Erik Lindahl <erik@kth.se>
- * \author Mark Abraham <mark.j.abraham@gmail.com>
- *
- * \ingroup module_mdrun
- */
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <stdio.h>
-#include <string.h>
-
-#include "gromacs/commandline/pargs.h"
-#include "gromacs/fileio/filenm.h"
-#include "gromacs/legacyheaders/macros.h"
-#include "gromacs/legacyheaders/main.h"
-#include "gromacs/legacyheaders/mdrun.h"
-#include "gromacs/legacyheaders/network.h"
-#include "gromacs/legacyheaders/readinp.h"
-#include "gromacs/legacyheaders/typedefs.h"
-#include "gromacs/legacyheaders/types/commrec.h"
-#include "gromacs/mdrunutility/handlerestart.h"
-#include "gromacs/utility/fatalerror.h"
-
-#include "mdrun_main.h"
-
-/*! \brief Return whether either of the command-line parameters that
- *  will trigger a multi-simulation is set */
-static bool is_multisim_option_set(int argc, const char *const argv[])
-{
-    for (int i = 0; i < argc; ++i)
-    {
-        if (strcmp(argv[i], "-multi") == 0 || strcmp(argv[i], "-multidir") == 0)
-        {
-            return true;
-        }
-    }
-    return false;
-}
-
-//! Implements C-style main function for mdrun
-int gmx_mdrun(int argc, char *argv[])
-{
-    const char   *desc[] = {
-        "[THISMODULE] is the main computational chemistry engine",
-        "within GROMACS. Obviously, it performs Molecular Dynamics simulations,",
-        "but it can also perform Stochastic Dynamics, Energy Minimization,",
-        "test particle insertion or (re)calculation of energies.",
-        "Normal mode analysis is another option. In this case [TT]mdrun[tt]",
-        "builds a Hessian matrix from single conformation.",
-        "For usual Normal Modes-like calculations, make sure that",
-        "the structure provided is properly energy-minimized.",
-        "The generated matrix can be diagonalized by [gmx-nmeig].[PAR]",
-        "The [TT]mdrun[tt] program reads the run input file ([TT]-s[tt])",
-        "and distributes the topology over ranks if needed.",
-        "[TT]mdrun[tt] produces at least four output files.",
-        "A single log file ([TT]-g[tt]) is written.",
-        "The trajectory file ([TT]-o[tt]), contains coordinates, velocities and",
-        "optionally forces.",
-        "The structure file ([TT]-c[tt]) contains the coordinates and",
-        "velocities of the last step.",
-        "The energy file ([TT]-e[tt]) contains energies, the temperature,",
-        "pressure, etc, a lot of these things are also printed in the log file.",
-        "Optionally coordinates can be written to a compressed trajectory file",
-        "([TT]-x[tt]).[PAR]",
-        "The option [TT]-dhdl[tt] is only used when free energy calculation is",
-        "turned on.[PAR]",
-        "Running mdrun efficiently in parallel is a complex topic topic,",
-        "many aspects of which are covered in the online User Guide. You",
-        "should look there for practical advice on using many of the options",
-        "available in mdrun.[PAR]",
-        "ED (essential dynamics) sampling and/or additional flooding potentials",
-        "are switched on by using the [TT]-ei[tt] flag followed by an [REF].edi[ref]",
-        "file. The [REF].edi[ref] file can be produced with the [TT]make_edi[tt] tool",
-        "or by using options in the essdyn menu of the WHAT IF program.",
-        "[TT]mdrun[tt] produces a [REF].xvg[ref] output file that",
-        "contains projections of positions, velocities and forces onto selected",
-        "eigenvectors.[PAR]",
-        "When user-defined potential functions have been selected in the",
-        "[REF].mdp[ref] file the [TT]-table[tt] option is used to pass [TT]mdrun[tt]",
-        "a formatted table with potential functions. The file is read from",
-        "either the current directory or from the [TT]GMXLIB[tt] directory.",
-        "A number of pre-formatted tables are presented in the [TT]GMXLIB[tt] dir,",
-        "for 6-8, 6-9, 6-10, 6-11, 6-12 Lennard-Jones potentials with",
-        "normal Coulomb.",
-        "When pair interactions are present, a separate table for pair interaction",
-        "functions is read using the [TT]-tablep[tt] option.[PAR]",
-        "When tabulated bonded functions are present in the topology,",
-        "interaction functions are read using the [TT]-tableb[tt] option.",
-        "For each different tabulated interaction type used, a table file name must",
-        "be given. For the topology to work, a file name given here must match a",
-        "character sequence before the file extension. That sequence is: an underscore,",
-        "then a 'b' for bonds, an 'a' for angles or a 'd' for dihedrals,",
-        "and finally the matching table number index used in the topology.[PAR]",
-        "The options [TT]-px[tt] and [TT]-pf[tt] are used for writing pull COM",
-        "coordinates and forces when pulling is selected",
-        "in the [REF].mdp[ref] file.[PAR]",
-        "Finally some experimental algorithms can be tested when the",
-        "appropriate options have been given. Currently under",
-        "investigation are: polarizability.",
-        "[PAR]",
-        "The option [TT]-membed[tt] does what used to be g_membed, i.e. embed",
-        "a protein into a membrane. This module requires a number of settings",
-        "that are provided in a data file that is the argument of this option.",
-        "For more details in membrane embedding, see the documentation in the",
-        "user guide. The options [TT]-mn[tt] and [TT]-mp[tt] are used to provide",
-        "the index and topology files used for the embedding.",
-        "[PAR]",
-        "The option [TT]-pforce[tt] is useful when you suspect a simulation",
-        "crashes due to too large forces. With this option coordinates and",
-        "forces of atoms with a force larger than a certain value will",
-        "be printed to stderr.",
-        "[PAR]",
-        "Checkpoints containing the complete state of the system are written",
-        "at regular intervals (option [TT]-cpt[tt]) to the file [TT]-cpo[tt],",
-        "unless option [TT]-cpt[tt] is set to -1.",
-        "The previous checkpoint is backed up to [TT]state_prev.cpt[tt] to",
-        "make sure that a recent state of the system is always available,",
-        "even when the simulation is terminated while writing a checkpoint.",
-        "With [TT]-cpnum[tt] all checkpoint files are kept and appended",
-        "with the step number.",
-        "A simulation can be continued by reading the full state from file",
-        "with option [TT]-cpi[tt]. This option is intelligent in the way that",
-        "if no checkpoint file is found, GROMACS just assumes a normal run and",
-        "starts from the first step of the [REF].tpr[ref] file. By default the output",
-        "will be appending to the existing output files. The checkpoint file",
-        "contains checksums of all output files, such that you will never",
-        "loose data when some output files are modified, corrupt or removed.",
-        "There are three scenarios with [TT]-cpi[tt]:[PAR]",
-        "[TT]*[tt] no files with matching names are present: new output files are written[PAR]",
-        "[TT]*[tt] all files are present with names and checksums matching those stored",
-        "in the checkpoint file: files are appended[PAR]",
-        "[TT]*[tt] otherwise no files are modified and a fatal error is generated[PAR]",
-        "With [TT]-noappend[tt] new output files are opened and the simulation",
-        "part number is added to all output file names.",
-        "Note that in all cases the checkpoint file itself is not renamed",
-        "and will be overwritten, unless its name does not match",
-        "the [TT]-cpo[tt] option.",
-        "[PAR]",
-        "With checkpointing the output is appended to previously written",
-        "output files, unless [TT]-noappend[tt] is used or none of the previous",
-        "output files are present (except for the checkpoint file).",
-        "The integrity of the files to be appended is verified using checksums",
-        "which are stored in the checkpoint file. This ensures that output can",
-        "not be mixed up or corrupted due to file appending. When only some",
-        "of the previous output files are present, a fatal error is generated",
-        "and no old output files are modified and no new output files are opened.",
-        "The result with appending will be the same as from a single run.",
-        "The contents will be binary identical, unless you use a different number",
-        "of ranks or dynamic load balancing or the FFT library uses optimizations",
-        "through timing.",
-        "[PAR]",
-        "With option [TT]-maxh[tt] a simulation is terminated and a checkpoint",
-        "file is written at the first neighbor search step where the run time",
-        "exceeds [TT]-maxh[tt]\\*0.99 hours. This option is particularly useful in",
-        "combination with setting [TT]nsteps[tt] to -1 either in the mdp or using the",
-        "similarly named command line option. This results in an infinite run,",
-        "terminated only when the time limit set by [TT]-maxh[tt] is reached (if any)"
-        "or upon receiving a signal."
-        "[PAR]",
-        "When [TT]mdrun[tt] receives a TERM signal, it will set nsteps to the current",
-        "step plus one. When [TT]mdrun[tt] receives an INT signal (e.g. when ctrl+C is",
-        "pressed), it will stop after the next neighbor search step ",
-        "(with nstlist=0 at the next step).",
-        "In both cases all the usual output will be written to file.",
-        "When running with MPI, a signal to one of the [TT]mdrun[tt] ranks",
-        "is sufficient, this signal should not be sent to mpirun or",
-        "the [TT]mdrun[tt] process that is the parent of the others.",
-        "[PAR]",
-        "Interactive molecular dynamics (IMD) can be activated by using at least one",
-        "of the three IMD switches: The [TT]-imdterm[tt] switch allows to terminate",
-        "the simulation from the molecular viewer (e.g. VMD). With [TT]-imdwait[tt],",
-        "[TT]mdrun[tt] pauses whenever no IMD client is connected. Pulling from the",
-        "IMD remote can be turned on by [TT]-imdpull[tt].",
-        "The port [TT]mdrun[tt] listens to can be altered by [TT]-imdport[tt].The",
-        "file pointed to by [TT]-if[tt] contains atom indices and forces if IMD",
-        "pulling is used."
-        "[PAR]",
-        "When [TT]mdrun[tt] is started with MPI, it does not run niced by default."
-    };
-    t_commrec    *cr;
-    t_filenm      fnm[] = {
-        { efTPR, NULL,      NULL,       ffREAD },
-        { efTRN, "-o",      NULL,       ffWRITE },
-        { efCOMPRESSED, "-x", NULL,     ffOPTWR },
-        { efCPT, "-cpi",    NULL,       ffOPTRD | ffALLOW_MISSING },
-        { efCPT, "-cpo",    NULL,       ffOPTWR },
-        { efSTO, "-c",      "confout",  ffWRITE },
-        { efEDR, "-e",      "ener",     ffWRITE },
-        { efLOG, "-g",      "md",       ffWRITE },
-        { efXVG, "-dhdl",   "dhdl",     ffOPTWR },
-        { efXVG, "-field",  "field",    ffOPTWR },
-        { efXVG, "-table",  "table",    ffOPTRD },
-        { efXVG, "-tabletf", "tabletf",    ffOPTRD },
-        { efXVG, "-tablep", "tablep",   ffOPTRD },
-        { efXVG, "-tableb", "table",    ffOPTRDMULT },
-        { efTRX, "-rerun",  "rerun",    ffOPTRD },
-        { efXVG, "-tpi",    "tpi",      ffOPTWR },
-        { efXVG, "-tpid",   "tpidist",  ffOPTWR },
-        { efEDI, "-ei",     "sam",      ffOPTRD },
-        { efXVG, "-eo",     "edsam",    ffOPTWR },
-        { efXVG, "-devout", "deviatie", ffOPTWR },
-        { efXVG, "-runav",  "runaver",  ffOPTWR },
-        { efXVG, "-px",     "pullx",    ffOPTWR },
-        { efXVG, "-pf",     "pullf",    ffOPTWR },
-        { efXVG, "-ro",     "rotation", ffOPTWR },
-        { efLOG, "-ra",     "rotangles", ffOPTWR },
-        { efLOG, "-rs",     "rotslabs", ffOPTWR },
-        { efLOG, "-rt",     "rottorque", ffOPTWR },
-        { efMTX, "-mtx",    "nm",       ffOPTWR },
-        { efNDX, "-dn",     "dipole",   ffOPTWR },
-        { efRND, "-multidir", NULL,      ffOPTRDMULT},
-        { efDAT, "-membed", "membed",   ffOPTRD },
-        { efTOP, "-mp",     "membed",   ffOPTRD },
-        { efNDX, "-mn",     "membed",   ffOPTRD },
-        { efXVG, "-if",     "imdforces", ffOPTWR },
-        { efXVG, "-swap",   "swapions", ffOPTWR }
-    };
-    const int     NFILE = asize(fnm);
-
-    /* Command line options ! */
-    gmx_bool        bDDBondCheck  = TRUE;
-    gmx_bool        bDDBondComm   = TRUE;
-    gmx_bool        bTunePME      = TRUE;
-    gmx_bool        bVerbose      = FALSE;
-    gmx_bool        bCompact      = TRUE;
-    gmx_bool        bRerunVSite   = FALSE;
-    gmx_bool        bConfout      = TRUE;
-    gmx_bool        bReproducible = FALSE;
-    gmx_bool        bIMDwait      = FALSE;
-    gmx_bool        bIMDterm      = FALSE;
-    gmx_bool        bIMDpull      = FALSE;
-
-    int             npme          = -1;
-    int             nstlist       = 0;
-    int             nmultisim     = 0;
-    int             nstglobalcomm = -1;
-    int             repl_ex_nst   = 0;
-    int             repl_ex_seed  = -1;
-    int             repl_ex_nex   = 0;
-    int             nstepout      = 100;
-    int             resetstep     = -1;
-    gmx_int64_t     nsteps        = -2;   /* the value -2 means that the mdp option will be used */
-    int             imdport       = 8888; /* can be almost anything, 8888 is easy to remember */
-
-    rvec            realddxyz          = {0, 0, 0};
-    const char     *ddno_opt[ddnoNR+1] =
-    { NULL, "interleave", "pp_pme", "cartesian", NULL };
-    const char     *dddlb_opt[] =
-    { NULL, "auto", "no", "yes", NULL };
-    const char     *thread_aff_opt[threadaffNR+1] =
-    { NULL, "auto", "on", "off", NULL };
-    const char     *nbpu_opt[] =
-    { NULL, "auto", "cpu", "gpu", "gpu_cpu", NULL };
-    real            rdd                   = 0.0, rconstr = 0.0, dlb_scale = 0.8, pforce = -1;
-    char           *ddcsx                 = NULL, *ddcsy = NULL, *ddcsz = NULL;
-    real            cpt_period            = 15.0, max_hours = -1;
-    gmx_bool        bTryToAppendFiles     = TRUE;
-    gmx_bool        bKeepAndNumCPT        = FALSE;
-    gmx_bool        bResetCountersHalfWay = FALSE;
-    output_env_t    oenv                  = NULL;
-
-    /* Non transparent initialization of a complex gmx_hw_opt_t struct.
-     * But unfortunately we are not allowed to call a function here,
-     * since declarations follow below.
-     */
-    gmx_hw_opt_t    hw_opt = {
-        0, 0, 0, 0, threadaffSEL, 0, 0,
-        { NULL, FALSE, 0, NULL }
-    };
-
-    t_pargs         pa[] = {
-
-        { "-dd",      FALSE, etRVEC, {&realddxyz},
-          "Domain decomposition grid, 0 is optimize" },
-        { "-ddorder", FALSE, etENUM, {ddno_opt},
-          "DD rank order" },
-        { "-npme",    FALSE, etINT, {&npme},
-          "Number of separate ranks to be used for PME, -1 is guess" },
-        { "-nt",      FALSE, etINT, {&hw_opt.nthreads_tot},
-          "Total number of threads to start (0 is guess)" },
-        { "-ntmpi",   FALSE, etINT, {&hw_opt.nthreads_tmpi},
-          "Number of thread-MPI threads to start (0 is guess)" },
-        { "-ntomp",   FALSE, etINT, {&hw_opt.nthreads_omp},
-          "Number of OpenMP threads per MPI rank to start (0 is guess)" },
-        { "-ntomp_pme", FALSE, etINT, {&hw_opt.nthreads_omp_pme},
-          "Number of OpenMP threads per MPI rank to start (0 is -ntomp)" },
-        { "-pin",     FALSE, etENUM, {thread_aff_opt},
-          "Whether mdrun should try to set thread affinities" },
-        { "-pinoffset", FALSE, etINT, {&hw_opt.core_pinning_offset},
-          "The lowest logical core number to which mdrun should pin the first thread" },
-        { "-pinstride", FALSE, etINT, {&hw_opt.core_pinning_stride},
-          "Pinning distance in logical cores for threads, use 0 to minimize the number of threads per physical core" },
-        { "-gpu_id",  FALSE, etSTR, {&hw_opt.gpu_opt.gpu_id},
-          "List of GPU device id-s to use, specifies the per-node PP rank to GPU mapping" },
-        { "-ddcheck", FALSE, etBOOL, {&bDDBondCheck},
-          "Check for all bonded interactions with DD" },
-        { "-ddbondcomm", FALSE, etBOOL, {&bDDBondComm},
-          "HIDDENUse special bonded atom communication when [TT]-rdd[tt] > cut-off" },
-        { "-rdd",     FALSE, etREAL, {&rdd},
-          "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial coordinates" },
-        { "-rcon",    FALSE, etREAL, {&rconstr},
-          "Maximum distance for P-LINCS (nm), 0 is estimate" },
-        { "-dlb",     FALSE, etENUM, {dddlb_opt},
-          "Dynamic load balancing (with DD)" },
-        { "-dds",     FALSE, etREAL, {&dlb_scale},
-          "Fraction in (0,1) by whose reciprocal the initial DD cell size will be increased in order to "
-          "provide a margin in which dynamic load balancing can act while preserving the minimum cell size." },
-        { "-ddcsx",   FALSE, etSTR, {&ddcsx},
-          "HIDDENA string containing a vector of the relative sizes in the x "
-          "direction of the corresponding DD cells. Only effective with static "
-          "load balancing." },
-        { "-ddcsy",   FALSE, etSTR, {&ddcsy},
-          "HIDDENA string containing a vector of the relative sizes in the y "
-          "direction of the corresponding DD cells. Only effective with static "
-          "load balancing." },
-        { "-ddcsz",   FALSE, etSTR, {&ddcsz},
-          "HIDDENA string containing a vector of the relative sizes in the z "
-          "direction of the corresponding DD cells. Only effective with static "
-          "load balancing." },
-        { "-gcom",    FALSE, etINT, {&nstglobalcomm},
-          "Global communication frequency" },
-        { "-nb",      FALSE, etENUM, {&nbpu_opt},
-          "Calculate non-bonded interactions on" },
-        { "-nstlist", FALSE, etINT, {&nstlist},
-          "Set nstlist when using a Verlet buffer tolerance (0 is guess)" },
-        { "-tunepme", FALSE, etBOOL, {&bTunePME},
-          "Optimize PME load between PP/PME ranks or GPU/CPU" },
-        { "-v",       FALSE, etBOOL, {&bVerbose},
-          "Be loud and noisy" },
-        { "-compact", FALSE, etBOOL, {&bCompact},
-          "Write a compact log file" },
-        { "-pforce",  FALSE, etREAL, {&pforce},
-          "Print all forces larger than this (kJ/mol nm)" },
-        { "-reprod",  FALSE, etBOOL, {&bReproducible},
-          "Try to avoid optimizations that affect binary reproducibility" },
-        { "-cpt",     FALSE, etREAL, {&cpt_period},
-          "Checkpoint interval (minutes)" },
-        { "-cpnum",   FALSE, etBOOL, {&bKeepAndNumCPT},
-          "Keep and number checkpoint files" },
-        { "-append",  FALSE, etBOOL, {&bTryToAppendFiles},
-          "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names" },
-        { "-nsteps",  FALSE, etINT64, {&nsteps},
-          "Run this number of steps, overrides .mdp file option (-1 means infinite, -2 means use mdp option, smaller is invalid)" },
-        { "-maxh",   FALSE, etREAL, {&max_hours},
-          "Terminate after 0.99 times this time (hours)" },
-        { "-multi",   FALSE, etINT, {&nmultisim},
-          "Do multiple simulations in parallel" },
-        { "-replex",  FALSE, etINT, {&repl_ex_nst},
-          "Attempt replica exchange periodically with this period (steps)" },
-        { "-nex",  FALSE, etINT, {&repl_ex_nex},
-          "Number of random exchanges to carry out each exchange interval (N^3 is one suggestion).  -nex zero or not specified gives neighbor replica exchange." },
-        { "-reseed",  FALSE, etINT, {&repl_ex_seed},
-          "Seed for replica exchange, -1 is generate a seed" },
-        { "-imdport",    FALSE, etINT, {&imdport},
-          "HIDDENIMD listening port" },
-        { "-imdwait",  FALSE, etBOOL, {&bIMDwait},
-          "HIDDENPause the simulation while no IMD client is connected" },
-        { "-imdterm",  FALSE, etBOOL, {&bIMDterm},
-          "HIDDENAllow termination of the simulation from IMD client" },
-        { "-imdpull",  FALSE, etBOOL, {&bIMDpull},
-          "HIDDENAllow pulling in the simulation from IMD client" },
-        { "-rerunvsite", FALSE, etBOOL, {&bRerunVSite},
-          "HIDDENRecalculate virtual site coordinates with [TT]-rerun[tt]" },
-        { "-confout", FALSE, etBOOL, {&bConfout},
-          "HIDDENWrite the last configuration with [TT]-c[tt] and force checkpointing at the last step" },
-        { "-stepout", FALSE, etINT, {&nstepout},
-          "HIDDENFrequency of writing the remaining wall clock time for the run" },
-        { "-resetstep", FALSE, etINT, {&resetstep},
-          "HIDDENReset cycle counters after these many time steps" },
-        { "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay},
-          "HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt]" }
-    };
-    unsigned long   Flags;
-    ivec            ddxyz;
-    int             dd_node_order;
-    gmx_bool        bDoAppendFiles, bStartFromCpt;
-    FILE           *fplog;
-    int             rc;
-    char          **multidir = NULL;
-
-    cr = init_commrec();
-
-    unsigned long PCA_Flags = PCA_CAN_SET_DEFFNM;
-    // With -multi or -multidir, the file names are going to get processed
-    // further (or the working directory changed), so we can't check for their
-    // existence during parsing.  It isn't useful to do any completion based on
-    // file system contents, either.
-    if (is_multisim_option_set(argc, argv))
-    {
-        PCA_Flags |= PCA_DISABLE_INPUT_FILE_CHECKING;
-    }
-
-    /* Comment this in to do fexist calls only on master
-     * works not with rerun or tables at the moment
-     * also comment out the version of init_forcerec in md.c
-     * with NULL instead of opt2fn
-     */
-    /*
-       if (!MASTER(cr))
-       {
-       PCA_Flags |= PCA_NOT_READ_NODE;
-       }
-     */
-
-    if (!parse_common_args(&argc, argv, PCA_Flags, NFILE, fnm, asize(pa), pa,
-                           asize(desc), desc, 0, NULL, &oenv))
-    {
-        return 0;
-    }
-
-
-    /* we set these early because they might be used in init_multisystem()
-       Note that there is the potential for npme>nnodes until the number of
-       threads is set later on, if there's thread parallelization. That shouldn't
-       lead to problems. */
-    dd_node_order = nenum(ddno_opt);
-    cr->npmenodes = npme;
-
-    hw_opt.thread_affinity = nenum(thread_aff_opt);
-
-    /* now check the -multi and -multidir option */
-    if (opt2bSet("-multidir", NFILE, fnm))
-    {
-        if (nmultisim > 0)
-        {
-            gmx_fatal(FARGS, "mdrun -multi and -multidir options are mutually exclusive.");
-        }
-        nmultisim = opt2fns(&multidir, "-multidir", NFILE, fnm);
-    }
-
-
-    if (repl_ex_nst != 0 && nmultisim < 2)
-    {
-        gmx_fatal(FARGS, "Need at least two replicas for replica exchange (option -multi)");
-    }
-
-    if (repl_ex_nex < 0)
-    {
-        gmx_fatal(FARGS, "Replica exchange number of exchanges needs to be positive");
-    }
-
-    if (nmultisim >= 1)
-    {
-#ifndef GMX_THREAD_MPI
-        gmx_bool bParFn = (multidir == NULL);
-        init_multisystem(cr, nmultisim, multidir, NFILE, fnm, bParFn);
-#else
-        gmx_fatal(FARGS, "mdrun -multi or -multidir are not supported with the thread-MPI library. "
-                  "Please compile GROMACS with a proper external MPI library.");
-#endif
-    }
-
-    handleRestart(cr, bTryToAppendFiles, NFILE, fnm,
-                  &bDoAppendFiles, &bStartFromCpt);
-
-    Flags = opt2bSet("-rerun", NFILE, fnm) ? MD_RERUN : 0;
-    Flags = Flags | (bDDBondCheck  ? MD_DDBONDCHECK  : 0);
-    Flags = Flags | (bDDBondComm   ? MD_DDBONDCOMM   : 0);
-    Flags = Flags | (bTunePME      ? MD_TUNEPME      : 0);
-    Flags = Flags | (bConfout      ? MD_CONFOUT      : 0);
-    Flags = Flags | (bRerunVSite   ? MD_RERUN_VSITE  : 0);
-    Flags = Flags | (bReproducible ? MD_REPRODUCIBLE : 0);
-    Flags = Flags | (bDoAppendFiles ? MD_APPENDFILES  : 0);
-    Flags = Flags | (opt2parg_bSet("-append", asize(pa), pa) ? MD_APPENDFILESSET : 0);
-    Flags = Flags | (bKeepAndNumCPT ? MD_KEEPANDNUMCPT : 0);
-    Flags = Flags | (bStartFromCpt ? MD_STARTFROMCPT : 0);
-    Flags = Flags | (bResetCountersHalfWay ? MD_RESETCOUNTERSHALFWAY : 0);
-    Flags = Flags | (opt2parg_bSet("-ntomp", asize(pa), pa) ? MD_NTOMPSET : 0);
-    Flags = Flags | (bIMDwait      ? MD_IMDWAIT      : 0);
-    Flags = Flags | (bIMDterm      ? MD_IMDTERM      : 0);
-    Flags = Flags | (bIMDpull      ? MD_IMDPULL      : 0);
-
-    /* We postpone opening the log file if we are appending, so we can
-       first truncate the old log file and append to the correct position
-       there instead.  */
-    if (MASTER(cr) && !bDoAppendFiles)
-    {
-        gmx_log_open(ftp2fn(efLOG, NFILE, fnm), cr,
-                     Flags & MD_APPENDFILES, &fplog);
-    }
-    else
-    {
-        fplog = NULL;
-    }
-
-    ddxyz[XX] = (int)(realddxyz[XX] + 0.5);
-    ddxyz[YY] = (int)(realddxyz[YY] + 0.5);
-    ddxyz[ZZ] = (int)(realddxyz[ZZ] + 0.5);
-
-    rc = mdrunner(&hw_opt, fplog, cr, NFILE, fnm, oenv, bVerbose, bCompact,
-                  nstglobalcomm, ddxyz, dd_node_order, rdd, rconstr,
-                  dddlb_opt[0], dlb_scale, ddcsx, ddcsy, ddcsz,
-                  nbpu_opt[0], nstlist,
-                  nsteps, nstepout, resetstep,
-                  nmultisim, repl_ex_nst, repl_ex_nex, repl_ex_seed,
-                  pforce, cpt_period, max_hours, imdport, Flags);
-
-    /* Log file has to be closed in mdrunner if we are appending to it
-       (fplog not set here) */
-    if (MASTER(cr) && !bDoAppendFiles)
-    {
-        gmx_log_close(fplog);
-    }
-
-    return rc;
-}
diff --git a/patches/gromacs-5.1.4.diff/src/programs/mdrun/repl_ex.cpp b/patches/gromacs-5.1.4.diff/src/programs/mdrun/repl_ex.cpp
deleted file mode 100644
index e799bc66964a891d20d901b8fd0e543a9cfc03a3..0000000000000000000000000000000000000000
--- a/patches/gromacs-5.1.4.diff/src/programs/mdrun/repl_ex.cpp
+++ /dev/null
@@ -1,1542 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2011,2012,2013,2014,2015, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-
-#include "gmxpre.h"
-
-#include "repl_ex.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "gromacs/domdec/domdec.h"
-#include "gromacs/legacyheaders/copyrite.h"
-#include "gromacs/legacyheaders/main.h"
-#include "gromacs/legacyheaders/names.h"
-#include "gromacs/legacyheaders/network.h"
-#include "gromacs/math/units.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/random/random.h"
-#include "gromacs/utility/smalloc.h"
-
-/* PLUMED */
-#include "../../../Plumed.h"
-extern int    plumedswitch;
-extern plumed plumedmain;
-/* END PLUMED */
-
-/* PLUMED HREX */
-extern int plumed_hrex;
-/* END PLUMED HREX */
-
-#define PROBABILITYCUTOFF 100
-/* we don't bother evaluating if events are more rare than exp(-100) = 3.7x10^-44 */
-
-enum {
-    ereTEMP, ereLAMBDA, ereENDSINGLE, ereTL, ereNR
-};
-const char *erename[ereNR] = { "temperature", "lambda", "end_single_marker", "temperature and lambda"};
-/* end_single_marker merely notes the end of single variable replica exchange. All types higher than
-   it are multiple replica exchange methods */
-/* Eventually, should add 'pressure', 'temperature and pressure', 'lambda_and_pressure', 'temperature_lambda_pressure'?;
-   Let's wait until we feel better about the pressure control methods giving exact ensembles.  Right now, we assume constant pressure  */
-
-typedef struct gmx_repl_ex
-{
-    int       repl;
-    int       nrepl;
-    real      temp;
-    int       type;
-    real    **q;
-    gmx_bool  bNPT;
-    real     *pres;
-    int      *ind;
-    int      *allswaps;
-    int       nst;
-    int       nex;
-    int       seed;
-    int       nattempt[2];
-    real     *prob_sum;
-    int     **nmoves;
-    int      *nexchange;
-    gmx_rng_t rng;
-
-    /* these are helper arrays for replica exchange; allocated here so they
-       don't have to be allocated each time */
-    int      *destinations;
-    int     **cyclic;
-    int     **order;
-    int      *tmpswap;
-    gmx_bool *incycle;
-    gmx_bool *bEx;
-
-    /* helper arrays to hold the quantities that are exchanged */
-    real  *prob;
-    real  *Epot;
-    real  *beta;
-    real  *Vol;
-    real **de;
-
-} t_gmx_repl_ex;
-
-static gmx_bool repl_quantity(const gmx_multisim_t *ms,
-                              struct gmx_repl_ex *re, int ere, real q)
-{
-    real    *qall;
-    gmx_bool bDiff;
-    int      s;
-
-    snew(qall, ms->nsim);
-    qall[re->repl] = q;
-    gmx_sum_sim(ms->nsim, qall, ms);
-
-    /* PLUMED */
-    //bDiff = FALSE;
-    //for (s = 1; s < ms->nsim; s++)
-    //{
-    //    if (qall[s] != qall[0])
-    //    {
-            bDiff = TRUE;
-    //    }
-    //}
-    /* END PLUMED */
-
-    if (bDiff)
-    {
-        /* Set the replica exchange type and quantities */
-        re->type = ere;
-
-        snew(re->q[ere], re->nrepl);
-        for (s = 0; s < ms->nsim; s++)
-        {
-            re->q[ere][s] = qall[s];
-        }
-    }
-    sfree(qall);
-    return bDiff;
-}
-
-gmx_repl_ex_t init_replica_exchange(FILE *fplog,
-                                    const gmx_multisim_t *ms,
-                                    const t_state *state,
-                                    const t_inputrec *ir,
-                                    int nst, int nex, int init_seed)
-{
-    real                pres;
-    int                 i, j, k;
-    struct gmx_repl_ex *re;
-    gmx_bool            bTemp;
-    gmx_bool            bLambda = FALSE;
-
-    fprintf(fplog, "\nInitializing Replica Exchange\n");
-
-    if (ms == NULL || ms->nsim == 1)
-    {
-        gmx_fatal(FARGS, "Nothing to exchange with only one replica, maybe you forgot to set the -multi option of mdrun?");
-    }
-    if (!EI_DYNAMICS(ir->eI))
-    {
-        gmx_fatal(FARGS, "Replica exchange is only supported by dynamical simulations");
-        /* Note that PAR(cr) is defined by cr->nnodes > 1, which is
-         * distinct from MULTISIM(cr). A multi-simulation only runs
-         * with real MPI parallelism, but this does not imply PAR(cr)
-         * is true!
-         *
-         * Since we are using a dynamical integrator, the only
-         * decomposition is DD, so PAR(cr) and DOMAINDECOMP(cr) are
-         * synonymous. The only way for cr->nnodes > 1 to be true is
-         * if we are using DD. */
-    }
-
-    snew(re, 1);
-
-    re->repl     = ms->sim;
-    re->nrepl    = ms->nsim;
-    snew(re->q, ereENDSINGLE);
-
-    fprintf(fplog, "Repl  There are %d replicas:\n", re->nrepl);
-
-    check_multi_int(fplog, ms, state->natoms, "the number of atoms", FALSE);
-    check_multi_int(fplog, ms, ir->eI, "the integrator", FALSE);
-    check_multi_int64(fplog, ms, ir->init_step+ir->nsteps, "init_step+nsteps", FALSE);
-    check_multi_int64(fplog, ms, (ir->init_step+nst-1)/nst,
-                      "first exchange step: init_step/-replex", FALSE);
-    check_multi_int(fplog, ms, ir->etc, "the temperature coupling", FALSE);
-    check_multi_int(fplog, ms, ir->opts.ngtc,
-                    "the number of temperature coupling groups", FALSE);
-    check_multi_int(fplog, ms, ir->epc, "the pressure coupling", FALSE);
-    check_multi_int(fplog, ms, ir->efep, "free energy", FALSE);
-    check_multi_int(fplog, ms, ir->fepvals->n_lambda, "number of lambda states", FALSE);
-
-    re->temp = ir->opts.ref_t[0];
-    for (i = 1; (i < ir->opts.ngtc); i++)
-    {
-        if (ir->opts.ref_t[i] != re->temp)
-        {
-            fprintf(fplog, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n");
-            fprintf(stderr, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n");
-        }
-    }
-
-    re->type = -1;
-    bTemp    = repl_quantity(ms, re, ereTEMP, re->temp);
-    if (ir->efep != efepNO)
-    {
-        bLambda = repl_quantity(ms, re, ereLAMBDA, (real)ir->fepvals->init_fep_state);
-    }
-    if (re->type == -1)  /* nothing was assigned */
-    {
-        gmx_fatal(FARGS, "The properties of the %d systems are all the same, there is nothing to exchange", re->nrepl);
-    }
-    if (bLambda && bTemp)
-    {
-        re->type = ereTL;
-    }
-
-    if (bTemp)
-    {
-        please_cite(fplog, "Sugita1999a");
-        if (ir->epc != epcNO)
-        {
-            re->bNPT = TRUE;
-            fprintf(fplog, "Repl  Using Constant Pressure REMD.\n");
-            please_cite(fplog, "Okabe2001a");
-        }
-        if (ir->etc == etcBERENDSEN)
-        {
-            gmx_fatal(FARGS, "REMD with the %s thermostat does not produce correct potential energy distributions, consider using the %s thermostat instead",
-                      ETCOUPLTYPE(ir->etc), ETCOUPLTYPE(etcVRESCALE));
-        }
-    }
-    if (bLambda)
-    {
-        if (ir->fepvals->delta_lambda != 0)   /* check this? */
-        {
-            gmx_fatal(FARGS, "delta_lambda is not zero");
-        }
-    }
-    if (re->bNPT)
-    {
-        snew(re->pres, re->nrepl);
-        if (ir->epct == epctSURFACETENSION)
-        {
-            pres = ir->ref_p[ZZ][ZZ];
-        }
-        else
-        {
-            pres = 0;
-            j    = 0;
-            for (i = 0; i < DIM; i++)
-            {
-                if (ir->compress[i][i] != 0)
-                {
-                    pres += ir->ref_p[i][i];
-                    j++;
-                }
-            }
-            pres /= j;
-        }
-        re->pres[re->repl] = pres;
-        gmx_sum_sim(re->nrepl, re->pres, ms);
-    }
-
-    /* Make an index for increasing replica order */
-    /* only makes sense if one or the other is varying, not both!
-       if both are varying, we trust the order the person gave. */
-    snew(re->ind, re->nrepl);
-    for (i = 0; i < re->nrepl; i++)
-    {
-        re->ind[i] = i;
-    }
-
-    /* PLUMED */
-    // plumed2: check if we want alternative patterns (i.e. for bias-exchange metaD)
-    // in those cases replicas can share the same temperature.
-    /*
-    if (re->type < ereENDSINGLE)
-    {
-
-        for (i = 0; i < re->nrepl; i++)
-        {
-            for (j = i+1; j < re->nrepl; j++)
-            {
-                if (re->q[re->type][re->ind[j]] < re->q[re->type][re->ind[i]])
-                {*/
-                    /* Unordered replicas are supposed to work, but there
-                     * is still an issues somewhere.
-                     * Note that at this point still re->ind[i]=i.
-                     */
-                 /*
-                    gmx_fatal(FARGS, "Replicas with indices %d < %d have %ss %g > %g, please order your replicas on increasing %s",
-                              i, j,
-                              erename[re->type],
-                              re->q[re->type][i], re->q[re->type][j],
-                              erename[re->type]);
-
-                    k          = re->ind[i];
-                    re->ind[i] = re->ind[j];
-                    re->ind[j] = k;
-                }
-                else if (re->q[re->type][re->ind[j]] == re->q[re->type][re->ind[i]])
-                {
-                    gmx_fatal(FARGS, "Two replicas have identical %ss", erename[re->type]);
-                }
-            }
-        }
-    }
-    */
-    /* END PLUMED */
-
-    /* keep track of all the swaps, starting with the initial placement. */
-    snew(re->allswaps, re->nrepl);
-    for (i = 0; i < re->nrepl; i++)
-    {
-        re->allswaps[i] = re->ind[i];
-    }
-
-    switch (re->type)
-    {
-        case ereTEMP:
-            fprintf(fplog, "\nReplica exchange in temperature\n");
-            for (i = 0; i < re->nrepl; i++)
-            {
-                fprintf(fplog, " %5.1f", re->q[re->type][re->ind[i]]);
-            }
-            fprintf(fplog, "\n");
-            break;
-        case ereLAMBDA:
-            fprintf(fplog, "\nReplica exchange in lambda\n");
-            for (i = 0; i < re->nrepl; i++)
-            {
-                fprintf(fplog, " %3d", (int)re->q[re->type][re->ind[i]]);
-            }
-            fprintf(fplog, "\n");
-            break;
-        case ereTL:
-            fprintf(fplog, "\nReplica exchange in temperature and lambda state\n");
-            for (i = 0; i < re->nrepl; i++)
-            {
-                fprintf(fplog, " %5.1f", re->q[ereTEMP][re->ind[i]]);
-            }
-            fprintf(fplog, "\n");
-            for (i = 0; i < re->nrepl; i++)
-            {
-                fprintf(fplog, " %5d", (int)re->q[ereLAMBDA][re->ind[i]]);
-            }
-            fprintf(fplog, "\n");
-            break;
-        default:
-            gmx_incons("Unknown replica exchange quantity");
-    }
-    if (re->bNPT)
-    {
-        fprintf(fplog, "\nRepl  p");
-        for (i = 0; i < re->nrepl; i++)
-        {
-            fprintf(fplog, " %5.2f", re->pres[re->ind[i]]);
-        }
-
-        for (i = 0; i < re->nrepl; i++)
-        {
-            if ((i > 0) && (re->pres[re->ind[i]] < re->pres[re->ind[i-1]]))
-            {
-                fprintf(fplog, "\nWARNING: The reference pressures decrease with increasing temperatures\n\n");
-                fprintf(stderr, "\nWARNING: The reference pressures decrease with increasing temperatures\n\n");
-            }
-        }
-    }
-    re->nst = nst;
-    if (init_seed == -1)
-    {
-        if (MASTERSIM(ms))
-        {
-            re->seed = (int)gmx_rng_make_seed();
-        }
-        else
-        {
-            re->seed = 0;
-        }
-        gmx_sumi_sim(1, &(re->seed), ms);
-    }
-    else
-    {
-        re->seed = init_seed;
-    }
-    fprintf(fplog, "\nReplica exchange interval: %d\n", re->nst);
-    fprintf(fplog, "\nReplica random seed: %d\n", re->seed);
-    re->rng = gmx_rng_init(re->seed);
-
-    re->nattempt[0] = 0;
-    re->nattempt[1] = 0;
-
-    snew(re->prob_sum, re->nrepl);
-    snew(re->nexchange, re->nrepl);
-    snew(re->nmoves, re->nrepl);
-    for (i = 0; i < re->nrepl; i++)
-    {
-        snew(re->nmoves[i], re->nrepl);
-    }
-    fprintf(fplog, "Replica exchange information below: x=exchange, pr=probability\n");
-
-    /* generate space for the helper functions so we don't have to snew each time */
-
-    snew(re->destinations, re->nrepl);
-    snew(re->incycle, re->nrepl);
-    snew(re->tmpswap, re->nrepl);
-    snew(re->cyclic, re->nrepl);
-    snew(re->order, re->nrepl);
-    for (i = 0; i < re->nrepl; i++)
-    {
-        snew(re->cyclic[i], re->nrepl+1);
-        snew(re->order[i], re->nrepl);
-    }
-    /* allocate space for the functions storing the data for the replicas */
-    /* not all of these arrays needed in all cases, but they don't take
-       up much space, since the max size is nrepl**2 */
-    snew(re->prob, re->nrepl);
-    snew(re->bEx, re->nrepl);
-    snew(re->beta, re->nrepl);
-    snew(re->Vol, re->nrepl);
-    snew(re->Epot, re->nrepl);
-    snew(re->de, re->nrepl);
-    for (i = 0; i < re->nrepl; i++)
-    {
-        snew(re->de[i], re->nrepl);
-    }
-    re->nex = nex;
-    return re;
-}
-
-static void exchange_reals(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, real *v, int n)
-{
-    real *buf;
-    int   i;
-
-    if (v)
-    {
-        snew(buf, n);
-#ifdef GMX_MPI
-        /*
-           MPI_Sendrecv(v,  n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0,
-           buf,n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0,
-           ms->mpi_comm_masters,MPI_STATUS_IGNORE);
-         */
-        {
-            MPI_Request mpi_req;
-
-            MPI_Isend(v, n*sizeof(real), MPI_BYTE, MSRANK(ms, b), 0,
-                      ms->mpi_comm_masters, &mpi_req);
-            MPI_Recv(buf, n*sizeof(real), MPI_BYTE, MSRANK(ms, b), 0,
-                     ms->mpi_comm_masters, MPI_STATUS_IGNORE);
-            MPI_Wait(&mpi_req, MPI_STATUS_IGNORE);
-        }
-#endif
-        for (i = 0; i < n; i++)
-        {
-            v[i] = buf[i];
-        }
-        sfree(buf);
-    }
-}
-
-
-static void exchange_ints(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, int *v, int n)
-{
-    int *buf;
-    int  i;
-
-    if (v)
-    {
-        snew(buf, n);
-#ifdef GMX_MPI
-        /*
-           MPI_Sendrecv(v,  n*sizeof(int),MPI_BYTE,MSRANK(ms,b),0,
-             buf,n*sizeof(int),MPI_BYTE,MSRANK(ms,b),0,
-             ms->mpi_comm_masters,MPI_STATUS_IGNORE);
-         */
-        {
-            MPI_Request mpi_req;
-
-            MPI_Isend(v, n*sizeof(int), MPI_BYTE, MSRANK(ms, b), 0,
-                      ms->mpi_comm_masters, &mpi_req);
-            MPI_Recv(buf, n*sizeof(int), MPI_BYTE, MSRANK(ms, b), 0,
-                     ms->mpi_comm_masters, MPI_STATUS_IGNORE);
-            MPI_Wait(&mpi_req, MPI_STATUS_IGNORE);
-        }
-#endif
-        for (i = 0; i < n; i++)
-        {
-            v[i] = buf[i];
-        }
-        sfree(buf);
-    }
-}
-
-static void exchange_doubles(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, double *v, int n)
-{
-    double *buf;
-    int     i;
-
-    if (v)
-    {
-        snew(buf, n);
-#ifdef GMX_MPI
-        /*
-           MPI_Sendrecv(v,  n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0,
-           buf,n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0,
-           ms->mpi_comm_masters,MPI_STATUS_IGNORE);
-         */
-        {
-            MPI_Request mpi_req;
-
-            MPI_Isend(v, n*sizeof(double), MPI_BYTE, MSRANK(ms, b), 0,
-                      ms->mpi_comm_masters, &mpi_req);
-            MPI_Recv(buf, n*sizeof(double), MPI_BYTE, MSRANK(ms, b), 0,
-                     ms->mpi_comm_masters, MPI_STATUS_IGNORE);
-            MPI_Wait(&mpi_req, MPI_STATUS_IGNORE);
-        }
-#endif
-        for (i = 0; i < n; i++)
-        {
-            v[i] = buf[i];
-        }
-        sfree(buf);
-    }
-}
-
-static void exchange_rvecs(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, rvec *v, int n)
-{
-    rvec *buf;
-    int   i;
-
-    if (v)
-    {
-        snew(buf, n);
-#ifdef GMX_MPI
-        /*
-           MPI_Sendrecv(v[0],  n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0,
-           buf[0],n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0,
-           ms->mpi_comm_masters,MPI_STATUS_IGNORE);
-         */
-        {
-            MPI_Request mpi_req;
-
-            MPI_Isend(v[0], n*sizeof(rvec), MPI_BYTE, MSRANK(ms, b), 0,
-                      ms->mpi_comm_masters, &mpi_req);
-            MPI_Recv(buf[0], n*sizeof(rvec), MPI_BYTE, MSRANK(ms, b), 0,
-                     ms->mpi_comm_masters, MPI_STATUS_IGNORE);
-            MPI_Wait(&mpi_req, MPI_STATUS_IGNORE);
-        }
-#endif
-        for (i = 0; i < n; i++)
-        {
-            copy_rvec(buf[i], v[i]);
-        }
-        sfree(buf);
-    }
-}
-
-void exchange_state(const gmx_multisim_t *ms, int b, t_state *state)
-{
-    /* When t_state changes, this code should be updated. */
-    int ngtc, nnhpres;
-    ngtc    = state->ngtc * state->nhchainlength;
-    nnhpres = state->nnhpres* state->nhchainlength;
-    exchange_rvecs(ms, b, state->box, DIM);
-    exchange_rvecs(ms, b, state->box_rel, DIM);
-    exchange_rvecs(ms, b, state->boxv, DIM);
-    exchange_reals(ms, b, &(state->veta), 1);
-    exchange_reals(ms, b, &(state->vol0), 1);
-    exchange_rvecs(ms, b, state->svir_prev, DIM);
-    exchange_rvecs(ms, b, state->fvir_prev, DIM);
-    exchange_rvecs(ms, b, state->pres_prev, DIM);
-    exchange_doubles(ms, b, state->nosehoover_xi, ngtc);
-    exchange_doubles(ms, b, state->nosehoover_vxi, ngtc);
-    exchange_doubles(ms, b, state->nhpres_xi, nnhpres);
-    exchange_doubles(ms, b, state->nhpres_vxi, nnhpres);
-    exchange_doubles(ms, b, state->therm_integral, state->ngtc);
-    exchange_rvecs(ms, b, state->x, state->natoms);
-    exchange_rvecs(ms, b, state->v, state->natoms);
-    exchange_rvecs(ms, b, state->sd_X, state->natoms);
-}
-
-static void copy_rvecs(rvec *s, rvec *d, int n)
-{
-    int i;
-
-    if (d != NULL)
-    {
-        for (i = 0; i < n; i++)
-        {
-            copy_rvec(s[i], d[i]);
-        }
-    }
-}
-
-static void copy_doubles(const double *s, double *d, int n)
-{
-    int i;
-
-    if (d != NULL)
-    {
-        for (i = 0; i < n; i++)
-        {
-            d[i] = s[i];
-        }
-    }
-}
-
-static void copy_reals(const real *s, real *d, int n)
-{
-    int i;
-
-    if (d != NULL)
-    {
-        for (i = 0; i < n; i++)
-        {
-            d[i] = s[i];
-        }
-    }
-}
-
-static void copy_ints(const int *s, int *d, int n)
-{
-    int i;
-
-    if (d != NULL)
-    {
-        for (i = 0; i < n; i++)
-        {
-            d[i] = s[i];
-        }
-    }
-}
-
-#define scopy_rvecs(v, n)   copy_rvecs(state->v, state_local->v, n);
-#define scopy_doubles(v, n) copy_doubles(state->v, state_local->v, n);
-#define scopy_reals(v, n) copy_reals(state->v, state_local->v, n);
-#define scopy_ints(v, n)   copy_ints(state->v, state_local->v, n);
-
-void copy_state_nonatomdata(t_state *state, t_state *state_local)
-{
-    /* When t_state changes, this code should be updated. */
-    int ngtc, nnhpres;
-    ngtc    = state->ngtc * state->nhchainlength;
-    nnhpres = state->nnhpres* state->nhchainlength;
-    scopy_rvecs(box, DIM);
-    scopy_rvecs(box_rel, DIM);
-    scopy_rvecs(boxv, DIM);
-    state_local->veta = state->veta;
-    state_local->vol0 = state->vol0;
-    scopy_rvecs(svir_prev, DIM);
-    scopy_rvecs(fvir_prev, DIM);
-    scopy_rvecs(pres_prev, DIM);
-    scopy_doubles(nosehoover_xi, ngtc);
-    scopy_doubles(nosehoover_vxi, ngtc);
-    scopy_doubles(nhpres_xi, nnhpres);
-    scopy_doubles(nhpres_vxi, nnhpres);
-    scopy_doubles(therm_integral, state->ngtc);
-    scopy_rvecs(x, state->natoms);
-    scopy_rvecs(v, state->natoms);
-    scopy_rvecs(sd_X, state->natoms);
-    copy_ints(&(state->fep_state), &(state_local->fep_state), 1);
-    scopy_reals(lambda, efptNR);
-}
-
-static void scale_velocities(t_state *state, real fac)
-{
-    int i;
-
-    if (state->v)
-    {
-        for (i = 0; i < state->natoms; i++)
-        {
-            svmul(fac, state->v[i], state->v[i]);
-        }
-    }
-}
-
-static void print_transition_matrix(FILE *fplog, int n, int **nmoves, int *nattempt)
-{
-    int   i, j, ntot;
-    float Tprint;
-
-    ntot = nattempt[0] + nattempt[1];
-    fprintf(fplog, "\n");
-    fprintf(fplog, "Repl");
-    for (i = 0; i < n; i++)
-    {
-        fprintf(fplog, "    ");  /* put the title closer to the center */
-    }
-    fprintf(fplog, "Empirical Transition Matrix\n");
-
-    fprintf(fplog, "Repl");
-    for (i = 0; i < n; i++)
-    {
-        fprintf(fplog, "%8d", (i+1));
-    }
-    fprintf(fplog, "\n");
-
-    for (i = 0; i < n; i++)
-    {
-        fprintf(fplog, "Repl");
-        for (j = 0; j < n; j++)
-        {
-            Tprint = 0.0;
-            if (nmoves[i][j] > 0)
-            {
-                Tprint = nmoves[i][j]/(2.0*ntot);
-            }
-            fprintf(fplog, "%8.4f", Tprint);
-        }
-        fprintf(fplog, "%3d\n", i);
-    }
-}
-
-static void print_ind(FILE *fplog, const char *leg, int n, int *ind, gmx_bool *bEx)
-{
-    int i;
-
-    fprintf(fplog, "Repl %2s %2d", leg, ind[0]);
-    for (i = 1; i < n; i++)
-    {
-        fprintf(fplog, " %c %2d", (bEx != 0 && bEx[i]) ? 'x' : ' ', ind[i]);
-    }
-    fprintf(fplog, "\n");
-}
-
-static void print_allswitchind(FILE *fplog, int n, int *pind, int *allswaps, int *tmpswap)
-{
-    int i;
-
-    for (i = 0; i < n; i++)
-    {
-        tmpswap[i] = allswaps[i];
-    }
-    for (i = 0; i < n; i++)
-    {
-        allswaps[i] = tmpswap[pind[i]];
-    }
-
-    fprintf(fplog, "\nAccepted Exchanges:   ");
-    for (i = 0; i < n; i++)
-    {
-        fprintf(fplog, "%d ", pind[i]);
-    }
-    fprintf(fplog, "\n");
-
-    /* the "Order After Exchange" is the state label corresponding to the configuration that
-       started in state listed in order, i.e.
-
-       3 0 1 2
-
-       means that the:
-       configuration starting in simulation 3 is now in simulation 0,
-       configuration starting in simulation 0 is now in simulation 1,
-       configuration starting in simulation 1 is now in simulation 2,
-       configuration starting in simulation 2 is now in simulation 3
-     */
-    fprintf(fplog, "Order After Exchange: ");
-    for (i = 0; i < n; i++)
-    {
-        fprintf(fplog, "%d ", allswaps[i]);
-    }
-    fprintf(fplog, "\n\n");
-}
-
-static void print_prob(FILE *fplog, const char *leg, int n, real *prob)
-{
-    int  i;
-    char buf[8];
-
-    fprintf(fplog, "Repl %2s ", leg);
-    for (i = 1; i < n; i++)
-    {
-        if (prob[i] >= 0)
-        {
-            sprintf(buf, "%4.2f", prob[i]);
-            fprintf(fplog, "  %3s", buf[0] == '1' ? "1.0" : buf+1);
-        }
-        else
-        {
-            fprintf(fplog, "     ");
-        }
-    }
-    fprintf(fplog, "\n");
-}
-
-static void print_count(FILE *fplog, const char *leg, int n, int *count)
-{
-    int i;
-
-    fprintf(fplog, "Repl %2s ", leg);
-    for (i = 1; i < n; i++)
-    {
-        fprintf(fplog, " %4d", count[i]);
-    }
-    fprintf(fplog, "\n");
-}
-
-static real calc_delta(FILE *fplog, gmx_bool bPrint, struct gmx_repl_ex *re, int a, int b, int ap, int bp)
-{
-
-    real   ediff, dpV, delta = 0;
-    real  *Epot = re->Epot;
-    real  *Vol  = re->Vol;
-    real **de   = re->de;
-    real  *beta = re->beta;
-
-    /* Two cases; we are permuted and not.  In all cases, setting ap = a and bp = b will reduce
-       to the non permuted case */
-
-    switch (re->type)
-    {
-        case ereTEMP:
-            /*
-             * Okabe et. al. Chem. Phys. Lett. 335 (2001) 435-439
-             */
-            ediff = Epot[b] - Epot[a];
-            delta = -(beta[bp] - beta[ap])*ediff;
-            break;
-        case ereLAMBDA:
-            /* two cases:  when we are permuted, and not.  */
-            /* non-permuted:
-               ediff =  E_new - E_old
-                     =  [H_b(x_a) + H_a(x_b)] - [H_b(x_b) + H_a(x_a)]
-                     =  [H_b(x_a) - H_a(x_a)] + [H_a(x_b) - H_b(x_b)]
-                     =  de[b][a] + de[a][b] */
-
-            /* permuted:
-               ediff =  E_new - E_old
-                     =  [H_bp(x_a) + H_ap(x_b)] - [H_bp(x_b) + H_ap(x_a)]
-                     =  [H_bp(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_bp(x_b)]
-                     =  [H_bp(x_a) - H_a(x_a) + H_a(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_b(x_b) + H_b(x_b) - H_bp(x_b)]
-                     =  [H_bp(x_a) - H_a(x_a)] - [H_ap(x_a) - H_a(x_a)] + [H_ap(x_b) - H_b(x_b)] - H_bp(x_b) - H_b(x_b)]
-                     =  (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b])    */
-            /* but, in the current code implementation, we flip configurations, not indices . . .
-               So let's examine that.
-                     =  [H_b(x_ap) - H_a(x_a)] - [H_a(x_ap) - H_a(x_a)] + [H_a(x_bp) - H_b(x_b)] - H_b(x_bp) - H_b(x_b)]
-                     =  [H_b(x_ap) - H_a(x_ap)]  + [H_a(x_bp) - H_b(x_pb)]
-                     = (de[b][ap] - de[a][ap]) + (de[a][bp] - de[b][bp]
-                     So, if we exchange b<=> bp and a<=> ap, we return to the same result.
-                     So the simple solution is to flip the
-                     position of perturbed and original indices in the tests.
-             */
-
-            ediff = (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b]);
-            delta = ediff*beta[a]; /* assume all same temperature in this case */
-            break;
-        case ereTL:
-            /* not permuted:  */
-            /* delta =  reduced E_new - reduced E_old
-                     =  [beta_b H_b(x_a) + beta_a H_a(x_b)] - [beta_b H_b(x_b) + beta_a H_a(x_a)]
-                     =  [beta_b H_b(x_a) - beta_a H_a(x_a)] + [beta_a H_a(x_b) - beta_b H_b(x_b)]
-                     =  [beta_b dH_b(x_a) + beta_b H_a(x_a) - beta_a H_a(x_a)] +
-                        [beta_a dH_a(x_b) + beta_a H_b(x_b) - beta_b H_b(x_b)]
-                     =  [beta_b dH_b(x_a) + [beta_a dH_a(x_b) +
-                        beta_b (H_a(x_a) - H_b(x_b)]) - beta_a (H_a(x_a) - H_b(x_b))
-                     =  beta_b dH_b(x_a) + beta_a dH_a(x_b) - (beta_b - beta_a)(H_b(x_b) - H_a(x_a) */
-            /* delta = beta[b]*de[b][a] + beta[a]*de[a][b] - (beta[b] - beta[a])*(Epot[b] - Epot[a]; */
-            /* permuted (big breath!) */
-            /*   delta =  reduced E_new - reduced E_old
-                     =  [beta_bp H_bp(x_a) + beta_ap H_ap(x_b)] - [beta_bp H_bp(x_b) + beta_ap H_ap(x_a)]
-                     =  [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)]
-                     =  [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)]
-                        - beta_pb H_a(x_a) + beta_ap H_a(x_a) + beta_pb H_a(x_a) - beta_ap H_a(x_a)
-                        - beta_ap H_b(x_b) + beta_bp H_b(x_b) + beta_ap H_b(x_b) - beta_bp H_b(x_b)
-                     =  [(beta_bp H_bp(x_a) - beta_bp H_a(x_a)) - (beta_ap H_ap(x_a) - beta_ap H_a(x_a))] +
-                        [(beta_ap H_ap(x_b)  - beta_ap H_b(x_b)) - (beta_bp H_bp(x_b) - beta_bp H_b(x_b))]
-             + beta_pb H_a(x_a) - beta_ap H_a(x_a) + beta_ap H_b(x_b) - beta_bp H_b(x_b)
-                     =  [beta_bp (H_bp(x_a) - H_a(x_a)) - beta_ap (H_ap(x_a) - H_a(x_a))] +
-                        [beta_ap (H_ap(x_b) - H_b(x_b)) - beta_bp (H_bp(x_b) - H_b(x_b))]
-             + beta_pb (H_a(x_a) - H_b(x_b))  - beta_ap (H_a(x_a) - H_b(x_b))
-                     =  ([beta_bp de[bp][a] - beta_ap de[ap][a]) + beta_ap de[ap][b]  - beta_bp de[bp][b])
-             + (beta_pb-beta_ap)(H_a(x_a) - H_b(x_b))  */
-            delta = beta[bp]*(de[bp][a] - de[bp][b]) + beta[ap]*(de[ap][b] - de[ap][a]) - (beta[bp]-beta[ap])*(Epot[b]-Epot[a]);
-            break;
-        default:
-            gmx_incons("Unknown replica exchange quantity");
-    }
-    if (bPrint)
-    {
-        fprintf(fplog, "Repl %d <-> %d  dE_term = %10.3e (kT)\n", a, b, delta);
-    }
-
-/* this is necessary because with plumed HREX the energy contribution is
-   already taken into account */
-    if(plumed_hrex) delta=0.0;
-
-    if (re->bNPT)
-    {
-        /* revist the calculation for 5.0.  Might be some improvements. */
-        dpV = (beta[ap]*re->pres[ap]-beta[bp]*re->pres[bp])*(Vol[b]-Vol[a])/PRESFAC;
-        if (bPrint)
-        {
-            fprintf(fplog, "  dpV = %10.3e  d = %10.3e\n", dpV, delta + dpV);
-        }
-        delta += dpV;
-    }
-    return delta;
-}
-
-static void
-test_for_replica_exchange(FILE                 *fplog,
-                          const gmx_multisim_t *ms,
-                          struct gmx_repl_ex   *re,
-                          gmx_enerdata_t       *enerd,
-                          real                  vol,
-                          gmx_int64_t           step,
-                          real                  time)
-{
-    int       m, i, j, a, b, ap, bp, i0, i1, tmp;
-    real      delta = 0;
-    gmx_bool  bPrint, bMultiEx;
-    gmx_bool *bEx      = re->bEx;
-    real     *prob     = re->prob;
-    int      *pind     = re->destinations; /* permuted index */
-    gmx_bool  bEpot    = FALSE;
-    gmx_bool  bDLambda = FALSE;
-    gmx_bool  bVol     = FALSE;
-
-    bMultiEx = (re->nex > 1);  /* multiple exchanges at each state */
-    fprintf(fplog, "Replica exchange at step %" GMX_PRId64 " time %.5f\n", step, time);
-
-    if (re->bNPT)
-    {
-        for (i = 0; i < re->nrepl; i++)
-        {
-            re->Vol[i] = 0;
-        }
-        bVol               = TRUE;
-        re->Vol[re->repl]  = vol;
-    }
-    if ((re->type == ereTEMP || re->type == ereTL))
-    {
-        for (i = 0; i < re->nrepl; i++)
-        {
-            re->Epot[i] = 0;
-        }
-        bEpot              = TRUE;
-        re->Epot[re->repl] = enerd->term[F_EPOT];
-        /* temperatures of different states*/
-        for (i = 0; i < re->nrepl; i++)
-        {
-            re->beta[i] = 1.0/(re->q[ereTEMP][i]*BOLTZ);
-        }
-    }
-    else
-    {
-        for (i = 0; i < re->nrepl; i++)
-        {
-            re->beta[i] = 1.0/(re->temp*BOLTZ);  /* we have a single temperature */
-        }
-    }
-    if (re->type == ereLAMBDA || re->type == ereTL)
-    {
-        bDLambda = TRUE;
-        /* lambda differences. */
-        /* de[i][j] is the energy of the jth simulation in the ith Hamiltonian
-           minus the energy of the jth simulation in the jth Hamiltonian */
-        for (i = 0; i < re->nrepl; i++)
-        {
-            for (j = 0; j < re->nrepl; j++)
-            {
-                re->de[i][j] = 0;
-            }
-        }
-        for (i = 0; i < re->nrepl; i++)
-        {
-            re->de[i][re->repl] = (enerd->enerpart_lambda[(int)re->q[ereLAMBDA][i]+1]-enerd->enerpart_lambda[0]);
-        }
-    }
-
-    /* now actually do the communication */
-    if (bVol)
-    {
-        gmx_sum_sim(re->nrepl, re->Vol, ms);
-    }
-    if (bEpot)
-    {
-        gmx_sum_sim(re->nrepl, re->Epot, ms);
-    }
-    if (bDLambda)
-    {
-        for (i = 0; i < re->nrepl; i++)
-        {
-            gmx_sum_sim(re->nrepl, re->de[i], ms);
-        }
-    }
-
-    /* make a duplicate set of indices for shuffling */
-    for (i = 0; i < re->nrepl; i++)
-    {
-        pind[i] = re->ind[i];
-    }
-
-    /* PLUMED */
-    int plumed_test_exchange_pattern=0;
-    /* END PLUMED */
-
-    if(plumed_test_exchange_pattern && plumed_hrex) gmx_fatal(FARGS,"hrex not compatible with ad hoc exchange patterns");
-
-    if (bMultiEx)
-    {
-        /* multiple random switch exchange */
-        int nself = 0;
-        for (i = 0; i < re->nex + nself; i++)
-        {
-            double rnd[2];
-
-            gmx_rng_cycle_2uniform(step, i*2, re->seed, RND_SEED_REPLEX, rnd);
-            /* randomly select a pair  */
-            /* in theory, could reduce this by identifying only which switches had a nonneglibible
-               probability of occurring (log p > -100) and only operate on those switches */
-            /* find out which state it is from, and what label that state currently has. Likely
-               more work that useful. */
-            i0 = (int)(re->nrepl*rnd[0]);
-            i1 = (int)(re->nrepl*rnd[1]);
-            if (i0 == i1)
-            {
-                nself++;
-                continue;  /* self-exchange, back up and do it again */
-            }
-
-            a  = re->ind[i0]; /* what are the indices of these states? */
-            b  = re->ind[i1];
-            ap = pind[i0];
-            bp = pind[i1];
-
-            bPrint = FALSE; /* too noisy */
-            /* calculate the energy difference */
-            /* if the code changes to flip the STATES, rather than the configurations,
-               use the commented version of the code */
-            /* delta = calc_delta(fplog,bPrint,re,a,b,ap,bp); */
-            delta = calc_delta(fplog, bPrint, re, ap, bp, a, b);
-
-            /* we actually only use the first space in the prob and bEx array,
-               since there are actually many switches between pairs. */
-
-            if (delta <= 0)
-            {
-                /* accepted */
-                prob[0] = 1;
-                bEx[0]  = TRUE;
-            }
-            else
-            {
-                if (delta > PROBABILITYCUTOFF)
-                {
-                    prob[0] = 0;
-                }
-                else
-                {
-                    prob[0] = exp(-delta);
-                }
-                /* roll a number to determine if accepted */
-                gmx_rng_cycle_2uniform(step, i*2+1, re->seed, RND_SEED_REPLEX, rnd);
-                bEx[0] = rnd[0] < prob[0];
-            }
-            re->prob_sum[0] += prob[0];
-
-            if (bEx[0])
-            {
-                /* swap the states */
-                tmp      = pind[i0];
-                pind[i0] = pind[i1];
-                pind[i1] = tmp;
-            }
-        }
-        re->nattempt[0]++;  /* keep track of total permutation trials here */
-        print_allswitchind(fplog, re->nrepl, pind, re->allswaps, re->tmpswap);
-    }
-    else
-    {
-        /* standard nearest neighbor replica exchange */
-
-        m = (step / re->nst) % 2;
-        /* PLUMED */
-        if(plumedswitch){
-          int partner=re->repl;
-          plumed_cmd(plumedmain,"getExchangesFlag",&plumed_test_exchange_pattern);
-          if(plumed_test_exchange_pattern>0){
-            int *list;
-            snew(list,re->nrepl);
-            plumed_cmd(plumedmain,"setNumberOfReplicas",&(re->nrepl));
-            plumed_cmd(plumedmain,"getExchangesList",list);
-            for(i=0; i<re->nrepl; i++) re->ind[i]=list[i];
-            sfree(list);
-          }
-
-          for(i=1; i<re->nrepl; i++) {
-            if (i % 2 != m) continue;
-            a = re->ind[i-1];
-            b = re->ind[i];
-            if(re->repl==a) partner=b;
-            if(re->repl==b) partner=a;
-          }
-          plumed_cmd(plumedmain,"GREX setPartner",&partner);
-          plumed_cmd(plumedmain,"GREX calculate",NULL);
-          plumed_cmd(plumedmain,"GREX shareAllDeltaBias",NULL);
-        }
-        /* END PLUMED */
-        for (i = 1; i < re->nrepl; i++)
-        {
-            a = re->ind[i-1];
-            b = re->ind[i];
-
-            bPrint = (re->repl == a || re->repl == b);
-            if (i % 2 == m)
-            {
-                delta = calc_delta(fplog, bPrint, re, a, b, a, b);
-
-                /* PLUMED */
-                if(plumedswitch){
-                  real adb,bdb,dplumed;
-                  char buf[300];
-                  sprintf(buf,"GREX getDeltaBias %d",a); plumed_cmd(plumedmain,buf,&adb);
-                  sprintf(buf,"GREX getDeltaBias %d",b); plumed_cmd(plumedmain,buf,&bdb);
-                  dplumed=adb*re->beta[a]+bdb*re->beta[b];
-                  delta+=dplumed;
-                  if (bPrint)
-                    fprintf(fplog,"dplumed = %10.3e  dE_Term = %10.3e (kT)\n",dplumed,delta);
-                }
-                /* END PLUMED */
-                if (delta <= 0)
-                {
-                    /* accepted */
-                    prob[i] = 1;
-                    bEx[i]  = TRUE;
-                }
-                else
-                {
-                    double rnd[2];
-
-                    if (delta > PROBABILITYCUTOFF)
-                    {
-                        prob[i] = 0;
-                    }
-                    else
-                    {
-                        prob[i] = exp(-delta);
-                    }
-                    /* roll a number to determine if accepted */
-                    gmx_rng_cycle_2uniform(step, i, re->seed, RND_SEED_REPLEX, rnd);
-                    bEx[i] = rnd[0] < prob[i];
-                }
-                re->prob_sum[i] += prob[i];
-
-                if (bEx[i])
-                {
-                  /* PLUMED */
-                  if(!plumed_test_exchange_pattern) {
-                    /* standard neighbour swapping */
-                    /* swap these two */
-                    tmp       = pind[i-1];
-                    pind[i-1] = pind[i];
-                    pind[i]   = tmp;
-                    re->nexchange[i]++;  /* statistics for back compatibility */
-                  } else {
-                    /* alternative swapping patterns */
-                    tmp       = pind[a];
-                    pind[a]   = pind[b];
-                    pind[b]   = tmp;
-                    re->nexchange[i]++;  /* statistics for back compatibility */
-                  }
-                  /* END PLUMED */
-                }
-            }
-            else
-            {
-                prob[i] = -1;
-                bEx[i]  = FALSE;
-            }
-        }
-        /* print some statistics */
-        print_ind(fplog, "ex", re->nrepl, re->ind, bEx);
-        print_prob(fplog, "pr", re->nrepl, prob);
-        fprintf(fplog, "\n");
-        re->nattempt[m]++;
-    }
-
-    /* PLUMED */
-    if(plumed_test_exchange_pattern>0) {
-      for (i = 0; i < re->nrepl; i++)
-      {
-          re->ind[i] = i;
-      }
-    }
-    /* END PLUMED */
-
-    /* record which moves were made and accepted */
-    for (i = 0; i < re->nrepl; i++)
-    {
-        re->nmoves[re->ind[i]][pind[i]] += 1;
-        re->nmoves[pind[i]][re->ind[i]] += 1;
-    }
-    fflush(fplog); /* make sure we can see what the last exchange was */
-}
-
-static void write_debug_x(t_state *state)
-{
-    int i;
-
-    if (debug)
-    {
-        for (i = 0; i < state->natoms; i += 10)
-        {
-            fprintf(debug, "dx %5d %10.5f %10.5f %10.5f\n", i, state->x[i][XX], state->x[i][YY], state->x[i][ZZ]);
-        }
-    }
-}
-
-static void
-cyclic_decomposition(const int *destinations,
-                     int      **cyclic,
-                     gmx_bool  *incycle,
-                     const int  nrepl,
-                     int       *nswap)
-{
-
-    int i, j, c, p;
-    int maxlen = 1;
-    for (i = 0; i < nrepl; i++)
-    {
-        incycle[i] = FALSE;
-    }
-    for (i = 0; i < nrepl; i++)  /* one cycle for each replica */
-    {
-        if (incycle[i])
-        {
-            cyclic[i][0] = -1;
-            continue;
-        }
-        cyclic[i][0] = i;
-        incycle[i]   = TRUE;
-        c            = 1;
-        p            = i;
-        for (j = 0; j < nrepl; j++) /* potentially all cycles are part, but we will break first */
-        {
-            p = destinations[p];    /* start permuting */
-            if (p == i)
-            {
-                cyclic[i][c] = -1;
-                if (c > maxlen)
-                {
-                    maxlen = c;
-                }
-                break; /* we've reached the original element, the cycle is complete, and we marked the end. */
-            }
-            else
-            {
-                cyclic[i][c] = p;  /* each permutation gives a new member of the cycle */
-                incycle[p]   = TRUE;
-                c++;
-            }
-        }
-    }
-    *nswap = maxlen - 1;
-
-    if (debug)
-    {
-        for (i = 0; i < nrepl; i++)
-        {
-            fprintf(debug, "Cycle %d:", i);
-            for (j = 0; j < nrepl; j++)
-            {
-                if (cyclic[i][j] < 0)
-                {
-                    break;
-                }
-                fprintf(debug, "%2d", cyclic[i][j]);
-            }
-            fprintf(debug, "\n");
-        }
-        fflush(debug);
-    }
-}
-
-static void
-compute_exchange_order(FILE     *fplog,
-                       int     **cyclic,
-                       int     **order,
-                       const int nrepl,
-                       const int maxswap)
-{
-    int i, j;
-
-    for (j = 0; j < maxswap; j++)
-    {
-        for (i = 0; i < nrepl; i++)
-        {
-            if (cyclic[i][j+1] >= 0)
-            {
-                order[cyclic[i][j+1]][j] = cyclic[i][j];
-                order[cyclic[i][j]][j]   = cyclic[i][j+1];
-            }
-        }
-        for (i = 0; i < nrepl; i++)
-        {
-            if (order[i][j] < 0)
-            {
-                order[i][j] = i; /* if it's not exchanging, it should stay this round*/
-            }
-        }
-    }
-
-    if (debug)
-    {
-        fprintf(fplog, "Replica Exchange Order\n");
-        for (i = 0; i < nrepl; i++)
-        {
-            fprintf(fplog, "Replica %d:", i);
-            for (j = 0; j < maxswap; j++)
-            {
-                if (order[i][j] < 0)
-                {
-                    break;
-                }
-                fprintf(debug, "%2d", order[i][j]);
-            }
-            fprintf(fplog, "\n");
-        }
-        fflush(fplog);
-    }
-}
-
-static void
-prepare_to_do_exchange(FILE               *fplog,
-                       struct gmx_repl_ex *re,
-                       const int           replica_id,
-                       int                *maxswap,
-                       gmx_bool           *bThisReplicaExchanged)
-{
-    int i, j;
-    /* Hold the cyclic decomposition of the (multiple) replica
-     * exchange. */
-    gmx_bool bAnyReplicaExchanged = FALSE;
-    *bThisReplicaExchanged = FALSE;
-
-    for (i = 0; i < re->nrepl; i++)
-    {
-        if (re->destinations[i] != re->ind[i])
-        {
-            /* only mark as exchanged if the index has been shuffled */
-            bAnyReplicaExchanged = TRUE;
-            break;
-        }
-    }
-    if (bAnyReplicaExchanged)
-    {
-        /* reinitialize the placeholder arrays */
-        for (i = 0; i < re->nrepl; i++)
-        {
-            for (j = 0; j < re->nrepl; j++)
-            {
-                re->cyclic[i][j] = -1;
-                re->order[i][j]  = -1;
-            }
-        }
-
-        /* Identify the cyclic decomposition of the permutation (very
-         * fast if neighbor replica exchange). */
-        cyclic_decomposition(re->destinations, re->cyclic, re->incycle, re->nrepl, maxswap);
-
-        /* Now translate the decomposition into a replica exchange
-         * order at each step. */
-        compute_exchange_order(fplog, re->cyclic, re->order, re->nrepl, *maxswap);
-
-        /* Did this replica do any exchange at any point? */
-        for (j = 0; j < *maxswap; j++)
-        {
-            if (replica_id != re->order[replica_id][j])
-            {
-                *bThisReplicaExchanged = TRUE;
-                break;
-            }
-        }
-    }
-}
-
-gmx_bool replica_exchange(FILE *fplog, const t_commrec *cr, struct gmx_repl_ex *re,
-                          t_state *state, gmx_enerdata_t *enerd,
-                          t_state *state_local, gmx_int64_t step, real time)
-{
-    int j;
-    int replica_id = 0;
-    int exchange_partner;
-    int maxswap = 0;
-    /* Number of rounds of exchanges needed to deal with any multiple
-     * exchanges. */
-    /* Where each replica ends up after the exchange attempt(s). */
-    /* The order in which multiple exchanges will occur. */
-    gmx_bool bThisReplicaExchanged = FALSE;
-
-    /* PLUMED */
-    if(plumedswitch)plumed_cmd(plumedmain,"GREX prepare",NULL);
-    /* END PLUMED */
-
-    if (MASTER(cr))
-    {
-        replica_id  = re->repl;
-        test_for_replica_exchange(fplog, cr->ms, re, enerd, det(state_local->box), step, time);
-        prepare_to_do_exchange(fplog, re, replica_id, &maxswap, &bThisReplicaExchanged);
-    }
-    /* Do intra-simulation broadcast so all processors belonging to
-     * each simulation know whether they need to participate in
-     * collecting the state. Otherwise, they might as well get on with
-     * the next thing to do. */
-    if (DOMAINDECOMP(cr))
-    {
-#ifdef GMX_MPI
-        MPI_Bcast(&bThisReplicaExchanged, sizeof(gmx_bool), MPI_BYTE, MASTERRANK(cr),
-                  cr->mpi_comm_mygroup);
-#endif
-    }
-
-    if (bThisReplicaExchanged)
-    {
-        /* Exchange the states */
-        /* Collect the global state on the master node */
-        if (DOMAINDECOMP(cr))
-        {
-            dd_collect_state(cr->dd, state_local, state);
-        }
-        else
-        {
-            copy_state_nonatomdata(state_local, state);
-        }
-
-        if (MASTER(cr))
-        {
-            /* There will be only one swap cycle with standard replica
-             * exchange, but there may be multiple swap cycles if we
-             * allow multiple swaps. */
-
-            for (j = 0; j < maxswap; j++)
-            {
-                exchange_partner = re->order[replica_id][j];
-
-                if (exchange_partner != replica_id)
-                {
-                    /* Exchange the global states between the master nodes */
-                    if (debug)
-                    {
-                        fprintf(debug, "Exchanging %d with %d\n", replica_id, exchange_partner);
-                    }
-                    exchange_state(cr->ms, exchange_partner, state);
-                }
-            }
-            /* For temperature-type replica exchange, we need to scale
-             * the velocities. */
-            if (re->type == ereTEMP || re->type == ereTL)
-            {
-                scale_velocities(state, sqrt(re->q[ereTEMP][replica_id]/re->q[ereTEMP][re->destinations[replica_id]]));
-            }
-
-        }
-
-        /* With domain decomposition the global state is distributed later */
-        if (!DOMAINDECOMP(cr))
-        {
-            /* Copy the global state to the local state data structure */
-            copy_state_nonatomdata(state, state_local);
-        }
-    }
-
-    return bThisReplicaExchanged;
-}
-
-void print_replica_exchange_statistics(FILE *fplog, struct gmx_repl_ex *re)
-{
-    int  i;
-
-    fprintf(fplog, "\nReplica exchange statistics\n");
-
-    if (re->nex == 0)
-    {
-        fprintf(fplog, "Repl  %d attempts, %d odd, %d even\n",
-                re->nattempt[0]+re->nattempt[1], re->nattempt[1], re->nattempt[0]);
-
-        fprintf(fplog, "Repl  average probabilities:\n");
-        for (i = 1; i < re->nrepl; i++)
-        {
-            if (re->nattempt[i%2] == 0)
-            {
-                re->prob[i] = 0;
-            }
-            else
-            {
-                re->prob[i] =  re->prob_sum[i]/re->nattempt[i%2];
-            }
-        }
-        print_ind(fplog, "", re->nrepl, re->ind, NULL);
-        print_prob(fplog, "", re->nrepl, re->prob);
-
-        fprintf(fplog, "Repl  number of exchanges:\n");
-        print_ind(fplog, "", re->nrepl, re->ind, NULL);
-        print_count(fplog, "", re->nrepl, re->nexchange);
-
-        fprintf(fplog, "Repl  average number of exchanges:\n");
-        for (i = 1; i < re->nrepl; i++)
-        {
-            if (re->nattempt[i%2] == 0)
-            {
-                re->prob[i] = 0;
-            }
-            else
-            {
-                re->prob[i] =  ((real)re->nexchange[i])/re->nattempt[i%2];
-            }
-        }
-        print_ind(fplog, "", re->nrepl, re->ind, NULL);
-        print_prob(fplog, "", re->nrepl, re->prob);
-
-        fprintf(fplog, "\n");
-    }
-    /* print the transition matrix */
-    print_transition_matrix(fplog, re->nrepl, re->nmoves, re->nattempt);
-}
-
-
-int replica_exchange_get_repl(const gmx_repl_ex_t re){
-  return re->repl;
-};
-
-int replica_exchange_get_nrepl(const gmx_repl_ex_t re){
-  return re->nrepl;
-};
-
diff --git a/patches/gromacs-5.1.4.diff/src/programs/mdrun/repl_ex.cpp.preplumed b/patches/gromacs-5.1.4.diff/src/programs/mdrun/repl_ex.cpp.preplumed
deleted file mode 100644
index 4b0e81063a0d383606a9bb2bcb00080d8ac89990..0000000000000000000000000000000000000000
--- a/patches/gromacs-5.1.4.diff/src/programs/mdrun/repl_ex.cpp.preplumed
+++ /dev/null
@@ -1,1440 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2011,2012,2013,2014,2015, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-
-#include "gmxpre.h"
-
-#include "repl_ex.h"
-
-#include "config.h"
-
-#include <math.h>
-
-#include "gromacs/domdec/domdec.h"
-#include "gromacs/legacyheaders/copyrite.h"
-#include "gromacs/legacyheaders/main.h"
-#include "gromacs/legacyheaders/names.h"
-#include "gromacs/legacyheaders/network.h"
-#include "gromacs/math/units.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/random/random.h"
-#include "gromacs/utility/smalloc.h"
-
-#define PROBABILITYCUTOFF 100
-/* we don't bother evaluating if events are more rare than exp(-100) = 3.7x10^-44 */
-
-enum {
-    ereTEMP, ereLAMBDA, ereENDSINGLE, ereTL, ereNR
-};
-const char *erename[ereNR] = { "temperature", "lambda", "end_single_marker", "temperature and lambda"};
-/* end_single_marker merely notes the end of single variable replica exchange. All types higher than
-   it are multiple replica exchange methods */
-/* Eventually, should add 'pressure', 'temperature and pressure', 'lambda_and_pressure', 'temperature_lambda_pressure'?;
-   Let's wait until we feel better about the pressure control methods giving exact ensembles.  Right now, we assume constant pressure  */
-
-typedef struct gmx_repl_ex
-{
-    int       repl;
-    int       nrepl;
-    real      temp;
-    int       type;
-    real    **q;
-    gmx_bool  bNPT;
-    real     *pres;
-    int      *ind;
-    int      *allswaps;
-    int       nst;
-    int       nex;
-    int       seed;
-    int       nattempt[2];
-    real     *prob_sum;
-    int     **nmoves;
-    int      *nexchange;
-    gmx_rng_t rng;
-
-    /* these are helper arrays for replica exchange; allocated here so they
-       don't have to be allocated each time */
-    int      *destinations;
-    int     **cyclic;
-    int     **order;
-    int      *tmpswap;
-    gmx_bool *incycle;
-    gmx_bool *bEx;
-
-    /* helper arrays to hold the quantities that are exchanged */
-    real  *prob;
-    real  *Epot;
-    real  *beta;
-    real  *Vol;
-    real **de;
-
-} t_gmx_repl_ex;
-
-static gmx_bool repl_quantity(const gmx_multisim_t *ms,
-                              struct gmx_repl_ex *re, int ere, real q)
-{
-    real    *qall;
-    gmx_bool bDiff;
-    int      s;
-
-    snew(qall, ms->nsim);
-    qall[re->repl] = q;
-    gmx_sum_sim(ms->nsim, qall, ms);
-
-    bDiff = FALSE;
-    for (s = 1; s < ms->nsim; s++)
-    {
-        if (qall[s] != qall[0])
-        {
-            bDiff = TRUE;
-        }
-    }
-
-    if (bDiff)
-    {
-        /* Set the replica exchange type and quantities */
-        re->type = ere;
-
-        snew(re->q[ere], re->nrepl);
-        for (s = 0; s < ms->nsim; s++)
-        {
-            re->q[ere][s] = qall[s];
-        }
-    }
-    sfree(qall);
-    return bDiff;
-}
-
-gmx_repl_ex_t init_replica_exchange(FILE *fplog,
-                                    const gmx_multisim_t *ms,
-                                    const t_state *state,
-                                    const t_inputrec *ir,
-                                    int nst, int nex, int init_seed)
-{
-    real                pres;
-    int                 i, j, k;
-    struct gmx_repl_ex *re;
-    gmx_bool            bTemp;
-    gmx_bool            bLambda = FALSE;
-
-    fprintf(fplog, "\nInitializing Replica Exchange\n");
-
-    if (ms == NULL || ms->nsim == 1)
-    {
-        gmx_fatal(FARGS, "Nothing to exchange with only one replica, maybe you forgot to set the -multi option of mdrun?");
-    }
-    if (!EI_DYNAMICS(ir->eI))
-    {
-        gmx_fatal(FARGS, "Replica exchange is only supported by dynamical simulations");
-        /* Note that PAR(cr) is defined by cr->nnodes > 1, which is
-         * distinct from MULTISIM(cr). A multi-simulation only runs
-         * with real MPI parallelism, but this does not imply PAR(cr)
-         * is true!
-         *
-         * Since we are using a dynamical integrator, the only
-         * decomposition is DD, so PAR(cr) and DOMAINDECOMP(cr) are
-         * synonymous. The only way for cr->nnodes > 1 to be true is
-         * if we are using DD. */
-    }
-
-    snew(re, 1);
-
-    re->repl     = ms->sim;
-    re->nrepl    = ms->nsim;
-    snew(re->q, ereENDSINGLE);
-
-    fprintf(fplog, "Repl  There are %d replicas:\n", re->nrepl);
-
-    check_multi_int(fplog, ms, state->natoms, "the number of atoms", FALSE);
-    check_multi_int(fplog, ms, ir->eI, "the integrator", FALSE);
-    check_multi_int64(fplog, ms, ir->init_step+ir->nsteps, "init_step+nsteps", FALSE);
-    check_multi_int64(fplog, ms, (ir->init_step+nst-1)/nst,
-                      "first exchange step: init_step/-replex", FALSE);
-    check_multi_int(fplog, ms, ir->etc, "the temperature coupling", FALSE);
-    check_multi_int(fplog, ms, ir->opts.ngtc,
-                    "the number of temperature coupling groups", FALSE);
-    check_multi_int(fplog, ms, ir->epc, "the pressure coupling", FALSE);
-    check_multi_int(fplog, ms, ir->efep, "free energy", FALSE);
-    check_multi_int(fplog, ms, ir->fepvals->n_lambda, "number of lambda states", FALSE);
-
-    re->temp = ir->opts.ref_t[0];
-    for (i = 1; (i < ir->opts.ngtc); i++)
-    {
-        if (ir->opts.ref_t[i] != re->temp)
-        {
-            fprintf(fplog, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n");
-            fprintf(stderr, "\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n");
-        }
-    }
-
-    re->type = -1;
-    bTemp    = repl_quantity(ms, re, ereTEMP, re->temp);
-    if (ir->efep != efepNO)
-    {
-        bLambda = repl_quantity(ms, re, ereLAMBDA, (real)ir->fepvals->init_fep_state);
-    }
-    if (re->type == -1)  /* nothing was assigned */
-    {
-        gmx_fatal(FARGS, "The properties of the %d systems are all the same, there is nothing to exchange", re->nrepl);
-    }
-    if (bLambda && bTemp)
-    {
-        re->type = ereTL;
-    }
-
-    if (bTemp)
-    {
-        please_cite(fplog, "Sugita1999a");
-        if (ir->epc != epcNO)
-        {
-            re->bNPT = TRUE;
-            fprintf(fplog, "Repl  Using Constant Pressure REMD.\n");
-            please_cite(fplog, "Okabe2001a");
-        }
-        if (ir->etc == etcBERENDSEN)
-        {
-            gmx_fatal(FARGS, "REMD with the %s thermostat does not produce correct potential energy distributions, consider using the %s thermostat instead",
-                      ETCOUPLTYPE(ir->etc), ETCOUPLTYPE(etcVRESCALE));
-        }
-    }
-    if (bLambda)
-    {
-        if (ir->fepvals->delta_lambda != 0)   /* check this? */
-        {
-            gmx_fatal(FARGS, "delta_lambda is not zero");
-        }
-    }
-    if (re->bNPT)
-    {
-        snew(re->pres, re->nrepl);
-        if (ir->epct == epctSURFACETENSION)
-        {
-            pres = ir->ref_p[ZZ][ZZ];
-        }
-        else
-        {
-            pres = 0;
-            j    = 0;
-            for (i = 0; i < DIM; i++)
-            {
-                if (ir->compress[i][i] != 0)
-                {
-                    pres += ir->ref_p[i][i];
-                    j++;
-                }
-            }
-            pres /= j;
-        }
-        re->pres[re->repl] = pres;
-        gmx_sum_sim(re->nrepl, re->pres, ms);
-    }
-
-    /* Make an index for increasing replica order */
-    /* only makes sense if one or the other is varying, not both!
-       if both are varying, we trust the order the person gave. */
-    snew(re->ind, re->nrepl);
-    for (i = 0; i < re->nrepl; i++)
-    {
-        re->ind[i] = i;
-    }
-
-    if (re->type < ereENDSINGLE)
-    {
-
-        for (i = 0; i < re->nrepl; i++)
-        {
-            for (j = i+1; j < re->nrepl; j++)
-            {
-                if (re->q[re->type][re->ind[j]] < re->q[re->type][re->ind[i]])
-                {
-                    /* Unordered replicas are supposed to work, but there
-                     * is still an issues somewhere.
-                     * Note that at this point still re->ind[i]=i.
-                     */
-                    gmx_fatal(FARGS, "Replicas with indices %d < %d have %ss %g > %g, please order your replicas on increasing %s",
-                              i, j,
-                              erename[re->type],
-                              re->q[re->type][i], re->q[re->type][j],
-                              erename[re->type]);
-
-                    k          = re->ind[i];
-                    re->ind[i] = re->ind[j];
-                    re->ind[j] = k;
-                }
-                else if (re->q[re->type][re->ind[j]] == re->q[re->type][re->ind[i]])
-                {
-                    gmx_fatal(FARGS, "Two replicas have identical %ss", erename[re->type]);
-                }
-            }
-        }
-    }
-
-    /* keep track of all the swaps, starting with the initial placement. */
-    snew(re->allswaps, re->nrepl);
-    for (i = 0; i < re->nrepl; i++)
-    {
-        re->allswaps[i] = re->ind[i];
-    }
-
-    switch (re->type)
-    {
-        case ereTEMP:
-            fprintf(fplog, "\nReplica exchange in temperature\n");
-            for (i = 0; i < re->nrepl; i++)
-            {
-                fprintf(fplog, " %5.1f", re->q[re->type][re->ind[i]]);
-            }
-            fprintf(fplog, "\n");
-            break;
-        case ereLAMBDA:
-            fprintf(fplog, "\nReplica exchange in lambda\n");
-            for (i = 0; i < re->nrepl; i++)
-            {
-                fprintf(fplog, " %3d", (int)re->q[re->type][re->ind[i]]);
-            }
-            fprintf(fplog, "\n");
-            break;
-        case ereTL:
-            fprintf(fplog, "\nReplica exchange in temperature and lambda state\n");
-            for (i = 0; i < re->nrepl; i++)
-            {
-                fprintf(fplog, " %5.1f", re->q[ereTEMP][re->ind[i]]);
-            }
-            fprintf(fplog, "\n");
-            for (i = 0; i < re->nrepl; i++)
-            {
-                fprintf(fplog, " %5d", (int)re->q[ereLAMBDA][re->ind[i]]);
-            }
-            fprintf(fplog, "\n");
-            break;
-        default:
-            gmx_incons("Unknown replica exchange quantity");
-    }
-    if (re->bNPT)
-    {
-        fprintf(fplog, "\nRepl  p");
-        for (i = 0; i < re->nrepl; i++)
-        {
-            fprintf(fplog, " %5.2f", re->pres[re->ind[i]]);
-        }
-
-        for (i = 0; i < re->nrepl; i++)
-        {
-            if ((i > 0) && (re->pres[re->ind[i]] < re->pres[re->ind[i-1]]))
-            {
-                fprintf(fplog, "\nWARNING: The reference pressures decrease with increasing temperatures\n\n");
-                fprintf(stderr, "\nWARNING: The reference pressures decrease with increasing temperatures\n\n");
-            }
-        }
-    }
-    re->nst = nst;
-    if (init_seed == -1)
-    {
-        if (MASTERSIM(ms))
-        {
-            re->seed = (int)gmx_rng_make_seed();
-        }
-        else
-        {
-            re->seed = 0;
-        }
-        gmx_sumi_sim(1, &(re->seed), ms);
-    }
-    else
-    {
-        re->seed = init_seed;
-    }
-    fprintf(fplog, "\nReplica exchange interval: %d\n", re->nst);
-    fprintf(fplog, "\nReplica random seed: %d\n", re->seed);
-    re->rng = gmx_rng_init(re->seed);
-
-    re->nattempt[0] = 0;
-    re->nattempt[1] = 0;
-
-    snew(re->prob_sum, re->nrepl);
-    snew(re->nexchange, re->nrepl);
-    snew(re->nmoves, re->nrepl);
-    for (i = 0; i < re->nrepl; i++)
-    {
-        snew(re->nmoves[i], re->nrepl);
-    }
-    fprintf(fplog, "Replica exchange information below: x=exchange, pr=probability\n");
-
-    /* generate space for the helper functions so we don't have to snew each time */
-
-    snew(re->destinations, re->nrepl);
-    snew(re->incycle, re->nrepl);
-    snew(re->tmpswap, re->nrepl);
-    snew(re->cyclic, re->nrepl);
-    snew(re->order, re->nrepl);
-    for (i = 0; i < re->nrepl; i++)
-    {
-        snew(re->cyclic[i], re->nrepl+1);
-        snew(re->order[i], re->nrepl);
-    }
-    /* allocate space for the functions storing the data for the replicas */
-    /* not all of these arrays needed in all cases, but they don't take
-       up much space, since the max size is nrepl**2 */
-    snew(re->prob, re->nrepl);
-    snew(re->bEx, re->nrepl);
-    snew(re->beta, re->nrepl);
-    snew(re->Vol, re->nrepl);
-    snew(re->Epot, re->nrepl);
-    snew(re->de, re->nrepl);
-    for (i = 0; i < re->nrepl; i++)
-    {
-        snew(re->de[i], re->nrepl);
-    }
-    re->nex = nex;
-    return re;
-}
-
-static void exchange_reals(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, real *v, int n)
-{
-    real *buf;
-    int   i;
-
-    if (v)
-    {
-        snew(buf, n);
-#ifdef GMX_MPI
-        /*
-           MPI_Sendrecv(v,  n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0,
-           buf,n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0,
-           ms->mpi_comm_masters,MPI_STATUS_IGNORE);
-         */
-        {
-            MPI_Request mpi_req;
-
-            MPI_Isend(v, n*sizeof(real), MPI_BYTE, MSRANK(ms, b), 0,
-                      ms->mpi_comm_masters, &mpi_req);
-            MPI_Recv(buf, n*sizeof(real), MPI_BYTE, MSRANK(ms, b), 0,
-                     ms->mpi_comm_masters, MPI_STATUS_IGNORE);
-            MPI_Wait(&mpi_req, MPI_STATUS_IGNORE);
-        }
-#endif
-        for (i = 0; i < n; i++)
-        {
-            v[i] = buf[i];
-        }
-        sfree(buf);
-    }
-}
-
-
-static void exchange_ints(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, int *v, int n)
-{
-    int *buf;
-    int  i;
-
-    if (v)
-    {
-        snew(buf, n);
-#ifdef GMX_MPI
-        /*
-           MPI_Sendrecv(v,  n*sizeof(int),MPI_BYTE,MSRANK(ms,b),0,
-             buf,n*sizeof(int),MPI_BYTE,MSRANK(ms,b),0,
-             ms->mpi_comm_masters,MPI_STATUS_IGNORE);
-         */
-        {
-            MPI_Request mpi_req;
-
-            MPI_Isend(v, n*sizeof(int), MPI_BYTE, MSRANK(ms, b), 0,
-                      ms->mpi_comm_masters, &mpi_req);
-            MPI_Recv(buf, n*sizeof(int), MPI_BYTE, MSRANK(ms, b), 0,
-                     ms->mpi_comm_masters, MPI_STATUS_IGNORE);
-            MPI_Wait(&mpi_req, MPI_STATUS_IGNORE);
-        }
-#endif
-        for (i = 0; i < n; i++)
-        {
-            v[i] = buf[i];
-        }
-        sfree(buf);
-    }
-}
-
-static void exchange_doubles(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, double *v, int n)
-{
-    double *buf;
-    int     i;
-
-    if (v)
-    {
-        snew(buf, n);
-#ifdef GMX_MPI
-        /*
-           MPI_Sendrecv(v,  n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0,
-           buf,n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0,
-           ms->mpi_comm_masters,MPI_STATUS_IGNORE);
-         */
-        {
-            MPI_Request mpi_req;
-
-            MPI_Isend(v, n*sizeof(double), MPI_BYTE, MSRANK(ms, b), 0,
-                      ms->mpi_comm_masters, &mpi_req);
-            MPI_Recv(buf, n*sizeof(double), MPI_BYTE, MSRANK(ms, b), 0,
-                     ms->mpi_comm_masters, MPI_STATUS_IGNORE);
-            MPI_Wait(&mpi_req, MPI_STATUS_IGNORE);
-        }
-#endif
-        for (i = 0; i < n; i++)
-        {
-            v[i] = buf[i];
-        }
-        sfree(buf);
-    }
-}
-
-static void exchange_rvecs(const gmx_multisim_t gmx_unused *ms, int gmx_unused b, rvec *v, int n)
-{
-    rvec *buf;
-    int   i;
-
-    if (v)
-    {
-        snew(buf, n);
-#ifdef GMX_MPI
-        /*
-           MPI_Sendrecv(v[0],  n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0,
-           buf[0],n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0,
-           ms->mpi_comm_masters,MPI_STATUS_IGNORE);
-         */
-        {
-            MPI_Request mpi_req;
-
-            MPI_Isend(v[0], n*sizeof(rvec), MPI_BYTE, MSRANK(ms, b), 0,
-                      ms->mpi_comm_masters, &mpi_req);
-            MPI_Recv(buf[0], n*sizeof(rvec), MPI_BYTE, MSRANK(ms, b), 0,
-                     ms->mpi_comm_masters, MPI_STATUS_IGNORE);
-            MPI_Wait(&mpi_req, MPI_STATUS_IGNORE);
-        }
-#endif
-        for (i = 0; i < n; i++)
-        {
-            copy_rvec(buf[i], v[i]);
-        }
-        sfree(buf);
-    }
-}
-
-static void exchange_state(const gmx_multisim_t *ms, int b, t_state *state)
-{
-    /* When t_state changes, this code should be updated. */
-    int ngtc, nnhpres;
-    ngtc    = state->ngtc * state->nhchainlength;
-    nnhpres = state->nnhpres* state->nhchainlength;
-    exchange_rvecs(ms, b, state->box, DIM);
-    exchange_rvecs(ms, b, state->box_rel, DIM);
-    exchange_rvecs(ms, b, state->boxv, DIM);
-    exchange_reals(ms, b, &(state->veta), 1);
-    exchange_reals(ms, b, &(state->vol0), 1);
-    exchange_rvecs(ms, b, state->svir_prev, DIM);
-    exchange_rvecs(ms, b, state->fvir_prev, DIM);
-    exchange_rvecs(ms, b, state->pres_prev, DIM);
-    exchange_doubles(ms, b, state->nosehoover_xi, ngtc);
-    exchange_doubles(ms, b, state->nosehoover_vxi, ngtc);
-    exchange_doubles(ms, b, state->nhpres_xi, nnhpres);
-    exchange_doubles(ms, b, state->nhpres_vxi, nnhpres);
-    exchange_doubles(ms, b, state->therm_integral, state->ngtc);
-    exchange_rvecs(ms, b, state->x, state->natoms);
-    exchange_rvecs(ms, b, state->v, state->natoms);
-    exchange_rvecs(ms, b, state->sd_X, state->natoms);
-}
-
-static void copy_rvecs(rvec *s, rvec *d, int n)
-{
-    int i;
-
-    if (d != NULL)
-    {
-        for (i = 0; i < n; i++)
-        {
-            copy_rvec(s[i], d[i]);
-        }
-    }
-}
-
-static void copy_doubles(const double *s, double *d, int n)
-{
-    int i;
-
-    if (d != NULL)
-    {
-        for (i = 0; i < n; i++)
-        {
-            d[i] = s[i];
-        }
-    }
-}
-
-static void copy_reals(const real *s, real *d, int n)
-{
-    int i;
-
-    if (d != NULL)
-    {
-        for (i = 0; i < n; i++)
-        {
-            d[i] = s[i];
-        }
-    }
-}
-
-static void copy_ints(const int *s, int *d, int n)
-{
-    int i;
-
-    if (d != NULL)
-    {
-        for (i = 0; i < n; i++)
-        {
-            d[i] = s[i];
-        }
-    }
-}
-
-#define scopy_rvecs(v, n)   copy_rvecs(state->v, state_local->v, n);
-#define scopy_doubles(v, n) copy_doubles(state->v, state_local->v, n);
-#define scopy_reals(v, n) copy_reals(state->v, state_local->v, n);
-#define scopy_ints(v, n)   copy_ints(state->v, state_local->v, n);
-
-static void copy_state_nonatomdata(t_state *state, t_state *state_local)
-{
-    /* When t_state changes, this code should be updated. */
-    int ngtc, nnhpres;
-    ngtc    = state->ngtc * state->nhchainlength;
-    nnhpres = state->nnhpres* state->nhchainlength;
-    scopy_rvecs(box, DIM);
-    scopy_rvecs(box_rel, DIM);
-    scopy_rvecs(boxv, DIM);
-    state_local->veta = state->veta;
-    state_local->vol0 = state->vol0;
-    scopy_rvecs(svir_prev, DIM);
-    scopy_rvecs(fvir_prev, DIM);
-    scopy_rvecs(pres_prev, DIM);
-    scopy_doubles(nosehoover_xi, ngtc);
-    scopy_doubles(nosehoover_vxi, ngtc);
-    scopy_doubles(nhpres_xi, nnhpres);
-    scopy_doubles(nhpres_vxi, nnhpres);
-    scopy_doubles(therm_integral, state->ngtc);
-    scopy_rvecs(x, state->natoms);
-    scopy_rvecs(v, state->natoms);
-    scopy_rvecs(sd_X, state->natoms);
-    copy_ints(&(state->fep_state), &(state_local->fep_state), 1);
-    scopy_reals(lambda, efptNR);
-}
-
-static void scale_velocities(t_state *state, real fac)
-{
-    int i;
-
-    if (state->v)
-    {
-        for (i = 0; i < state->natoms; i++)
-        {
-            svmul(fac, state->v[i], state->v[i]);
-        }
-    }
-}
-
-static void print_transition_matrix(FILE *fplog, int n, int **nmoves, int *nattempt)
-{
-    int   i, j, ntot;
-    float Tprint;
-
-    ntot = nattempt[0] + nattempt[1];
-    fprintf(fplog, "\n");
-    fprintf(fplog, "Repl");
-    for (i = 0; i < n; i++)
-    {
-        fprintf(fplog, "    ");  /* put the title closer to the center */
-    }
-    fprintf(fplog, "Empirical Transition Matrix\n");
-
-    fprintf(fplog, "Repl");
-    for (i = 0; i < n; i++)
-    {
-        fprintf(fplog, "%8d", (i+1));
-    }
-    fprintf(fplog, "\n");
-
-    for (i = 0; i < n; i++)
-    {
-        fprintf(fplog, "Repl");
-        for (j = 0; j < n; j++)
-        {
-            Tprint = 0.0;
-            if (nmoves[i][j] > 0)
-            {
-                Tprint = nmoves[i][j]/(2.0*ntot);
-            }
-            fprintf(fplog, "%8.4f", Tprint);
-        }
-        fprintf(fplog, "%3d\n", i);
-    }
-}
-
-static void print_ind(FILE *fplog, const char *leg, int n, int *ind, gmx_bool *bEx)
-{
-    int i;
-
-    fprintf(fplog, "Repl %2s %2d", leg, ind[0]);
-    for (i = 1; i < n; i++)
-    {
-        fprintf(fplog, " %c %2d", (bEx != 0 && bEx[i]) ? 'x' : ' ', ind[i]);
-    }
-    fprintf(fplog, "\n");
-}
-
-static void print_allswitchind(FILE *fplog, int n, int *pind, int *allswaps, int *tmpswap)
-{
-    int i;
-
-    for (i = 0; i < n; i++)
-    {
-        tmpswap[i] = allswaps[i];
-    }
-    for (i = 0; i < n; i++)
-    {
-        allswaps[i] = tmpswap[pind[i]];
-    }
-
-    fprintf(fplog, "\nAccepted Exchanges:   ");
-    for (i = 0; i < n; i++)
-    {
-        fprintf(fplog, "%d ", pind[i]);
-    }
-    fprintf(fplog, "\n");
-
-    /* the "Order After Exchange" is the state label corresponding to the configuration that
-       started in state listed in order, i.e.
-
-       3 0 1 2
-
-       means that the:
-       configuration starting in simulation 3 is now in simulation 0,
-       configuration starting in simulation 0 is now in simulation 1,
-       configuration starting in simulation 1 is now in simulation 2,
-       configuration starting in simulation 2 is now in simulation 3
-     */
-    fprintf(fplog, "Order After Exchange: ");
-    for (i = 0; i < n; i++)
-    {
-        fprintf(fplog, "%d ", allswaps[i]);
-    }
-    fprintf(fplog, "\n\n");
-}
-
-static void print_prob(FILE *fplog, const char *leg, int n, real *prob)
-{
-    int  i;
-    char buf[8];
-
-    fprintf(fplog, "Repl %2s ", leg);
-    for (i = 1; i < n; i++)
-    {
-        if (prob[i] >= 0)
-        {
-            sprintf(buf, "%4.2f", prob[i]);
-            fprintf(fplog, "  %3s", buf[0] == '1' ? "1.0" : buf+1);
-        }
-        else
-        {
-            fprintf(fplog, "     ");
-        }
-    }
-    fprintf(fplog, "\n");
-}
-
-static void print_count(FILE *fplog, const char *leg, int n, int *count)
-{
-    int i;
-
-    fprintf(fplog, "Repl %2s ", leg);
-    for (i = 1; i < n; i++)
-    {
-        fprintf(fplog, " %4d", count[i]);
-    }
-    fprintf(fplog, "\n");
-}
-
-static real calc_delta(FILE *fplog, gmx_bool bPrint, struct gmx_repl_ex *re, int a, int b, int ap, int bp)
-{
-
-    real   ediff, dpV, delta = 0;
-    real  *Epot = re->Epot;
-    real  *Vol  = re->Vol;
-    real **de   = re->de;
-    real  *beta = re->beta;
-
-    /* Two cases; we are permuted and not.  In all cases, setting ap = a and bp = b will reduce
-       to the non permuted case */
-
-    switch (re->type)
-    {
-        case ereTEMP:
-            /*
-             * Okabe et. al. Chem. Phys. Lett. 335 (2001) 435-439
-             */
-            ediff = Epot[b] - Epot[a];
-            delta = -(beta[bp] - beta[ap])*ediff;
-            break;
-        case ereLAMBDA:
-            /* two cases:  when we are permuted, and not.  */
-            /* non-permuted:
-               ediff =  E_new - E_old
-                     =  [H_b(x_a) + H_a(x_b)] - [H_b(x_b) + H_a(x_a)]
-                     =  [H_b(x_a) - H_a(x_a)] + [H_a(x_b) - H_b(x_b)]
-                     =  de[b][a] + de[a][b] */
-
-            /* permuted:
-               ediff =  E_new - E_old
-                     =  [H_bp(x_a) + H_ap(x_b)] - [H_bp(x_b) + H_ap(x_a)]
-                     =  [H_bp(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_bp(x_b)]
-                     =  [H_bp(x_a) - H_a(x_a) + H_a(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_b(x_b) + H_b(x_b) - H_bp(x_b)]
-                     =  [H_bp(x_a) - H_a(x_a)] - [H_ap(x_a) - H_a(x_a)] + [H_ap(x_b) - H_b(x_b)] - H_bp(x_b) - H_b(x_b)]
-                     =  (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b])    */
-            /* but, in the current code implementation, we flip configurations, not indices . . .
-               So let's examine that.
-                     =  [H_b(x_ap) - H_a(x_a)] - [H_a(x_ap) - H_a(x_a)] + [H_a(x_bp) - H_b(x_b)] - H_b(x_bp) - H_b(x_b)]
-                     =  [H_b(x_ap) - H_a(x_ap)]  + [H_a(x_bp) - H_b(x_pb)]
-                     = (de[b][ap] - de[a][ap]) + (de[a][bp] - de[b][bp]
-                     So, if we exchange b<=> bp and a<=> ap, we return to the same result.
-                     So the simple solution is to flip the
-                     position of perturbed and original indices in the tests.
-             */
-
-            ediff = (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b]);
-            delta = ediff*beta[a]; /* assume all same temperature in this case */
-            break;
-        case ereTL:
-            /* not permuted:  */
-            /* delta =  reduced E_new - reduced E_old
-                     =  [beta_b H_b(x_a) + beta_a H_a(x_b)] - [beta_b H_b(x_b) + beta_a H_a(x_a)]
-                     =  [beta_b H_b(x_a) - beta_a H_a(x_a)] + [beta_a H_a(x_b) - beta_b H_b(x_b)]
-                     =  [beta_b dH_b(x_a) + beta_b H_a(x_a) - beta_a H_a(x_a)] +
-                        [beta_a dH_a(x_b) + beta_a H_b(x_b) - beta_b H_b(x_b)]
-                     =  [beta_b dH_b(x_a) + [beta_a dH_a(x_b) +
-                        beta_b (H_a(x_a) - H_b(x_b)]) - beta_a (H_a(x_a) - H_b(x_b))
-                     =  beta_b dH_b(x_a) + beta_a dH_a(x_b) - (beta_b - beta_a)(H_b(x_b) - H_a(x_a) */
-            /* delta = beta[b]*de[b][a] + beta[a]*de[a][b] - (beta[b] - beta[a])*(Epot[b] - Epot[a]; */
-            /* permuted (big breath!) */
-            /*   delta =  reduced E_new - reduced E_old
-                     =  [beta_bp H_bp(x_a) + beta_ap H_ap(x_b)] - [beta_bp H_bp(x_b) + beta_ap H_ap(x_a)]
-                     =  [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)]
-                     =  [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)]
-                        - beta_pb H_a(x_a) + beta_ap H_a(x_a) + beta_pb H_a(x_a) - beta_ap H_a(x_a)
-                        - beta_ap H_b(x_b) + beta_bp H_b(x_b) + beta_ap H_b(x_b) - beta_bp H_b(x_b)
-                     =  [(beta_bp H_bp(x_a) - beta_bp H_a(x_a)) - (beta_ap H_ap(x_a) - beta_ap H_a(x_a))] +
-                        [(beta_ap H_ap(x_b)  - beta_ap H_b(x_b)) - (beta_bp H_bp(x_b) - beta_bp H_b(x_b))]
-             + beta_pb H_a(x_a) - beta_ap H_a(x_a) + beta_ap H_b(x_b) - beta_bp H_b(x_b)
-                     =  [beta_bp (H_bp(x_a) - H_a(x_a)) - beta_ap (H_ap(x_a) - H_a(x_a))] +
-                        [beta_ap (H_ap(x_b) - H_b(x_b)) - beta_bp (H_bp(x_b) - H_b(x_b))]
-             + beta_pb (H_a(x_a) - H_b(x_b))  - beta_ap (H_a(x_a) - H_b(x_b))
-                     =  ([beta_bp de[bp][a] - beta_ap de[ap][a]) + beta_ap de[ap][b]  - beta_bp de[bp][b])
-             + (beta_pb-beta_ap)(H_a(x_a) - H_b(x_b))  */
-            delta = beta[bp]*(de[bp][a] - de[bp][b]) + beta[ap]*(de[ap][b] - de[ap][a]) - (beta[bp]-beta[ap])*(Epot[b]-Epot[a]);
-            break;
-        default:
-            gmx_incons("Unknown replica exchange quantity");
-    }
-    if (bPrint)
-    {
-        fprintf(fplog, "Repl %d <-> %d  dE_term = %10.3e (kT)\n", a, b, delta);
-    }
-    if (re->bNPT)
-    {
-        /* revist the calculation for 5.0.  Might be some improvements. */
-        dpV = (beta[ap]*re->pres[ap]-beta[bp]*re->pres[bp])*(Vol[b]-Vol[a])/PRESFAC;
-        if (bPrint)
-        {
-            fprintf(fplog, "  dpV = %10.3e  d = %10.3e\n", dpV, delta + dpV);
-        }
-        delta += dpV;
-    }
-    return delta;
-}
-
-static void
-test_for_replica_exchange(FILE                 *fplog,
-                          const gmx_multisim_t *ms,
-                          struct gmx_repl_ex   *re,
-                          gmx_enerdata_t       *enerd,
-                          real                  vol,
-                          gmx_int64_t           step,
-                          real                  time)
-{
-    int       m, i, j, a, b, ap, bp, i0, i1, tmp;
-    real      delta = 0;
-    gmx_bool  bPrint, bMultiEx;
-    gmx_bool *bEx      = re->bEx;
-    real     *prob     = re->prob;
-    int      *pind     = re->destinations; /* permuted index */
-    gmx_bool  bEpot    = FALSE;
-    gmx_bool  bDLambda = FALSE;
-    gmx_bool  bVol     = FALSE;
-
-    bMultiEx = (re->nex > 1);  /* multiple exchanges at each state */
-    fprintf(fplog, "Replica exchange at step %" GMX_PRId64 " time %.5f\n", step, time);
-
-    if (re->bNPT)
-    {
-        for (i = 0; i < re->nrepl; i++)
-        {
-            re->Vol[i] = 0;
-        }
-        bVol               = TRUE;
-        re->Vol[re->repl]  = vol;
-    }
-    if ((re->type == ereTEMP || re->type == ereTL))
-    {
-        for (i = 0; i < re->nrepl; i++)
-        {
-            re->Epot[i] = 0;
-        }
-        bEpot              = TRUE;
-        re->Epot[re->repl] = enerd->term[F_EPOT];
-        /* temperatures of different states*/
-        for (i = 0; i < re->nrepl; i++)
-        {
-            re->beta[i] = 1.0/(re->q[ereTEMP][i]*BOLTZ);
-        }
-    }
-    else
-    {
-        for (i = 0; i < re->nrepl; i++)
-        {
-            re->beta[i] = 1.0/(re->temp*BOLTZ);  /* we have a single temperature */
-        }
-    }
-    if (re->type == ereLAMBDA || re->type == ereTL)
-    {
-        bDLambda = TRUE;
-        /* lambda differences. */
-        /* de[i][j] is the energy of the jth simulation in the ith Hamiltonian
-           minus the energy of the jth simulation in the jth Hamiltonian */
-        for (i = 0; i < re->nrepl; i++)
-        {
-            for (j = 0; j < re->nrepl; j++)
-            {
-                re->de[i][j] = 0;
-            }
-        }
-        for (i = 0; i < re->nrepl; i++)
-        {
-            re->de[i][re->repl] = (enerd->enerpart_lambda[(int)re->q[ereLAMBDA][i]+1]-enerd->enerpart_lambda[0]);
-        }
-    }
-
-    /* now actually do the communication */
-    if (bVol)
-    {
-        gmx_sum_sim(re->nrepl, re->Vol, ms);
-    }
-    if (bEpot)
-    {
-        gmx_sum_sim(re->nrepl, re->Epot, ms);
-    }
-    if (bDLambda)
-    {
-        for (i = 0; i < re->nrepl; i++)
-        {
-            gmx_sum_sim(re->nrepl, re->de[i], ms);
-        }
-    }
-
-    /* make a duplicate set of indices for shuffling */
-    for (i = 0; i < re->nrepl; i++)
-    {
-        pind[i] = re->ind[i];
-    }
-
-    if (bMultiEx)
-    {
-        /* multiple random switch exchange */
-        int nself = 0;
-        for (i = 0; i < re->nex + nself; i++)
-        {
-            double rnd[2];
-
-            gmx_rng_cycle_2uniform(step, i*2, re->seed, RND_SEED_REPLEX, rnd);
-            /* randomly select a pair  */
-            /* in theory, could reduce this by identifying only which switches had a nonneglibible
-               probability of occurring (log p > -100) and only operate on those switches */
-            /* find out which state it is from, and what label that state currently has. Likely
-               more work that useful. */
-            i0 = (int)(re->nrepl*rnd[0]);
-            i1 = (int)(re->nrepl*rnd[1]);
-            if (i0 == i1)
-            {
-                nself++;
-                continue;  /* self-exchange, back up and do it again */
-            }
-
-            a  = re->ind[i0]; /* what are the indices of these states? */
-            b  = re->ind[i1];
-            ap = pind[i0];
-            bp = pind[i1];
-
-            bPrint = FALSE; /* too noisy */
-            /* calculate the energy difference */
-            /* if the code changes to flip the STATES, rather than the configurations,
-               use the commented version of the code */
-            /* delta = calc_delta(fplog,bPrint,re,a,b,ap,bp); */
-            delta = calc_delta(fplog, bPrint, re, ap, bp, a, b);
-
-            /* we actually only use the first space in the prob and bEx array,
-               since there are actually many switches between pairs. */
-
-            if (delta <= 0)
-            {
-                /* accepted */
-                prob[0] = 1;
-                bEx[0]  = TRUE;
-            }
-            else
-            {
-                if (delta > PROBABILITYCUTOFF)
-                {
-                    prob[0] = 0;
-                }
-                else
-                {
-                    prob[0] = exp(-delta);
-                }
-                /* roll a number to determine if accepted */
-                gmx_rng_cycle_2uniform(step, i*2+1, re->seed, RND_SEED_REPLEX, rnd);
-                bEx[0] = rnd[0] < prob[0];
-            }
-            re->prob_sum[0] += prob[0];
-
-            if (bEx[0])
-            {
-                /* swap the states */
-                tmp      = pind[i0];
-                pind[i0] = pind[i1];
-                pind[i1] = tmp;
-            }
-        }
-        re->nattempt[0]++;  /* keep track of total permutation trials here */
-        print_allswitchind(fplog, re->nrepl, pind, re->allswaps, re->tmpswap);
-    }
-    else
-    {
-        /* standard nearest neighbor replica exchange */
-
-        m = (step / re->nst) % 2;
-        for (i = 1; i < re->nrepl; i++)
-        {
-            a = re->ind[i-1];
-            b = re->ind[i];
-
-            bPrint = (re->repl == a || re->repl == b);
-            if (i % 2 == m)
-            {
-                delta = calc_delta(fplog, bPrint, re, a, b, a, b);
-                if (delta <= 0)
-                {
-                    /* accepted */
-                    prob[i] = 1;
-                    bEx[i]  = TRUE;
-                }
-                else
-                {
-                    double rnd[2];
-
-                    if (delta > PROBABILITYCUTOFF)
-                    {
-                        prob[i] = 0;
-                    }
-                    else
-                    {
-                        prob[i] = exp(-delta);
-                    }
-                    /* roll a number to determine if accepted */
-                    gmx_rng_cycle_2uniform(step, i, re->seed, RND_SEED_REPLEX, rnd);
-                    bEx[i] = rnd[0] < prob[i];
-                }
-                re->prob_sum[i] += prob[i];
-
-                if (bEx[i])
-                {
-                    /* swap these two */
-                    tmp       = pind[i-1];
-                    pind[i-1] = pind[i];
-                    pind[i]   = tmp;
-                    re->nexchange[i]++;  /* statistics for back compatibility */
-                }
-            }
-            else
-            {
-                prob[i] = -1;
-                bEx[i]  = FALSE;
-            }
-        }
-        /* print some statistics */
-        print_ind(fplog, "ex", re->nrepl, re->ind, bEx);
-        print_prob(fplog, "pr", re->nrepl, prob);
-        fprintf(fplog, "\n");
-        re->nattempt[m]++;
-    }
-
-    /* record which moves were made and accepted */
-    for (i = 0; i < re->nrepl; i++)
-    {
-        re->nmoves[re->ind[i]][pind[i]] += 1;
-        re->nmoves[pind[i]][re->ind[i]] += 1;
-    }
-    fflush(fplog); /* make sure we can see what the last exchange was */
-}
-
-static void write_debug_x(t_state *state)
-{
-    int i;
-
-    if (debug)
-    {
-        for (i = 0; i < state->natoms; i += 10)
-        {
-            fprintf(debug, "dx %5d %10.5f %10.5f %10.5f\n", i, state->x[i][XX], state->x[i][YY], state->x[i][ZZ]);
-        }
-    }
-}
-
-static void
-cyclic_decomposition(const int *destinations,
-                     int      **cyclic,
-                     gmx_bool  *incycle,
-                     const int  nrepl,
-                     int       *nswap)
-{
-
-    int i, j, c, p;
-    int maxlen = 1;
-    for (i = 0; i < nrepl; i++)
-    {
-        incycle[i] = FALSE;
-    }
-    for (i = 0; i < nrepl; i++)  /* one cycle for each replica */
-    {
-        if (incycle[i])
-        {
-            cyclic[i][0] = -1;
-            continue;
-        }
-        cyclic[i][0] = i;
-        incycle[i]   = TRUE;
-        c            = 1;
-        p            = i;
-        for (j = 0; j < nrepl; j++) /* potentially all cycles are part, but we will break first */
-        {
-            p = destinations[p];    /* start permuting */
-            if (p == i)
-            {
-                cyclic[i][c] = -1;
-                if (c > maxlen)
-                {
-                    maxlen = c;
-                }
-                break; /* we've reached the original element, the cycle is complete, and we marked the end. */
-            }
-            else
-            {
-                cyclic[i][c] = p;  /* each permutation gives a new member of the cycle */
-                incycle[p]   = TRUE;
-                c++;
-            }
-        }
-    }
-    *nswap = maxlen - 1;
-
-    if (debug)
-    {
-        for (i = 0; i < nrepl; i++)
-        {
-            fprintf(debug, "Cycle %d:", i);
-            for (j = 0; j < nrepl; j++)
-            {
-                if (cyclic[i][j] < 0)
-                {
-                    break;
-                }
-                fprintf(debug, "%2d", cyclic[i][j]);
-            }
-            fprintf(debug, "\n");
-        }
-        fflush(debug);
-    }
-}
-
-static void
-compute_exchange_order(FILE     *fplog,
-                       int     **cyclic,
-                       int     **order,
-                       const int nrepl,
-                       const int maxswap)
-{
-    int i, j;
-
-    for (j = 0; j < maxswap; j++)
-    {
-        for (i = 0; i < nrepl; i++)
-        {
-            if (cyclic[i][j+1] >= 0)
-            {
-                order[cyclic[i][j+1]][j] = cyclic[i][j];
-                order[cyclic[i][j]][j]   = cyclic[i][j+1];
-            }
-        }
-        for (i = 0; i < nrepl; i++)
-        {
-            if (order[i][j] < 0)
-            {
-                order[i][j] = i; /* if it's not exchanging, it should stay this round*/
-            }
-        }
-    }
-
-    if (debug)
-    {
-        fprintf(fplog, "Replica Exchange Order\n");
-        for (i = 0; i < nrepl; i++)
-        {
-            fprintf(fplog, "Replica %d:", i);
-            for (j = 0; j < maxswap; j++)
-            {
-                if (order[i][j] < 0)
-                {
-                    break;
-                }
-                fprintf(debug, "%2d", order[i][j]);
-            }
-            fprintf(fplog, "\n");
-        }
-        fflush(fplog);
-    }
-}
-
-static void
-prepare_to_do_exchange(FILE               *fplog,
-                       struct gmx_repl_ex *re,
-                       const int           replica_id,
-                       int                *maxswap,
-                       gmx_bool           *bThisReplicaExchanged)
-{
-    int i, j;
-    /* Hold the cyclic decomposition of the (multiple) replica
-     * exchange. */
-    gmx_bool bAnyReplicaExchanged = FALSE;
-    *bThisReplicaExchanged = FALSE;
-
-    for (i = 0; i < re->nrepl; i++)
-    {
-        if (re->destinations[i] != re->ind[i])
-        {
-            /* only mark as exchanged if the index has been shuffled */
-            bAnyReplicaExchanged = TRUE;
-            break;
-        }
-    }
-    if (bAnyReplicaExchanged)
-    {
-        /* reinitialize the placeholder arrays */
-        for (i = 0; i < re->nrepl; i++)
-        {
-            for (j = 0; j < re->nrepl; j++)
-            {
-                re->cyclic[i][j] = -1;
-                re->order[i][j]  = -1;
-            }
-        }
-
-        /* Identify the cyclic decomposition of the permutation (very
-         * fast if neighbor replica exchange). */
-        cyclic_decomposition(re->destinations, re->cyclic, re->incycle, re->nrepl, maxswap);
-
-        /* Now translate the decomposition into a replica exchange
-         * order at each step. */
-        compute_exchange_order(fplog, re->cyclic, re->order, re->nrepl, *maxswap);
-
-        /* Did this replica do any exchange at any point? */
-        for (j = 0; j < *maxswap; j++)
-        {
-            if (replica_id != re->order[replica_id][j])
-            {
-                *bThisReplicaExchanged = TRUE;
-                break;
-            }
-        }
-    }
-}
-
-gmx_bool replica_exchange(FILE *fplog, const t_commrec *cr, struct gmx_repl_ex *re,
-                          t_state *state, gmx_enerdata_t *enerd,
-                          t_state *state_local, gmx_int64_t step, real time)
-{
-    int j;
-    int replica_id = 0;
-    int exchange_partner;
-    int maxswap = 0;
-    /* Number of rounds of exchanges needed to deal with any multiple
-     * exchanges. */
-    /* Where each replica ends up after the exchange attempt(s). */
-    /* The order in which multiple exchanges will occur. */
-    gmx_bool bThisReplicaExchanged = FALSE;
-
-    if (MASTER(cr))
-    {
-        replica_id  = re->repl;
-        test_for_replica_exchange(fplog, cr->ms, re, enerd, det(state_local->box), step, time);
-        prepare_to_do_exchange(fplog, re, replica_id, &maxswap, &bThisReplicaExchanged);
-    }
-    /* Do intra-simulation broadcast so all processors belonging to
-     * each simulation know whether they need to participate in
-     * collecting the state. Otherwise, they might as well get on with
-     * the next thing to do. */
-    if (DOMAINDECOMP(cr))
-    {
-#ifdef GMX_MPI
-        MPI_Bcast(&bThisReplicaExchanged, sizeof(gmx_bool), MPI_BYTE, MASTERRANK(cr),
-                  cr->mpi_comm_mygroup);
-#endif
-    }
-
-    if (bThisReplicaExchanged)
-    {
-        /* Exchange the states */
-        /* Collect the global state on the master node */
-        if (DOMAINDECOMP(cr))
-        {
-            dd_collect_state(cr->dd, state_local, state);
-        }
-        else
-        {
-            copy_state_nonatomdata(state_local, state);
-        }
-
-        if (MASTER(cr))
-        {
-            /* There will be only one swap cycle with standard replica
-             * exchange, but there may be multiple swap cycles if we
-             * allow multiple swaps. */
-
-            for (j = 0; j < maxswap; j++)
-            {
-                exchange_partner = re->order[replica_id][j];
-
-                if (exchange_partner != replica_id)
-                {
-                    /* Exchange the global states between the master nodes */
-                    if (debug)
-                    {
-                        fprintf(debug, "Exchanging %d with %d\n", replica_id, exchange_partner);
-                    }
-                    exchange_state(cr->ms, exchange_partner, state);
-                }
-            }
-            /* For temperature-type replica exchange, we need to scale
-             * the velocities. */
-            if (re->type == ereTEMP || re->type == ereTL)
-            {
-                scale_velocities(state, sqrt(re->q[ereTEMP][replica_id]/re->q[ereTEMP][re->destinations[replica_id]]));
-            }
-
-        }
-
-        /* With domain decomposition the global state is distributed later */
-        if (!DOMAINDECOMP(cr))
-        {
-            /* Copy the global state to the local state data structure */
-            copy_state_nonatomdata(state, state_local);
-        }
-    }
-
-    return bThisReplicaExchanged;
-}
-
-void print_replica_exchange_statistics(FILE *fplog, struct gmx_repl_ex *re)
-{
-    int  i;
-
-    fprintf(fplog, "\nReplica exchange statistics\n");
-
-    if (re->nex == 0)
-    {
-        fprintf(fplog, "Repl  %d attempts, %d odd, %d even\n",
-                re->nattempt[0]+re->nattempt[1], re->nattempt[1], re->nattempt[0]);
-
-        fprintf(fplog, "Repl  average probabilities:\n");
-        for (i = 1; i < re->nrepl; i++)
-        {
-            if (re->nattempt[i%2] == 0)
-            {
-                re->prob[i] = 0;
-            }
-            else
-            {
-                re->prob[i] =  re->prob_sum[i]/re->nattempt[i%2];
-            }
-        }
-        print_ind(fplog, "", re->nrepl, re->ind, NULL);
-        print_prob(fplog, "", re->nrepl, re->prob);
-
-        fprintf(fplog, "Repl  number of exchanges:\n");
-        print_ind(fplog, "", re->nrepl, re->ind, NULL);
-        print_count(fplog, "", re->nrepl, re->nexchange);
-
-        fprintf(fplog, "Repl  average number of exchanges:\n");
-        for (i = 1; i < re->nrepl; i++)
-        {
-            if (re->nattempt[i%2] == 0)
-            {
-                re->prob[i] = 0;
-            }
-            else
-            {
-                re->prob[i] =  ((real)re->nexchange[i])/re->nattempt[i%2];
-            }
-        }
-        print_ind(fplog, "", re->nrepl, re->ind, NULL);
-        print_prob(fplog, "", re->nrepl, re->prob);
-
-        fprintf(fplog, "\n");
-    }
-    /* print the transition matrix */
-    print_transition_matrix(fplog, re->nrepl, re->nmoves, re->nattempt);
-}
diff --git a/patches/gromacs-5.1.4.diff/src/programs/mdrun/repl_ex.h b/patches/gromacs-5.1.4.diff/src/programs/mdrun/repl_ex.h
deleted file mode 100644
index 29d7bfa4954834b9b02dcaed60e0e4f496e3ff67..0000000000000000000000000000000000000000
--- a/patches/gromacs-5.1.4.diff/src/programs/mdrun/repl_ex.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2011,2012,2013,2014, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-
-#ifndef _repl_ex_h
-#define _repl_ex_h
-
-#include "gromacs/legacyheaders/typedefs.h"
-#include "gromacs/legacyheaders/types/commrec.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Abstract type for replica exchange */
-typedef struct gmx_repl_ex *gmx_repl_ex_t;
-
-extern gmx_repl_ex_t init_replica_exchange(FILE *fplog,
-                                           const gmx_multisim_t *ms,
-                                           const t_state *state,
-                                           const t_inputrec *ir,
-                                           int nst, int nmultiex, int init_seed);
-/* Should only be called on the master nodes */
-
-extern gmx_bool replica_exchange(FILE *fplog,
-                                 const t_commrec *cr,
-                                 gmx_repl_ex_t re,
-                                 t_state *state, gmx_enerdata_t *enerd,
-                                 t_state *state_local,
-                                 gmx_int64_t step, real time);
-/* Attempts replica exchange, should be called on all nodes.
- * Returns TRUE if this state has been exchanged.
- * When running each replica in parallel,
- * this routine collects the state on the master node before exchange.
- * With domain decomposition, the global state after exchange is stored
- * in state and still needs to be redistributed over the nodes.
- */
-
-extern void print_replica_exchange_statistics(FILE *fplog, gmx_repl_ex_t re);
-/* Should only be called on the master nodes */
-
-extern int replica_exchange_get_repl(const gmx_repl_ex_t re);
-extern int replica_exchange_get_nrepl(const gmx_repl_ex_t re);
-extern void pd_collect_state(const t_commrec *cr, t_state *state);
-extern void exchange_state(const gmx_multisim_t *ms, int b, t_state *state);
-extern void copy_state_nonatomdata(t_state *state, t_state *state_local);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif  /* _repl_ex_h */
diff --git a/patches/gromacs-5.1.4.diff/src/programs/mdrun/repl_ex.h.preplumed b/patches/gromacs-5.1.4.diff/src/programs/mdrun/repl_ex.h.preplumed
deleted file mode 100644
index ffedd2305f626acdec1086a2aaa0169a029548f8..0000000000000000000000000000000000000000
--- a/patches/gromacs-5.1.4.diff/src/programs/mdrun/repl_ex.h.preplumed
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2011,2012,2013,2014, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-
-#ifndef _repl_ex_h
-#define _repl_ex_h
-
-#include "gromacs/legacyheaders/typedefs.h"
-#include "gromacs/legacyheaders/types/commrec.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Abstract type for replica exchange */
-typedef struct gmx_repl_ex *gmx_repl_ex_t;
-
-extern gmx_repl_ex_t init_replica_exchange(FILE *fplog,
-                                           const gmx_multisim_t *ms,
-                                           const t_state *state,
-                                           const t_inputrec *ir,
-                                           int nst, int nmultiex, int init_seed);
-/* Should only be called on the master nodes */
-
-extern gmx_bool replica_exchange(FILE *fplog,
-                                 const t_commrec *cr,
-                                 gmx_repl_ex_t re,
-                                 t_state *state, gmx_enerdata_t *enerd,
-                                 t_state *state_local,
-                                 gmx_int64_t step, real time);
-/* Attempts replica exchange, should be called on all nodes.
- * Returns TRUE if this state has been exchanged.
- * When running each replica in parallel,
- * this routine collects the state on the master node before exchange.
- * With domain decomposition, the global state after exchange is stored
- * in state and still needs to be redistributed over the nodes.
- */
-
-extern void print_replica_exchange_statistics(FILE *fplog, gmx_repl_ex_t re);
-/* Should only be called on the master nodes */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif  /* _repl_ex_h */
diff --git a/patches/gromacs-5.1.4.diff/src/programs/mdrun/runner.cpp b/patches/gromacs-5.1.4.diff/src/programs/mdrun/runner.cpp
deleted file mode 100644
index 6e1adee9cf5fc7dacc09308331d9484261210de3..0000000000000000000000000000000000000000
--- a/patches/gromacs-5.1.4.diff/src/programs/mdrun/runner.cpp
+++ /dev/null
@@ -1,1363 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2011,2012,2013,2014,2015,2016, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <assert.h>
-#include <signal.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <algorithm>
-
-#include "gromacs/domdec/domdec.h"
-#include "gromacs/essentialdynamics/edsam.h"
-#include "gromacs/ewald/pme.h"
-#include "gromacs/fileio/tpxio.h"
-#include "gromacs/gmxlib/gpu_utils/gpu_utils.h"
-#include "gromacs/legacyheaders/checkpoint.h"
-#include "gromacs/legacyheaders/constr.h"
-#include "gromacs/legacyheaders/copyrite.h"
-#include "gromacs/legacyheaders/disre.h"
-#include "gromacs/legacyheaders/force.h"
-#include "gromacs/legacyheaders/gmx_detect_hardware.h"
-#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
-#include "gromacs/legacyheaders/gmx_thread_affinity.h"
-#include "gromacs/legacyheaders/inputrec.h"
-#include "gromacs/legacyheaders/main.h"
-#include "gromacs/legacyheaders/md_logging.h"
-#include "gromacs/legacyheaders/md_support.h"
-#include "gromacs/legacyheaders/mdatoms.h"
-#include "gromacs/legacyheaders/mdrun.h"
-#include "gromacs/legacyheaders/names.h"
-#include "gromacs/legacyheaders/network.h"
-#include "gromacs/legacyheaders/oenv.h"
-#include "gromacs/legacyheaders/orires.h"
-#include "gromacs/legacyheaders/qmmm.h"
-#include "gromacs/legacyheaders/sighandler.h"
-#include "gromacs/legacyheaders/txtdump.h"
-#include "gromacs/legacyheaders/typedefs.h"
-#include "gromacs/math/calculate-ewald-splitting-coefficient.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/mdlib/calc_verletbuf.h"
-#include "gromacs/mdlib/nbnxn_search.h"
-#include "gromacs/pbcutil/pbc.h"
-#include "gromacs/pulling/pull.h"
-#include "gromacs/pulling/pull_rotation.h"
-#include "gromacs/swap/swapcoords.h"
-#include "gromacs/timing/wallcycle.h"
-#include "gromacs/topology/mtop_util.h"
-#include "gromacs/utility/gmxassert.h"
-#include "gromacs/utility/gmxmpi.h"
-#include "gromacs/utility/smalloc.h"
-
-#include "deform.h"
-#include "membed.h"
-#include "repl_ex.h"
-#include "resource-division.h"
-
-/* PLUMED */
-#include "../../../Plumed.h"
-extern int    plumedswitch;
-extern plumed plumedmain;
-/* END PLUMED */
-
-#ifdef GMX_FAHCORE
-#include "corewrap.h"
-#endif
-
-typedef struct {
-    gmx_integrator_t *func;
-} gmx_intp_t;
-
-/* The array should match the eI array in include/types/enums.h */
-const gmx_intp_t    integrator[eiNR] = { {do_md}, {do_steep}, {do_cg}, {do_md}, {do_md}, {do_nm}, {do_lbfgs}, {do_tpi}, {do_tpi}, {do_md}, {do_md}, {do_md}};
-
-gmx_int64_t         deform_init_init_step_tpx;
-matrix              deform_init_box_tpx;
-tMPI_Thread_mutex_t deform_init_box_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
-
-
-#ifdef GMX_THREAD_MPI
-/* The minimum number of atoms per tMPI thread. With fewer atoms than this,
- * the number of threads will get lowered.
- */
-#define MIN_ATOMS_PER_MPI_THREAD    90
-#define MIN_ATOMS_PER_GPU           900
-
-struct mdrunner_arglist
-{
-    gmx_hw_opt_t    hw_opt;
-    FILE           *fplog;
-    t_commrec      *cr;
-    int             nfile;
-    const t_filenm *fnm;
-    output_env_t    oenv;
-    gmx_bool        bVerbose;
-    gmx_bool        bCompact;
-    int             nstglobalcomm;
-    ivec            ddxyz;
-    int             dd_node_order;
-    real            rdd;
-    real            rconstr;
-    const char     *dddlb_opt;
-    real            dlb_scale;
-    const char     *ddcsx;
-    const char     *ddcsy;
-    const char     *ddcsz;
-    const char     *nbpu_opt;
-    int             nstlist_cmdline;
-    gmx_int64_t     nsteps_cmdline;
-    int             nstepout;
-    int             resetstep;
-    int             nmultisim;
-    int             repl_ex_nst;
-    int             repl_ex_nex;
-    int             repl_ex_seed;
-    real            pforce;
-    real            cpt_period;
-    real            max_hours;
-    int             imdport;
-    unsigned long   Flags;
-};
-
-
-/* The function used for spawning threads. Extracts the mdrunner()
-   arguments from its one argument and calls mdrunner(), after making
-   a commrec. */
-static void mdrunner_start_fn(void *arg)
-{
-    struct mdrunner_arglist *mda = (struct mdrunner_arglist*)arg;
-    struct mdrunner_arglist  mc  = *mda; /* copy the arg list to make sure
-                                            that it's thread-local. This doesn't
-                                            copy pointed-to items, of course,
-                                            but those are all const. */
-    t_commrec *cr;                       /* we need a local version of this */
-    FILE      *fplog = NULL;
-    t_filenm  *fnm;
-
-    fnm = dup_tfn(mc.nfile, mc.fnm);
-
-    cr = reinitialize_commrec_for_this_thread(mc.cr);
-
-    if (MASTER(cr))
-    {
-        fplog = mc.fplog;
-    }
-
-    mdrunner(&mc.hw_opt, fplog, cr, mc.nfile, fnm, mc.oenv,
-             mc.bVerbose, mc.bCompact, mc.nstglobalcomm,
-             mc.ddxyz, mc.dd_node_order, mc.rdd,
-             mc.rconstr, mc.dddlb_opt, mc.dlb_scale,
-             mc.ddcsx, mc.ddcsy, mc.ddcsz,
-             mc.nbpu_opt, mc.nstlist_cmdline,
-             mc.nsteps_cmdline, mc.nstepout, mc.resetstep,
-             mc.nmultisim, mc.repl_ex_nst, mc.repl_ex_nex, mc.repl_ex_seed, mc.pforce,
-             mc.cpt_period, mc.max_hours, mc.imdport, mc.Flags);
-}
-
-/* called by mdrunner() to start a specific number of threads (including
-   the main thread) for thread-parallel runs. This in turn calls mdrunner()
-   for each thread.
-   All options besides nthreads are the same as for mdrunner(). */
-static t_commrec *mdrunner_start_threads(gmx_hw_opt_t *hw_opt,
-                                         FILE *fplog, t_commrec *cr, int nfile,
-                                         const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
-                                         gmx_bool bCompact, int nstglobalcomm,
-                                         ivec ddxyz, int dd_node_order, real rdd, real rconstr,
-                                         const char *dddlb_opt, real dlb_scale,
-                                         const char *ddcsx, const char *ddcsy, const char *ddcsz,
-                                         const char *nbpu_opt, int nstlist_cmdline,
-                                         gmx_int64_t nsteps_cmdline,
-                                         int nstepout, int resetstep,
-                                         int nmultisim, int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
-                                         real pforce, real cpt_period, real max_hours,
-                                         unsigned long Flags)
-{
-    int                      ret;
-    struct mdrunner_arglist *mda;
-    t_commrec               *crn; /* the new commrec */
-    t_filenm                *fnmn;
-
-    /* first check whether we even need to start tMPI */
-    if (hw_opt->nthreads_tmpi < 2)
-    {
-        return cr;
-    }
-
-    /* a few small, one-time, almost unavoidable memory leaks: */
-    snew(mda, 1);
-    fnmn = dup_tfn(nfile, fnm);
-
-    /* fill the data structure to pass as void pointer to thread start fn */
-    /* hw_opt contains pointers, which should all be NULL at this stage */
-    mda->hw_opt          = *hw_opt;
-    mda->fplog           = fplog;
-    mda->cr              = cr;
-    mda->nfile           = nfile;
-    mda->fnm             = fnmn;
-    mda->oenv            = oenv;
-    mda->bVerbose        = bVerbose;
-    mda->bCompact        = bCompact;
-    mda->nstglobalcomm   = nstglobalcomm;
-    mda->ddxyz[XX]       = ddxyz[XX];
-    mda->ddxyz[YY]       = ddxyz[YY];
-    mda->ddxyz[ZZ]       = ddxyz[ZZ];
-    mda->dd_node_order   = dd_node_order;
-    mda->rdd             = rdd;
-    mda->rconstr         = rconstr;
-    mda->dddlb_opt       = dddlb_opt;
-    mda->dlb_scale       = dlb_scale;
-    mda->ddcsx           = ddcsx;
-    mda->ddcsy           = ddcsy;
-    mda->ddcsz           = ddcsz;
-    mda->nbpu_opt        = nbpu_opt;
-    mda->nstlist_cmdline = nstlist_cmdline;
-    mda->nsteps_cmdline  = nsteps_cmdline;
-    mda->nstepout        = nstepout;
-    mda->resetstep       = resetstep;
-    mda->nmultisim       = nmultisim;
-    mda->repl_ex_nst     = repl_ex_nst;
-    mda->repl_ex_nex     = repl_ex_nex;
-    mda->repl_ex_seed    = repl_ex_seed;
-    mda->pforce          = pforce;
-    mda->cpt_period      = cpt_period;
-    mda->max_hours       = max_hours;
-    mda->Flags           = Flags;
-
-    /* now spawn new threads that start mdrunner_start_fn(), while
-       the main thread returns, we set thread affinity later */
-    ret = tMPI_Init_fn(TRUE, hw_opt->nthreads_tmpi, TMPI_AFFINITY_NONE,
-                       mdrunner_start_fn, (void*)(mda) );
-    if (ret != TMPI_SUCCESS)
-    {
-        return NULL;
-    }
-
-    crn = reinitialize_commrec_for_this_thread(cr);
-    return crn;
-}
-
-#endif /* GMX_THREAD_MPI */
-
-
-/* We determine the extra cost of the non-bonded kernels compared to
- * a reference nstlist value of 10 (which is the default in grompp).
- */
-static const int    nbnxnReferenceNstlist = 10;
-/* The values to try when switching  */
-const int           nstlist_try[] = { 20, 25, 40 };
-#define NNSTL  sizeof(nstlist_try)/sizeof(nstlist_try[0])
-/* Increase nstlist until the non-bonded cost increases more than listfac_ok,
- * but never more than listfac_max.
- * A standard (protein+)water system at 300K with PME ewald_rtol=1e-5
- * needs 1.28 at rcoulomb=0.9 and 1.24 at rcoulomb=1.0 to get to nstlist=40.
- * Note that both CPU and GPU factors are conservative. Performance should
- * not go down due to this tuning, except with a relatively slow GPU.
- * On the other hand, at medium/high parallelization or with fast GPUs
- * nstlist will not be increased enough to reach optimal performance.
- */
-/* CPU: pair-search is about a factor 1.5 slower than the non-bonded kernel */
-static const float  nbnxn_cpu_listfac_ok    = 1.05;
-static const float  nbnxn_cpu_listfac_max   = 1.09;
-/* GPU: pair-search is a factor 1.5-3 slower than the non-bonded kernel */
-static const float  nbnxn_gpu_listfac_ok    = 1.20;
-static const float  nbnxn_gpu_listfac_max   = 1.30;
-
-/* Try to increase nstlist when using the Verlet cut-off scheme */
-static void increase_nstlist(FILE *fp, t_commrec *cr,
-                             t_inputrec *ir, int nstlist_cmdline,
-                             const gmx_mtop_t *mtop, matrix box,
-                             gmx_bool bGPU)
-{
-    float                  listfac_ok, listfac_max;
-    int                    nstlist_orig, nstlist_prev;
-    verletbuf_list_setup_t ls;
-    real                   rlistWithReferenceNstlist, rlist_inc, rlist_ok, rlist_max;
-    real                   rlist_new, rlist_prev;
-    size_t                 nstlist_ind = 0;
-    t_state                state_tmp;
-    gmx_bool               bBox, bDD, bCont;
-    const char            *nstl_gpu = "\nFor optimal performance with a GPU nstlist (now %d) should be larger.\nThe optimum depends on your CPU and GPU resources.\nYou might want to try several nstlist values.\n";
-    const char            *nve_err  = "Can not increase nstlist because an NVE ensemble is used";
-    const char            *vbd_err  = "Can not increase nstlist because verlet-buffer-tolerance is not set or used";
-    const char            *box_err  = "Can not increase nstlist because the box is too small";
-    const char            *dd_err   = "Can not increase nstlist because of domain decomposition limitations";
-    char                   buf[STRLEN];
-    const float            oneThird = 1.0f / 3.0f;
-
-    if (nstlist_cmdline <= 0)
-    {
-        if (ir->nstlist == 1)
-        {
-            /* The user probably set nstlist=1 for a reason,
-             * don't mess with the settings.
-             */
-            return;
-        }
-
-        if (fp != NULL && bGPU && ir->nstlist < nstlist_try[0])
-        {
-            fprintf(fp, nstl_gpu, ir->nstlist);
-        }
-        nstlist_ind = 0;
-        while (nstlist_ind < NNSTL && ir->nstlist >= nstlist_try[nstlist_ind])
-        {
-            nstlist_ind++;
-        }
-        if (nstlist_ind == NNSTL)
-        {
-            /* There are no larger nstlist value to try */
-            return;
-        }
-    }
-
-    if (EI_MD(ir->eI) && ir->etc == etcNO)
-    {
-        if (MASTER(cr))
-        {
-            fprintf(stderr, "%s\n", nve_err);
-        }
-        if (fp != NULL)
-        {
-            fprintf(fp, "%s\n", nve_err);
-        }
-
-        return;
-    }
-
-    if (ir->verletbuf_tol == 0 && bGPU)
-    {
-        gmx_fatal(FARGS, "You are using an old tpr file with a GPU, please generate a new tpr file with an up to date version of grompp");
-    }
-
-    if (ir->verletbuf_tol < 0)
-    {
-        if (MASTER(cr))
-        {
-            fprintf(stderr, "%s\n", vbd_err);
-        }
-        if (fp != NULL)
-        {
-            fprintf(fp, "%s\n", vbd_err);
-        }
-
-        return;
-    }
-
-    if (bGPU)
-    {
-        listfac_ok  = nbnxn_gpu_listfac_ok;
-        listfac_max = nbnxn_gpu_listfac_max;
-    }
-    else
-    {
-        listfac_ok  = nbnxn_cpu_listfac_ok;
-        listfac_max = nbnxn_cpu_listfac_max;
-    }
-
-    nstlist_orig = ir->nstlist;
-    if (nstlist_cmdline > 0)
-    {
-        if (fp)
-        {
-            sprintf(buf, "Getting nstlist=%d from command line option",
-                    nstlist_cmdline);
-        }
-        ir->nstlist = nstlist_cmdline;
-    }
-
-    verletbuf_get_list_setup(TRUE, bGPU, &ls);
-
-    /* Allow rlist to make the list a given factor larger than the list
-     * would be with the reference value for nstlist (10).
-     */
-    nstlist_prev = ir->nstlist;
-    ir->nstlist  = nbnxnReferenceNstlist;
-    calc_verlet_buffer_size(mtop, det(box), ir, -1, &ls, NULL,
-                            &rlistWithReferenceNstlist);
-    ir->nstlist  = nstlist_prev;
-
-    /* Determine the pair list size increase due to zero interactions */
-    rlist_inc = nbnxn_get_rlist_effective_inc(ls.cluster_size_j,
-                                              mtop->natoms/det(box));
-    rlist_ok  = (rlistWithReferenceNstlist + rlist_inc)*pow(listfac_ok, oneThird) - rlist_inc;
-    rlist_max = (rlistWithReferenceNstlist + rlist_inc)*pow(listfac_max, oneThird) - rlist_inc;
-    if (debug)
-    {
-        fprintf(debug, "nstlist tuning: rlist_inc %.3f rlist_ok %.3f rlist_max %.3f\n",
-                rlist_inc, rlist_ok, rlist_max);
-    }
-
-    nstlist_prev = nstlist_orig;
-    rlist_prev   = ir->rlist;
-    do
-    {
-        if (nstlist_cmdline <= 0)
-        {
-            ir->nstlist = nstlist_try[nstlist_ind];
-        }
-
-        /* Set the pair-list buffer size in ir */
-        calc_verlet_buffer_size(mtop, det(box), ir, -1, &ls, NULL, &rlist_new);
-
-        /* Does rlist fit in the box? */
-        bBox = (sqr(rlist_new) < max_cutoff2(ir->ePBC, box));
-        bDD  = TRUE;
-        if (bBox && DOMAINDECOMP(cr))
-        {
-            /* Check if rlist fits in the domain decomposition */
-            if (inputrec2nboundeddim(ir) < DIM)
-            {
-                gmx_incons("Changing nstlist with domain decomposition and unbounded dimensions is not implemented yet");
-            }
-            copy_mat(box, state_tmp.box);
-            bDD = change_dd_cutoff(cr, &state_tmp, ir, rlist_new);
-        }
-
-        if (debug)
-        {
-            fprintf(debug, "nstlist %d rlist %.3f bBox %d bDD %d\n",
-                    ir->nstlist, rlist_new, bBox, bDD);
-        }
-
-        bCont = FALSE;
-
-        if (nstlist_cmdline <= 0)
-        {
-            if (bBox && bDD && rlist_new <= rlist_max)
-            {
-                /* Increase nstlist */
-                nstlist_prev = ir->nstlist;
-                rlist_prev   = rlist_new;
-                bCont        = (nstlist_ind+1 < NNSTL && rlist_new < rlist_ok);
-            }
-            else
-            {
-                /* Stick with the previous nstlist */
-                ir->nstlist = nstlist_prev;
-                rlist_new   = rlist_prev;
-                bBox        = TRUE;
-                bDD         = TRUE;
-            }
-        }
-
-        nstlist_ind++;
-    }
-    while (bCont);
-
-    if (!bBox || !bDD)
-    {
-        gmx_warning(!bBox ? box_err : dd_err);
-        if (fp != NULL)
-        {
-            fprintf(fp, "\n%s\n", bBox ? box_err : dd_err);
-        }
-        ir->nstlist = nstlist_orig;
-    }
-    else if (ir->nstlist != nstlist_orig || rlist_new != ir->rlist)
-    {
-        sprintf(buf, "Changing nstlist from %d to %d, rlist from %g to %g",
-                nstlist_orig, ir->nstlist,
-                ir->rlist, rlist_new);
-        if (MASTER(cr))
-        {
-            fprintf(stderr, "%s\n\n", buf);
-        }
-        if (fp != NULL)
-        {
-            fprintf(fp, "%s\n\n", buf);
-        }
-        ir->rlist     = rlist_new;
-        ir->rlistlong = rlist_new;
-    }
-}
-
-static void prepare_verlet_scheme(FILE                           *fplog,
-                                  t_commrec                      *cr,
-                                  t_inputrec                     *ir,
-                                  int                             nstlist_cmdline,
-                                  const gmx_mtop_t               *mtop,
-                                  matrix                          box,
-                                  gmx_bool                        bUseGPU)
-{
-    /* For NVE simulations, we will retain the initial list buffer */
-    if (EI_DYNAMICS(ir->eI) &&
-        ir->verletbuf_tol > 0 &&
-        !(EI_MD(ir->eI) && ir->etc == etcNO))
-    {
-        /* Update the Verlet buffer size for the current run setup */
-        verletbuf_list_setup_t ls;
-        real                   rlist_new;
-
-        /* Here we assume SIMD-enabled kernels are being used. But as currently
-         * calc_verlet_buffer_size gives the same results for 4x8 and 4x4
-         * and 4x2 gives a larger buffer than 4x4, this is ok.
-         */
-        verletbuf_get_list_setup(TRUE, bUseGPU, &ls);
-
-        calc_verlet_buffer_size(mtop, det(box), ir, -1, &ls, NULL, &rlist_new);
-
-        if (rlist_new != ir->rlist)
-        {
-            if (fplog != NULL)
-            {
-                fprintf(fplog, "\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n",
-                        ir->rlist, rlist_new,
-                        ls.cluster_size_i, ls.cluster_size_j);
-            }
-            ir->rlist     = rlist_new;
-            ir->rlistlong = rlist_new;
-        }
-    }
-
-    if (nstlist_cmdline > 0 && (!EI_DYNAMICS(ir->eI) || ir->verletbuf_tol <= 0))
-    {
-        gmx_fatal(FARGS, "Can not set nstlist without %s",
-                  !EI_DYNAMICS(ir->eI) ? "dynamics" : "verlet-buffer-tolerance");
-    }
-
-    if (EI_DYNAMICS(ir->eI))
-    {
-        /* Set or try nstlist values */
-        increase_nstlist(fplog, cr, ir, nstlist_cmdline, mtop, box, bUseGPU);
-    }
-}
-
-/* Override the value in inputrec with value passed on the command line (if any) */
-static void override_nsteps_cmdline(FILE            *fplog,
-                                    gmx_int64_t      nsteps_cmdline,
-                                    t_inputrec      *ir,
-                                    const t_commrec *cr)
-{
-    assert(ir);
-    assert(cr);
-
-    /* override with anything else than the default -2 */
-    if (nsteps_cmdline > -2)
-    {
-        char sbuf_steps[STEPSTRSIZE];
-        char sbuf_msg[STRLEN];
-
-        ir->nsteps = nsteps_cmdline;
-        if (EI_DYNAMICS(ir->eI) && nsteps_cmdline != -1)
-        {
-            sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps, %.3g ps",
-                    gmx_step_str(nsteps_cmdline, sbuf_steps),
-                    fabs(nsteps_cmdline*ir->delta_t));
-        }
-        else
-        {
-            sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps",
-                    gmx_step_str(nsteps_cmdline, sbuf_steps));
-        }
-
-        md_print_warn(cr, fplog, "%s\n", sbuf_msg);
-    }
-    else if (nsteps_cmdline < -2)
-    {
-        gmx_fatal(FARGS, "Invalid nsteps value passed on the command line: %d",
-                  nsteps_cmdline);
-    }
-    /* Do nothing if nsteps_cmdline == -2 */
-}
-
-int mdrunner(gmx_hw_opt_t *hw_opt,
-             FILE *fplog, t_commrec *cr, int nfile,
-             const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
-             gmx_bool bCompact, int nstglobalcomm,
-             ivec ddxyz, int dd_node_order, real rdd, real rconstr,
-             const char *dddlb_opt, real dlb_scale,
-             const char *ddcsx, const char *ddcsy, const char *ddcsz,
-             const char *nbpu_opt, int nstlist_cmdline,
-             gmx_int64_t nsteps_cmdline, int nstepout, int resetstep,
-             int gmx_unused nmultisim, int repl_ex_nst, int repl_ex_nex,
-             int repl_ex_seed, real pforce, real cpt_period, real max_hours,
-             int imdport, unsigned long Flags)
-{
-    gmx_bool                  bForceUseGPU, bTryUseGPU, bRerunMD;
-    t_inputrec               *inputrec;
-    t_state                  *state = NULL;
-    matrix                    box;
-    gmx_ddbox_t               ddbox = {0};
-    int                       npme_major, npme_minor;
-    t_nrnb                   *nrnb;
-    gmx_mtop_t               *mtop          = NULL;
-    t_mdatoms                *mdatoms       = NULL;
-    t_forcerec               *fr            = NULL;
-    t_fcdata                 *fcd           = NULL;
-    real                      ewaldcoeff_q  = 0;
-    real                      ewaldcoeff_lj = 0;
-    struct gmx_pme_t        **pmedata       = NULL;
-    gmx_vsite_t              *vsite         = NULL;
-    gmx_constr_t              constr;
-    int                       nChargePerturbed = -1, nTypePerturbed = 0, status;
-    gmx_wallcycle_t           wcycle;
-    gmx_bool                  bReadEkin;
-    gmx_walltime_accounting_t walltime_accounting = NULL;
-    int                       rc;
-    gmx_int64_t               reset_counters;
-    gmx_edsam_t               ed           = NULL;
-    int                       nthreads_pme = 1;
-    int                       nthreads_pp  = 1;
-    gmx_membed_t              membed       = NULL;
-    gmx_hw_info_t            *hwinfo       = NULL;
-    /* The master rank decides early on bUseGPU and broadcasts this later */
-    gmx_bool                  bUseGPU      = FALSE;
-
-    /* CAUTION: threads may be started later on in this function, so
-       cr doesn't reflect the final parallel state right now */
-    snew(inputrec, 1);
-    snew(mtop, 1);
-
-    if (Flags & MD_APPENDFILES)
-    {
-        fplog = NULL;
-    }
-
-    bRerunMD     = (Flags & MD_RERUN);
-    bForceUseGPU = (strncmp(nbpu_opt, "gpu", 3) == 0);
-    bTryUseGPU   = (strncmp(nbpu_opt, "auto", 4) == 0) || bForceUseGPU;
-
-    /* Detect hardware, gather information. This is an operation that is
-     * global for this process (MPI rank). */
-    hwinfo = gmx_detect_hardware(fplog, cr, bTryUseGPU);
-
-    gmx_print_detected_hardware(fplog, cr, hwinfo);
-
-    if (fplog != NULL)
-    {
-        /* Print references after all software/hardware printing */
-        please_cite(fplog, "Abraham2015");
-        please_cite(fplog, "Pall2015");
-        please_cite(fplog, "Pronk2013");
-        please_cite(fplog, "Hess2008b");
-        please_cite(fplog, "Spoel2005a");
-        please_cite(fplog, "Lindahl2001a");
-        please_cite(fplog, "Berendsen95a");
-    }
-
-    snew(state, 1);
-    if (SIMMASTER(cr))
-    {
-        /* Read (nearly) all data required for the simulation */
-        read_tpx_state(ftp2fn(efTPR, nfile, fnm), inputrec, state, NULL, mtop);
-
-        if (inputrec->cutoff_scheme == ecutsVERLET)
-        {
-            /* Here the master rank decides if all ranks will use GPUs */
-            bUseGPU = (hwinfo->gpu_info.n_dev_compatible > 0 ||
-                       getenv("GMX_EMULATE_GPU") != NULL);
-
-            /* TODO add GPU kernels for this and replace this check by:
-             * (bUseGPU && (ir->vdwtype == evdwPME &&
-             *               ir->ljpme_combination_rule == eljpmeLB))
-             * update the message text and the content of nbnxn_acceleration_supported.
-             */
-            if (bUseGPU &&
-                !nbnxn_gpu_acceleration_supported(fplog, cr, inputrec, bRerunMD))
-            {
-                /* Fallback message printed by nbnxn_acceleration_supported */
-                if (bForceUseGPU)
-                {
-                    gmx_fatal(FARGS, "GPU acceleration requested, but not supported with the given input settings");
-                }
-                bUseGPU = FALSE;
-            }
-
-            prepare_verlet_scheme(fplog, cr,
-                                  inputrec, nstlist_cmdline, mtop, state->box,
-                                  bUseGPU);
-        }
-        else
-        {
-            if (nstlist_cmdline > 0)
-            {
-                gmx_fatal(FARGS, "Can not set nstlist with the group cut-off scheme");
-            }
-
-            if (hwinfo->gpu_info.n_dev_compatible > 0)
-            {
-                md_print_warn(cr, fplog,
-                              "NOTE: GPU(s) found, but the current simulation can not use GPUs\n"
-                              "      To use a GPU, set the mdp option: cutoff-scheme = Verlet\n");
-            }
-
-            if (bForceUseGPU)
-            {
-                gmx_fatal(FARGS, "GPU requested, but can't be used without cutoff-scheme=Verlet");
-            }
-
-#ifdef GMX_TARGET_BGQ
-            md_print_warn(cr, fplog,
-                          "NOTE: There is no SIMD implementation of the group scheme kernels on\n"
-                          "      BlueGene/Q. You will observe better performance from using the\n"
-                          "      Verlet cut-off scheme.\n");
-#endif
-        }
-
-        if (inputrec->eI == eiSD2)
-        {
-            md_print_warn(cr, fplog, "The stochastic dynamics integrator %s is deprecated, since\n"
-                          "it is slower than integrator %s and is slightly less accurate\n"
-                          "with constraints. Use the %s integrator.",
-                          ei_names[inputrec->eI], ei_names[eiSD1], ei_names[eiSD1]);
-        }
-    }
-
-    /* Check and update the hardware options for internal consistency */
-    check_and_update_hw_opt_1(hw_opt, cr);
-
-    /* Early check for externally set process affinity. */
-    gmx_check_thread_affinity_set(fplog, cr,
-                                  hw_opt, hwinfo->nthreads_hw_avail, FALSE);
-
-#ifdef GMX_THREAD_MPI
-    if (SIMMASTER(cr))
-    {
-        if (cr->npmenodes > 0 && hw_opt->nthreads_tmpi <= 0)
-        {
-            gmx_fatal(FARGS, "You need to explicitly specify the number of MPI threads (-ntmpi) when using separate PME ranks");
-        }
-
-        /* Since the master knows the cut-off scheme, update hw_opt for this.
-         * This is done later for normal MPI and also once more with tMPI
-         * for all tMPI ranks.
-         */
-        check_and_update_hw_opt_2(hw_opt, inputrec->cutoff_scheme);
-
-        /* NOW the threads will be started: */
-        hw_opt->nthreads_tmpi = get_nthreads_mpi(hwinfo,
-                                                 hw_opt,
-                                                 inputrec, mtop,
-                                                 cr, fplog, bUseGPU);
-
-        if (hw_opt->nthreads_tmpi > 1)
-        {
-            t_commrec *cr_old       = cr;
-            /* now start the threads. */
-            cr = mdrunner_start_threads(hw_opt, fplog, cr_old, nfile, fnm,
-                                        oenv, bVerbose, bCompact, nstglobalcomm,
-                                        ddxyz, dd_node_order, rdd, rconstr,
-                                        dddlb_opt, dlb_scale, ddcsx, ddcsy, ddcsz,
-                                        nbpu_opt, nstlist_cmdline,
-                                        nsteps_cmdline, nstepout, resetstep, nmultisim,
-                                        repl_ex_nst, repl_ex_nex, repl_ex_seed, pforce,
-                                        cpt_period, max_hours,
-                                        Flags);
-            /* the main thread continues here with a new cr. We don't deallocate
-               the old cr because other threads may still be reading it. */
-            if (cr == NULL)
-            {
-                gmx_comm("Failed to spawn threads");
-            }
-        }
-    }
-#endif
-    /* END OF CAUTION: cr is now reliable */
-
-    /* g_membed initialisation *
-     * Because we change the mtop, init_membed is called before the init_parallel *
-     * (in case we ever want to make it run in parallel) */
-    if (opt2bSet("-membed", nfile, fnm))
-    {
-        if (MASTER(cr))
-        {
-            fprintf(stderr, "Initializing membed");
-        }
-        membed = init_membed(fplog, nfile, fnm, mtop, inputrec, state, cr, &cpt_period);
-    }
-
-    if (PAR(cr))
-    {
-        /* now broadcast everything to the non-master nodes/threads: */
-        init_parallel(cr, inputrec, mtop);
-
-        /* The master rank decided on the use of GPUs,
-         * broadcast this information to all ranks.
-         */
-        gmx_bcast_sim(sizeof(bUseGPU), &bUseGPU, cr);
-    }
-
-    if (fplog != NULL)
-    {
-        pr_inputrec(fplog, 0, "Input Parameters", inputrec, FALSE);
-        fprintf(fplog, "\n");
-    }
-
-    /* now make sure the state is initialized and propagated */
-    set_state_entries(state, inputrec);
-
-    /* A parallel command line option consistency check that we can
-       only do after any threads have started. */
-    if (!PAR(cr) &&
-        (ddxyz[XX] > 1 || ddxyz[YY] > 1 || ddxyz[ZZ] > 1 || cr->npmenodes > 0))
-    {
-        gmx_fatal(FARGS,
-                  "The -dd or -npme option request a parallel simulation, "
-#ifndef GMX_MPI
-                  "but %s was compiled without threads or MPI enabled"
-#else
-#ifdef GMX_THREAD_MPI
-                  "but the number of MPI-threads (option -ntmpi) is not set or is 1"
-#else
-                  "but %s was not started through mpirun/mpiexec or only one rank was requested through mpirun/mpiexec"
-#endif
-#endif
-                  , output_env_get_program_display_name(oenv)
-                  );
-    }
-
-    if (bRerunMD &&
-        (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
-    {
-        gmx_fatal(FARGS, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun");
-    }
-
-    if (can_use_allvsall(inputrec, TRUE, cr, fplog) && DOMAINDECOMP(cr))
-    {
-        gmx_fatal(FARGS, "All-vs-all loops do not work with domain decomposition, use a single MPI rank");
-    }
-
-    if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)))
-    {
-        if (cr->npmenodes > 0)
-        {
-            gmx_fatal_collective(FARGS, cr, NULL,
-                                 "PME-only ranks are requested, but the system does not use PME for electrostatics or LJ");
-        }
-
-        cr->npmenodes = 0;
-    }
-
-    if (bUseGPU && cr->npmenodes < 0)
-    {
-        /* With GPUs we don't automatically use PME-only ranks. PME ranks can
-         * improve performance with many threads per GPU, since our OpenMP
-         * scaling is bad, but it's difficult to automate the setup.
-         */
-        cr->npmenodes = 0;
-    }
-
-#ifdef GMX_FAHCORE
-    if (MASTER(cr))
-    {
-        fcRegisterSteps(inputrec->nsteps, inputrec->init_step);
-    }
-#endif
-
-    /* NMR restraints must be initialized before load_checkpoint,
-     * since with time averaging the history is added to t_state.
-     * For proper consistency check we therefore need to extend
-     * t_state here.
-     * So the PME-only nodes (if present) will also initialize
-     * the distance restraints.
-     */
-    snew(fcd, 1);
-
-    /* This needs to be called before read_checkpoint to extend the state */
-    init_disres(fplog, mtop, inputrec, cr, fcd, state, repl_ex_nst > 0);
-
-    init_orires(fplog, mtop, state->x, inputrec, cr, &(fcd->orires),
-                state);
-
-    if (DEFORM(*inputrec))
-    {
-        /* Store the deform reference box before reading the checkpoint */
-        if (SIMMASTER(cr))
-        {
-            copy_mat(state->box, box);
-        }
-        if (PAR(cr))
-        {
-            gmx_bcast(sizeof(box), box, cr);
-        }
-        /* Because we do not have the update struct available yet
-         * in which the reference values should be stored,
-         * we store them temporarily in static variables.
-         * This should be thread safe, since they are only written once
-         * and with identical values.
-         */
-        tMPI_Thread_mutex_lock(&deform_init_box_mutex);
-        deform_init_init_step_tpx = inputrec->init_step;
-        copy_mat(box, deform_init_box_tpx);
-        tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
-    }
-
-    if (opt2bSet("-cpi", nfile, fnm))
-    {
-        /* Check if checkpoint file exists before doing continuation.
-         * This way we can use identical input options for the first and subsequent runs...
-         */
-        if (gmx_fexist_master(opt2fn_master("-cpi", nfile, fnm, cr), cr) )
-        {
-            load_checkpoint(opt2fn_master("-cpi", nfile, fnm, cr), &fplog,
-                            cr, ddxyz,
-                            inputrec, state, &bReadEkin,
-                            (Flags & MD_APPENDFILES),
-                            (Flags & MD_APPENDFILESSET));
-
-            if (bReadEkin)
-            {
-                Flags |= MD_READ_EKIN;
-            }
-        }
-    }
-
-    if (MASTER(cr) && (Flags & MD_APPENDFILES))
-    {
-        gmx_log_open(ftp2fn(efLOG, nfile, fnm), cr,
-                     Flags, &fplog);
-    }
-
-    /* override nsteps with value from cmdline */
-    override_nsteps_cmdline(fplog, nsteps_cmdline, inputrec, cr);
-
-    if (SIMMASTER(cr))
-    {
-        copy_mat(state->box, box);
-    }
-
-    if (PAR(cr))
-    {
-        gmx_bcast(sizeof(box), box, cr);
-    }
-
-    /* Essential dynamics */
-    if (opt2bSet("-ei", nfile, fnm))
-    {
-        /* Open input and output files, allocate space for ED data structure */
-        ed = ed_open(mtop->natoms, &state->edsamstate, nfile, fnm, Flags, oenv, cr);
-    }
-
-    if (PAR(cr) && !(EI_TPI(inputrec->eI) ||
-                     inputrec->eI == eiNM))
-    {
-        cr->dd = init_domain_decomposition(fplog, cr, Flags, ddxyz, rdd, rconstr,
-                                           dddlb_opt, dlb_scale,
-                                           ddcsx, ddcsy, ddcsz,
-                                           mtop, inputrec,
-                                           box, state->x,
-                                           &ddbox, &npme_major, &npme_minor);
-
-        make_dd_communicators(fplog, cr, dd_node_order);
-
-        /* Set overallocation to avoid frequent reallocation of arrays */
-        set_over_alloc_dd(TRUE);
-    }
-    else
-    {
-        /* PME, if used, is done on all nodes with 1D decomposition */
-        cr->npmenodes = 0;
-        cr->duty      = (DUTY_PP | DUTY_PME);
-        npme_major    = 1;
-        npme_minor    = 1;
-
-        if (inputrec->ePBC == epbcSCREW)
-        {
-            gmx_fatal(FARGS,
-                      "pbc=%s is only implemented with domain decomposition",
-                      epbc_names[inputrec->ePBC]);
-        }
-    }
-
-    if (PAR(cr))
-    {
-        /* After possible communicator splitting in make_dd_communicators.
-         * we can set up the intra/inter node communication.
-         */
-        gmx_setup_nodecomm(fplog, cr);
-    }
-
-    /* Initialize per-physical-node MPI process/thread ID and counters. */
-    gmx_init_intranode_counters(cr);
-#ifdef GMX_MPI
-    if (MULTISIM(cr))
-    {
-        md_print_info(cr, fplog,
-                      "This is simulation %d out of %d running as a composite GROMACS\n"
-                      "multi-simulation job. Setup for this simulation:\n\n",
-                      cr->ms->sim, cr->ms->nsim);
-    }
-    md_print_info(cr, fplog, "Using %d MPI %s\n",
-                  cr->nnodes,
-#ifdef GMX_THREAD_MPI
-                  cr->nnodes == 1 ? "thread" : "threads"
-#else
-                  cr->nnodes == 1 ? "process" : "processes"
-#endif
-                  );
-    fflush(stderr);
-#endif
-
-    /* Check and update hw_opt for the cut-off scheme */
-    check_and_update_hw_opt_2(hw_opt, inputrec->cutoff_scheme);
-
-    /* Check and update hw_opt for the number of MPI ranks */
-    check_and_update_hw_opt_3(hw_opt);
-
-    gmx_omp_nthreads_init(fplog, cr,
-                          hwinfo->nthreads_hw_avail,
-                          hw_opt->nthreads_omp,
-                          hw_opt->nthreads_omp_pme,
-                          (cr->duty & DUTY_PP) == 0,
-                          inputrec->cutoff_scheme == ecutsVERLET);
-
-#ifndef NDEBUG
-    if (integrator[inputrec->eI].func != do_tpi &&
-        inputrec->cutoff_scheme == ecutsVERLET)
-    {
-        gmx_feenableexcept();
-    }
-#endif
-
-    if (bUseGPU)
-    {
-        /* Select GPU id's to use */
-        gmx_select_gpu_ids(fplog, cr, &hwinfo->gpu_info, bForceUseGPU,
-                           &hw_opt->gpu_opt);
-    }
-    else
-    {
-        /* Ignore (potentially) manually selected GPUs */
-        hw_opt->gpu_opt.n_dev_use = 0;
-    }
-
-    /* check consistency across ranks of things like SIMD
-     * support and number of GPUs selected */
-    gmx_check_hw_runconf_consistency(fplog, hwinfo, cr, hw_opt, bUseGPU);
-
-    /* Now that we know the setup is consistent, check for efficiency */
-    check_resource_division_efficiency(hwinfo, hw_opt, Flags & MD_NTOMPSET,
-                                       cr, fplog);
-
-    if (DOMAINDECOMP(cr))
-    {
-        /* When we share GPUs over ranks, we need to know this for the DLB */
-        dd_setup_dlb_resource_sharing(cr, hwinfo, hw_opt);
-    }
-
-    /* getting number of PP/PME threads
-       PME: env variable should be read only on one node to make sure it is
-       identical everywhere;
-     */
-    /* TODO nthreads_pp is only used for pinning threads.
-     * This is a temporary solution until we have a hw topology library.
-     */
-    nthreads_pp  = gmx_omp_nthreads_get(emntNonbonded);
-    nthreads_pme = gmx_omp_nthreads_get(emntPME);
-
-    wcycle = wallcycle_init(fplog, resetstep, cr, nthreads_pp, nthreads_pme);
-
-    if (PAR(cr))
-    {
-        /* Master synchronizes its value of reset_counters with all nodes
-         * including PME only nodes */
-        reset_counters = wcycle_get_reset_counters(wcycle);
-        gmx_bcast_sim(sizeof(reset_counters), &reset_counters, cr);
-        wcycle_set_reset_counters(wcycle, reset_counters);
-    }
-
-    snew(nrnb, 1);
-    if (cr->duty & DUTY_PP)
-    {
-        bcast_state(cr, state);
-
-        /* Initiate forcerecord */
-        fr          = mk_forcerec();
-        fr->hwinfo  = hwinfo;
-        fr->gpu_opt = &hw_opt->gpu_opt;
-        init_forcerec(fplog, oenv, fr, fcd, inputrec, mtop, cr, box,
-                      opt2fn("-table", nfile, fnm),
-                      opt2fn("-tabletf", nfile, fnm),
-                      opt2fn("-tablep", nfile, fnm),
-                      getFilenm("-tableb", nfile, fnm),
-                      nbpu_opt,
-                      FALSE,
-                      pforce);
-
-        /* version for PCA_NOT_READ_NODE (see md.c) */
-        /*init_forcerec(fplog,fr,fcd,inputrec,mtop,cr,box,FALSE,
-           "nofile","nofile","nofile","nofile",FALSE,pforce);
-         */
-
-        /* Initialize QM-MM */
-        if (fr->bQMMM)
-        {
-            init_QMMMrec(cr, mtop, inputrec, fr);
-        }
-
-        /* Initialize the mdatoms structure.
-         * mdatoms is not filled with atom data,
-         * as this can not be done now with domain decomposition.
-         */
-        mdatoms = init_mdatoms(fplog, mtop, inputrec->efep != efepNO);
-
-        /* Initialize the virtual site communication */
-        vsite = init_vsite(mtop, cr, FALSE);
-
-        calc_shifts(box, fr->shift_vec);
-
-        /* With periodic molecules the charge groups should be whole at start up
-         * and the virtual sites should not be far from their proper positions.
-         */
-        if (!inputrec->bContinuation && MASTER(cr) &&
-            !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
-        {
-            /* Make molecules whole at start of run */
-            if (fr->ePBC != epbcNONE)
-            {
-                do_pbc_first_mtop(fplog, inputrec->ePBC, box, mtop, state->x);
-            }
-            if (vsite)
-            {
-                /* Correct initial vsite positions are required
-                 * for the initial distribution in the domain decomposition
-                 * and for the initial shell prediction.
-                 */
-                construct_vsites_mtop(vsite, mtop, state->x);
-            }
-        }
-
-        if (EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype))
-        {
-            ewaldcoeff_q  = fr->ewaldcoeff_q;
-            ewaldcoeff_lj = fr->ewaldcoeff_lj;
-            pmedata       = &fr->pmedata;
-        }
-        else
-        {
-            pmedata = NULL;
-        }
-    }
-    else
-    {
-        /* This is a PME only node */
-
-        /* We don't need the state */
-        done_state(state);
-
-        ewaldcoeff_q  = calc_ewaldcoeff_q(inputrec->rcoulomb, inputrec->ewald_rtol);
-        ewaldcoeff_lj = calc_ewaldcoeff_lj(inputrec->rvdw, inputrec->ewald_rtol_lj);
-        snew(pmedata, 1);
-    }
-
-    if (hw_opt->thread_affinity != threadaffOFF)
-    {
-        /* Before setting affinity, check whether the affinity has changed
-         * - which indicates that probably the OpenMP library has changed it
-         * since we first checked).
-         */
-        gmx_check_thread_affinity_set(fplog, cr,
-                                      hw_opt, hwinfo->nthreads_hw_avail, TRUE);
-
-        /* Set the CPU affinity */
-        gmx_set_thread_affinity(fplog, cr, hw_opt, hwinfo);
-    }
-
-    /* Initiate PME if necessary,
-     * either on all nodes or on dedicated PME nodes only. */
-    if (EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype))
-    {
-        if (mdatoms)
-        {
-            nChargePerturbed = mdatoms->nChargePerturbed;
-            if (EVDW_PME(inputrec->vdwtype))
-            {
-                nTypePerturbed   = mdatoms->nTypePerturbed;
-            }
-        }
-        if (cr->npmenodes > 0)
-        {
-            /* The PME only nodes need to know nChargePerturbed(FEP on Q) and nTypePerturbed(FEP on LJ)*/
-            gmx_bcast_sim(sizeof(nChargePerturbed), &nChargePerturbed, cr);
-            gmx_bcast_sim(sizeof(nTypePerturbed), &nTypePerturbed, cr);
-        }
-
-        if (cr->duty & DUTY_PME)
-        {
-            status = gmx_pme_init(pmedata, cr, npme_major, npme_minor, inputrec,
-                                  mtop ? mtop->natoms : 0, nChargePerturbed, nTypePerturbed,
-                                  (Flags & MD_REPRODUCIBLE), nthreads_pme);
-            if (status != 0)
-            {
-                gmx_fatal(FARGS, "Error %d initializing PME", status);
-            }
-        }
-    }
-
-
-    if (integrator[inputrec->eI].func == do_md)
-    {
-        /* Turn on signal handling on all nodes */
-        /*
-         * (A user signal from the PME nodes (if any)
-         * is communicated to the PP nodes.
-         */
-        signal_handler_install();
-    }
-
-    if (cr->duty & DUTY_PP)
-    {
-        /* Assumes uniform use of the number of OpenMP threads */
-        walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault));
-
-        if (inputrec->bPull)
-        {
-            /* Initialize pull code */
-            inputrec->pull_work =
-                init_pull(fplog, inputrec->pull, inputrec, nfile, fnm,
-                          mtop, cr, oenv, inputrec->fepvals->init_lambda,
-                          EI_DYNAMICS(inputrec->eI) && MASTER(cr), Flags);
-        }
-
-        if (inputrec->bRot)
-        {
-            /* Initialize enforced rotation code */
-            init_rot(fplog, inputrec, nfile, fnm, cr, state->x, box, mtop, oenv,
-                     bVerbose, Flags);
-        }
-
-        if (inputrec->eSwapCoords != eswapNO)
-        {
-            /* Initialize ion swapping code */
-            init_swapcoords(fplog, bVerbose, inputrec, opt2fn_master("-swap", nfile, fnm, cr),
-                            mtop, state->x, state->box, &state->swapstate, cr, oenv, Flags);
-        }
-
-        constr = init_constraints(fplog, mtop, inputrec, ed, state, cr);
-
-        if (DOMAINDECOMP(cr))
-        {
-            GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP");
-            dd_init_bondeds(fplog, cr->dd, mtop, vsite, inputrec,
-                            Flags & MD_DDBONDCHECK, fr->cginfo_mb);
-
-            set_dd_parameters(fplog, cr->dd, dlb_scale, inputrec, &ddbox);
-
-            setup_dd_grid(fplog, cr->dd);
-        }
-
-        /* Now do whatever the user wants us to do (how flexible...) */
-        integrator[inputrec->eI].func(fplog, cr, nfile, fnm,
-                                      oenv, bVerbose, bCompact,
-                                      nstglobalcomm,
-                                      vsite, constr,
-                                      nstepout, inputrec, mtop,
-                                      fcd, state,
-                                      mdatoms, nrnb, wcycle, ed, fr,
-                                      repl_ex_nst, repl_ex_nex, repl_ex_seed,
-                                      membed,
-                                      cpt_period, max_hours,
-                                      imdport,
-                                      Flags,
-                                      walltime_accounting);
-
-        if (inputrec->bPull)
-        {
-            finish_pull(inputrec->pull_work);
-        }
-
-        if (inputrec->bRot)
-        {
-            finish_rot(inputrec->rot);
-        }
-
-    }
-    else
-    {
-        GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP");
-        /* do PME only */
-        walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME));
-        gmx_pmeonly(*pmedata, cr, nrnb, wcycle, walltime_accounting, ewaldcoeff_q, ewaldcoeff_lj, inputrec);
-    }
-
-    wallcycle_stop(wcycle, ewcRUN);
-
-    /* Finish up, write some stuff
-     * if rerunMD, don't write last frame again
-     */
-    finish_run(fplog, cr,
-               inputrec, nrnb, wcycle, walltime_accounting,
-               fr ? fr->nbv : NULL,
-               EI_DYNAMICS(inputrec->eI) && !MULTISIM(cr));
-
-
-    /* Free GPU memory and context */
-    free_gpu_resources(fr, cr, &hwinfo->gpu_info, fr ? fr->gpu_opt : NULL);
-
-    if (opt2bSet("-membed", nfile, fnm))
-    {
-        sfree(membed);
-    }
-
-    gmx_hardware_info_free(hwinfo);
-
-    /* Does what it says */
-    print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime());
-    walltime_accounting_destroy(walltime_accounting);
-
-    /* PLUMED */
-    if(plumedswitch){
-      plumed_finalize(plumedmain);
-    }
-    /* END PLUMED */
-
-    /* Close logfile already here if we were appending to it */
-    if (MASTER(cr) && (Flags & MD_APPENDFILES))
-    {
-        gmx_log_close(fplog);
-    }
-
-    rc = (int)gmx_get_stop_condition();
-
-    done_ed(&ed);
-
-#ifdef GMX_THREAD_MPI
-    /* we need to join all threads. The sub-threads join when they
-       exit this function, but the master thread needs to be told to
-       wait for that. */
-    if (PAR(cr) && MASTER(cr))
-    {
-        tMPI_Finalize();
-    }
-#endif
-
-    return rc;
-}
diff --git a/patches/gromacs-5.1.4.diff/src/programs/mdrun/runner.cpp.preplumed b/patches/gromacs-5.1.4.diff/src/programs/mdrun/runner.cpp.preplumed
deleted file mode 100644
index 7489b07dcbee6c6f730415ef166ed857afaffe4b..0000000000000000000000000000000000000000
--- a/patches/gromacs-5.1.4.diff/src/programs/mdrun/runner.cpp.preplumed
+++ /dev/null
@@ -1,1351 +0,0 @@
-/*
- * This file is part of the GROMACS molecular simulation package.
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team.
- * Copyright (c) 2011,2012,2013,2014,2015,2016, by the GROMACS development team, led by
- * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
- * and including many others, as listed in the AUTHORS file in the
- * top-level source directory and at http://www.gromacs.org.
- *
- * GROMACS is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * GROMACS is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GROMACS; if not, see
- * http://www.gnu.org/licenses, or write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
- *
- * If you want to redistribute modifications to GROMACS, please
- * consider that scientific software is very special. Version
- * control is crucial - bugs must be traceable. We will be happy to
- * consider code for inclusion in the official distribution, but
- * derived work must not be called official GROMACS. Details are found
- * in the README & COPYING files - if they are missing, get the
- * official version at http://www.gromacs.org.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org.
- */
-
-#include "gmxpre.h"
-
-#include "config.h"
-
-#include <assert.h>
-#include <signal.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <algorithm>
-
-#include "gromacs/domdec/domdec.h"
-#include "gromacs/essentialdynamics/edsam.h"
-#include "gromacs/ewald/pme.h"
-#include "gromacs/fileio/tpxio.h"
-#include "gromacs/gmxlib/gpu_utils/gpu_utils.h"
-#include "gromacs/legacyheaders/checkpoint.h"
-#include "gromacs/legacyheaders/constr.h"
-#include "gromacs/legacyheaders/copyrite.h"
-#include "gromacs/legacyheaders/disre.h"
-#include "gromacs/legacyheaders/force.h"
-#include "gromacs/legacyheaders/gmx_detect_hardware.h"
-#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
-#include "gromacs/legacyheaders/gmx_thread_affinity.h"
-#include "gromacs/legacyheaders/inputrec.h"
-#include "gromacs/legacyheaders/main.h"
-#include "gromacs/legacyheaders/md_logging.h"
-#include "gromacs/legacyheaders/md_support.h"
-#include "gromacs/legacyheaders/mdatoms.h"
-#include "gromacs/legacyheaders/mdrun.h"
-#include "gromacs/legacyheaders/names.h"
-#include "gromacs/legacyheaders/network.h"
-#include "gromacs/legacyheaders/oenv.h"
-#include "gromacs/legacyheaders/orires.h"
-#include "gromacs/legacyheaders/qmmm.h"
-#include "gromacs/legacyheaders/sighandler.h"
-#include "gromacs/legacyheaders/txtdump.h"
-#include "gromacs/legacyheaders/typedefs.h"
-#include "gromacs/math/calculate-ewald-splitting-coefficient.h"
-#include "gromacs/math/vec.h"
-#include "gromacs/mdlib/calc_verletbuf.h"
-#include "gromacs/mdlib/nbnxn_search.h"
-#include "gromacs/pbcutil/pbc.h"
-#include "gromacs/pulling/pull.h"
-#include "gromacs/pulling/pull_rotation.h"
-#include "gromacs/swap/swapcoords.h"
-#include "gromacs/timing/wallcycle.h"
-#include "gromacs/topology/mtop_util.h"
-#include "gromacs/utility/gmxassert.h"
-#include "gromacs/utility/gmxmpi.h"
-#include "gromacs/utility/smalloc.h"
-
-#include "deform.h"
-#include "membed.h"
-#include "repl_ex.h"
-#include "resource-division.h"
-
-#ifdef GMX_FAHCORE
-#include "corewrap.h"
-#endif
-
-typedef struct {
-    gmx_integrator_t *func;
-} gmx_intp_t;
-
-/* The array should match the eI array in include/types/enums.h */
-const gmx_intp_t    integrator[eiNR] = { {do_md}, {do_steep}, {do_cg}, {do_md}, {do_md}, {do_nm}, {do_lbfgs}, {do_tpi}, {do_tpi}, {do_md}, {do_md}, {do_md}};
-
-gmx_int64_t         deform_init_init_step_tpx;
-matrix              deform_init_box_tpx;
-tMPI_Thread_mutex_t deform_init_box_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
-
-
-#ifdef GMX_THREAD_MPI
-/* The minimum number of atoms per tMPI thread. With fewer atoms than this,
- * the number of threads will get lowered.
- */
-#define MIN_ATOMS_PER_MPI_THREAD    90
-#define MIN_ATOMS_PER_GPU           900
-
-struct mdrunner_arglist
-{
-    gmx_hw_opt_t    hw_opt;
-    FILE           *fplog;
-    t_commrec      *cr;
-    int             nfile;
-    const t_filenm *fnm;
-    output_env_t    oenv;
-    gmx_bool        bVerbose;
-    gmx_bool        bCompact;
-    int             nstglobalcomm;
-    ivec            ddxyz;
-    int             dd_node_order;
-    real            rdd;
-    real            rconstr;
-    const char     *dddlb_opt;
-    real            dlb_scale;
-    const char     *ddcsx;
-    const char     *ddcsy;
-    const char     *ddcsz;
-    const char     *nbpu_opt;
-    int             nstlist_cmdline;
-    gmx_int64_t     nsteps_cmdline;
-    int             nstepout;
-    int             resetstep;
-    int             nmultisim;
-    int             repl_ex_nst;
-    int             repl_ex_nex;
-    int             repl_ex_seed;
-    real            pforce;
-    real            cpt_period;
-    real            max_hours;
-    int             imdport;
-    unsigned long   Flags;
-};
-
-
-/* The function used for spawning threads. Extracts the mdrunner()
-   arguments from its one argument and calls mdrunner(), after making
-   a commrec. */
-static void mdrunner_start_fn(void *arg)
-{
-    struct mdrunner_arglist *mda = (struct mdrunner_arglist*)arg;
-    struct mdrunner_arglist  mc  = *mda; /* copy the arg list to make sure
-                                            that it's thread-local. This doesn't
-                                            copy pointed-to items, of course,
-                                            but those are all const. */
-    t_commrec *cr;                       /* we need a local version of this */
-    FILE      *fplog = NULL;
-    t_filenm  *fnm;
-
-    fnm = dup_tfn(mc.nfile, mc.fnm);
-
-    cr = reinitialize_commrec_for_this_thread(mc.cr);
-
-    if (MASTER(cr))
-    {
-        fplog = mc.fplog;
-    }
-
-    mdrunner(&mc.hw_opt, fplog, cr, mc.nfile, fnm, mc.oenv,
-             mc.bVerbose, mc.bCompact, mc.nstglobalcomm,
-             mc.ddxyz, mc.dd_node_order, mc.rdd,
-             mc.rconstr, mc.dddlb_opt, mc.dlb_scale,
-             mc.ddcsx, mc.ddcsy, mc.ddcsz,
-             mc.nbpu_opt, mc.nstlist_cmdline,
-             mc.nsteps_cmdline, mc.nstepout, mc.resetstep,
-             mc.nmultisim, mc.repl_ex_nst, mc.repl_ex_nex, mc.repl_ex_seed, mc.pforce,
-             mc.cpt_period, mc.max_hours, mc.imdport, mc.Flags);
-}
-
-/* called by mdrunner() to start a specific number of threads (including
-   the main thread) for thread-parallel runs. This in turn calls mdrunner()
-   for each thread.
-   All options besides nthreads are the same as for mdrunner(). */
-static t_commrec *mdrunner_start_threads(gmx_hw_opt_t *hw_opt,
-                                         FILE *fplog, t_commrec *cr, int nfile,
-                                         const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
-                                         gmx_bool bCompact, int nstglobalcomm,
-                                         ivec ddxyz, int dd_node_order, real rdd, real rconstr,
-                                         const char *dddlb_opt, real dlb_scale,
-                                         const char *ddcsx, const char *ddcsy, const char *ddcsz,
-                                         const char *nbpu_opt, int nstlist_cmdline,
-                                         gmx_int64_t nsteps_cmdline,
-                                         int nstepout, int resetstep,
-                                         int nmultisim, int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
-                                         real pforce, real cpt_period, real max_hours,
-                                         unsigned long Flags)
-{
-    int                      ret;
-    struct mdrunner_arglist *mda;
-    t_commrec               *crn; /* the new commrec */
-    t_filenm                *fnmn;
-
-    /* first check whether we even need to start tMPI */
-    if (hw_opt->nthreads_tmpi < 2)
-    {
-        return cr;
-    }
-
-    /* a few small, one-time, almost unavoidable memory leaks: */
-    snew(mda, 1);
-    fnmn = dup_tfn(nfile, fnm);
-
-    /* fill the data structure to pass as void pointer to thread start fn */
-    /* hw_opt contains pointers, which should all be NULL at this stage */
-    mda->hw_opt          = *hw_opt;
-    mda->fplog           = fplog;
-    mda->cr              = cr;
-    mda->nfile           = nfile;
-    mda->fnm             = fnmn;
-    mda->oenv            = oenv;
-    mda->bVerbose        = bVerbose;
-    mda->bCompact        = bCompact;
-    mda->nstglobalcomm   = nstglobalcomm;
-    mda->ddxyz[XX]       = ddxyz[XX];
-    mda->ddxyz[YY]       = ddxyz[YY];
-    mda->ddxyz[ZZ]       = ddxyz[ZZ];
-    mda->dd_node_order   = dd_node_order;
-    mda->rdd             = rdd;
-    mda->rconstr         = rconstr;
-    mda->dddlb_opt       = dddlb_opt;
-    mda->dlb_scale       = dlb_scale;
-    mda->ddcsx           = ddcsx;
-    mda->ddcsy           = ddcsy;
-    mda->ddcsz           = ddcsz;
-    mda->nbpu_opt        = nbpu_opt;
-    mda->nstlist_cmdline = nstlist_cmdline;
-    mda->nsteps_cmdline  = nsteps_cmdline;
-    mda->nstepout        = nstepout;
-    mda->resetstep       = resetstep;
-    mda->nmultisim       = nmultisim;
-    mda->repl_ex_nst     = repl_ex_nst;
-    mda->repl_ex_nex     = repl_ex_nex;
-    mda->repl_ex_seed    = repl_ex_seed;
-    mda->pforce          = pforce;
-    mda->cpt_period      = cpt_period;
-    mda->max_hours       = max_hours;
-    mda->Flags           = Flags;
-
-    /* now spawn new threads that start mdrunner_start_fn(), while
-       the main thread returns, we set thread affinity later */
-    ret = tMPI_Init_fn(TRUE, hw_opt->nthreads_tmpi, TMPI_AFFINITY_NONE,
-                       mdrunner_start_fn, (void*)(mda) );
-    if (ret != TMPI_SUCCESS)
-    {
-        return NULL;
-    }
-
-    crn = reinitialize_commrec_for_this_thread(cr);
-    return crn;
-}
-
-#endif /* GMX_THREAD_MPI */
-
-
-/* We determine the extra cost of the non-bonded kernels compared to
- * a reference nstlist value of 10 (which is the default in grompp).
- */
-static const int    nbnxnReferenceNstlist = 10;
-/* The values to try when switching  */
-const int           nstlist_try[] = { 20, 25, 40 };
-#define NNSTL  sizeof(nstlist_try)/sizeof(nstlist_try[0])
-/* Increase nstlist until the non-bonded cost increases more than listfac_ok,
- * but never more than listfac_max.
- * A standard (protein+)water system at 300K with PME ewald_rtol=1e-5
- * needs 1.28 at rcoulomb=0.9 and 1.24 at rcoulomb=1.0 to get to nstlist=40.
- * Note that both CPU and GPU factors are conservative. Performance should
- * not go down due to this tuning, except with a relatively slow GPU.
- * On the other hand, at medium/high parallelization or with fast GPUs
- * nstlist will not be increased enough to reach optimal performance.
- */
-/* CPU: pair-search is about a factor 1.5 slower than the non-bonded kernel */
-static const float  nbnxn_cpu_listfac_ok    = 1.05;
-static const float  nbnxn_cpu_listfac_max   = 1.09;
-/* GPU: pair-search is a factor 1.5-3 slower than the non-bonded kernel */
-static const float  nbnxn_gpu_listfac_ok    = 1.20;
-static const float  nbnxn_gpu_listfac_max   = 1.30;
-
-/* Try to increase nstlist when using the Verlet cut-off scheme */
-static void increase_nstlist(FILE *fp, t_commrec *cr,
-                             t_inputrec *ir, int nstlist_cmdline,
-                             const gmx_mtop_t *mtop, matrix box,
-                             gmx_bool bGPU)
-{
-    float                  listfac_ok, listfac_max;
-    int                    nstlist_orig, nstlist_prev;
-    verletbuf_list_setup_t ls;
-    real                   rlistWithReferenceNstlist, rlist_inc, rlist_ok, rlist_max;
-    real                   rlist_new, rlist_prev;
-    size_t                 nstlist_ind = 0;
-    t_state                state_tmp;
-    gmx_bool               bBox, bDD, bCont;
-    const char            *nstl_gpu = "\nFor optimal performance with a GPU nstlist (now %d) should be larger.\nThe optimum depends on your CPU and GPU resources.\nYou might want to try several nstlist values.\n";
-    const char            *nve_err  = "Can not increase nstlist because an NVE ensemble is used";
-    const char            *vbd_err  = "Can not increase nstlist because verlet-buffer-tolerance is not set or used";
-    const char            *box_err  = "Can not increase nstlist because the box is too small";
-    const char            *dd_err   = "Can not increase nstlist because of domain decomposition limitations";
-    char                   buf[STRLEN];
-    const float            oneThird = 1.0f / 3.0f;
-
-    if (nstlist_cmdline <= 0)
-    {
-        if (ir->nstlist == 1)
-        {
-            /* The user probably set nstlist=1 for a reason,
-             * don't mess with the settings.
-             */
-            return;
-        }
-
-        if (fp != NULL && bGPU && ir->nstlist < nstlist_try[0])
-        {
-            fprintf(fp, nstl_gpu, ir->nstlist);
-        }
-        nstlist_ind = 0;
-        while (nstlist_ind < NNSTL && ir->nstlist >= nstlist_try[nstlist_ind])
-        {
-            nstlist_ind++;
-        }
-        if (nstlist_ind == NNSTL)
-        {
-            /* There are no larger nstlist value to try */
-            return;
-        }
-    }
-
-    if (EI_MD(ir->eI) && ir->etc == etcNO)
-    {
-        if (MASTER(cr))
-        {
-            fprintf(stderr, "%s\n", nve_err);
-        }
-        if (fp != NULL)
-        {
-            fprintf(fp, "%s\n", nve_err);
-        }
-
-        return;
-    }
-
-    if (ir->verletbuf_tol == 0 && bGPU)
-    {
-        gmx_fatal(FARGS, "You are using an old tpr file with a GPU, please generate a new tpr file with an up to date version of grompp");
-    }
-
-    if (ir->verletbuf_tol < 0)
-    {
-        if (MASTER(cr))
-        {
-            fprintf(stderr, "%s\n", vbd_err);
-        }
-        if (fp != NULL)
-        {
-            fprintf(fp, "%s\n", vbd_err);
-        }
-
-        return;
-    }
-
-    if (bGPU)
-    {
-        listfac_ok  = nbnxn_gpu_listfac_ok;
-        listfac_max = nbnxn_gpu_listfac_max;
-    }
-    else
-    {
-        listfac_ok  = nbnxn_cpu_listfac_ok;
-        listfac_max = nbnxn_cpu_listfac_max;
-    }
-
-    nstlist_orig = ir->nstlist;
-    if (nstlist_cmdline > 0)
-    {
-        if (fp)
-        {
-            sprintf(buf, "Getting nstlist=%d from command line option",
-                    nstlist_cmdline);
-        }
-        ir->nstlist = nstlist_cmdline;
-    }
-
-    verletbuf_get_list_setup(TRUE, bGPU, &ls);
-
-    /* Allow rlist to make the list a given factor larger than the list
-     * would be with the reference value for nstlist (10).
-     */
-    nstlist_prev = ir->nstlist;
-    ir->nstlist  = nbnxnReferenceNstlist;
-    calc_verlet_buffer_size(mtop, det(box), ir, -1, &ls, NULL,
-                            &rlistWithReferenceNstlist);
-    ir->nstlist  = nstlist_prev;
-
-    /* Determine the pair list size increase due to zero interactions */
-    rlist_inc = nbnxn_get_rlist_effective_inc(ls.cluster_size_j,
-                                              mtop->natoms/det(box));
-    rlist_ok  = (rlistWithReferenceNstlist + rlist_inc)*pow(listfac_ok, oneThird) - rlist_inc;
-    rlist_max = (rlistWithReferenceNstlist + rlist_inc)*pow(listfac_max, oneThird) - rlist_inc;
-    if (debug)
-    {
-        fprintf(debug, "nstlist tuning: rlist_inc %.3f rlist_ok %.3f rlist_max %.3f\n",
-                rlist_inc, rlist_ok, rlist_max);
-    }
-
-    nstlist_prev = nstlist_orig;
-    rlist_prev   = ir->rlist;
-    do
-    {
-        if (nstlist_cmdline <= 0)
-        {
-            ir->nstlist = nstlist_try[nstlist_ind];
-        }
-
-        /* Set the pair-list buffer size in ir */
-        calc_verlet_buffer_size(mtop, det(box), ir, -1, &ls, NULL, &rlist_new);
-
-        /* Does rlist fit in the box? */
-        bBox = (sqr(rlist_new) < max_cutoff2(ir->ePBC, box));
-        bDD  = TRUE;
-        if (bBox && DOMAINDECOMP(cr))
-        {
-            /* Check if rlist fits in the domain decomposition */
-            if (inputrec2nboundeddim(ir) < DIM)
-            {
-                gmx_incons("Changing nstlist with domain decomposition and unbounded dimensions is not implemented yet");
-            }
-            copy_mat(box, state_tmp.box);
-            bDD = change_dd_cutoff(cr, &state_tmp, ir, rlist_new);
-        }
-
-        if (debug)
-        {
-            fprintf(debug, "nstlist %d rlist %.3f bBox %d bDD %d\n",
-                    ir->nstlist, rlist_new, bBox, bDD);
-        }
-
-        bCont = FALSE;
-
-        if (nstlist_cmdline <= 0)
-        {
-            if (bBox && bDD && rlist_new <= rlist_max)
-            {
-                /* Increase nstlist */
-                nstlist_prev = ir->nstlist;
-                rlist_prev   = rlist_new;
-                bCont        = (nstlist_ind+1 < NNSTL && rlist_new < rlist_ok);
-            }
-            else
-            {
-                /* Stick with the previous nstlist */
-                ir->nstlist = nstlist_prev;
-                rlist_new   = rlist_prev;
-                bBox        = TRUE;
-                bDD         = TRUE;
-            }
-        }
-
-        nstlist_ind++;
-    }
-    while (bCont);
-
-    if (!bBox || !bDD)
-    {
-        gmx_warning(!bBox ? box_err : dd_err);
-        if (fp != NULL)
-        {
-            fprintf(fp, "\n%s\n", bBox ? box_err : dd_err);
-        }
-        ir->nstlist = nstlist_orig;
-    }
-    else if (ir->nstlist != nstlist_orig || rlist_new != ir->rlist)
-    {
-        sprintf(buf, "Changing nstlist from %d to %d, rlist from %g to %g",
-                nstlist_orig, ir->nstlist,
-                ir->rlist, rlist_new);
-        if (MASTER(cr))
-        {
-            fprintf(stderr, "%s\n\n", buf);
-        }
-        if (fp != NULL)
-        {
-            fprintf(fp, "%s\n\n", buf);
-        }
-        ir->rlist     = rlist_new;
-        ir->rlistlong = rlist_new;
-    }
-}
-
-static void prepare_verlet_scheme(FILE                           *fplog,
-                                  t_commrec                      *cr,
-                                  t_inputrec                     *ir,
-                                  int                             nstlist_cmdline,
-                                  const gmx_mtop_t               *mtop,
-                                  matrix                          box,
-                                  gmx_bool                        bUseGPU)
-{
-    /* For NVE simulations, we will retain the initial list buffer */
-    if (EI_DYNAMICS(ir->eI) &&
-        ir->verletbuf_tol > 0 &&
-        !(EI_MD(ir->eI) && ir->etc == etcNO))
-    {
-        /* Update the Verlet buffer size for the current run setup */
-        verletbuf_list_setup_t ls;
-        real                   rlist_new;
-
-        /* Here we assume SIMD-enabled kernels are being used. But as currently
-         * calc_verlet_buffer_size gives the same results for 4x8 and 4x4
-         * and 4x2 gives a larger buffer than 4x4, this is ok.
-         */
-        verletbuf_get_list_setup(TRUE, bUseGPU, &ls);
-
-        calc_verlet_buffer_size(mtop, det(box), ir, -1, &ls, NULL, &rlist_new);
-
-        if (rlist_new != ir->rlist)
-        {
-            if (fplog != NULL)
-            {
-                fprintf(fplog, "\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n",
-                        ir->rlist, rlist_new,
-                        ls.cluster_size_i, ls.cluster_size_j);
-            }
-            ir->rlist     = rlist_new;
-            ir->rlistlong = rlist_new;
-        }
-    }
-
-    if (nstlist_cmdline > 0 && (!EI_DYNAMICS(ir->eI) || ir->verletbuf_tol <= 0))
-    {
-        gmx_fatal(FARGS, "Can not set nstlist without %s",
-                  !EI_DYNAMICS(ir->eI) ? "dynamics" : "verlet-buffer-tolerance");
-    }
-
-    if (EI_DYNAMICS(ir->eI))
-    {
-        /* Set or try nstlist values */
-        increase_nstlist(fplog, cr, ir, nstlist_cmdline, mtop, box, bUseGPU);
-    }
-}
-
-/* Override the value in inputrec with value passed on the command line (if any) */
-static void override_nsteps_cmdline(FILE            *fplog,
-                                    gmx_int64_t      nsteps_cmdline,
-                                    t_inputrec      *ir,
-                                    const t_commrec *cr)
-{
-    assert(ir);
-    assert(cr);
-
-    /* override with anything else than the default -2 */
-    if (nsteps_cmdline > -2)
-    {
-        char sbuf_steps[STEPSTRSIZE];
-        char sbuf_msg[STRLEN];
-
-        ir->nsteps = nsteps_cmdline;
-        if (EI_DYNAMICS(ir->eI) && nsteps_cmdline != -1)
-        {
-            sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps, %.3g ps",
-                    gmx_step_str(nsteps_cmdline, sbuf_steps),
-                    fabs(nsteps_cmdline*ir->delta_t));
-        }
-        else
-        {
-            sprintf(sbuf_msg, "Overriding nsteps with value passed on the command line: %s steps",
-                    gmx_step_str(nsteps_cmdline, sbuf_steps));
-        }
-
-        md_print_warn(cr, fplog, "%s\n", sbuf_msg);
-    }
-    else if (nsteps_cmdline < -2)
-    {
-        gmx_fatal(FARGS, "Invalid nsteps value passed on the command line: %d",
-                  nsteps_cmdline);
-    }
-    /* Do nothing if nsteps_cmdline == -2 */
-}
-
-int mdrunner(gmx_hw_opt_t *hw_opt,
-             FILE *fplog, t_commrec *cr, int nfile,
-             const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
-             gmx_bool bCompact, int nstglobalcomm,
-             ivec ddxyz, int dd_node_order, real rdd, real rconstr,
-             const char *dddlb_opt, real dlb_scale,
-             const char *ddcsx, const char *ddcsy, const char *ddcsz,
-             const char *nbpu_opt, int nstlist_cmdline,
-             gmx_int64_t nsteps_cmdline, int nstepout, int resetstep,
-             int gmx_unused nmultisim, int repl_ex_nst, int repl_ex_nex,
-             int repl_ex_seed, real pforce, real cpt_period, real max_hours,
-             int imdport, unsigned long Flags)
-{
-    gmx_bool                  bForceUseGPU, bTryUseGPU, bRerunMD;
-    t_inputrec               *inputrec;
-    t_state                  *state = NULL;
-    matrix                    box;
-    gmx_ddbox_t               ddbox = {0};
-    int                       npme_major, npme_minor;
-    t_nrnb                   *nrnb;
-    gmx_mtop_t               *mtop          = NULL;
-    t_mdatoms                *mdatoms       = NULL;
-    t_forcerec               *fr            = NULL;
-    t_fcdata                 *fcd           = NULL;
-    real                      ewaldcoeff_q  = 0;
-    real                      ewaldcoeff_lj = 0;
-    struct gmx_pme_t        **pmedata       = NULL;
-    gmx_vsite_t              *vsite         = NULL;
-    gmx_constr_t              constr;
-    int                       nChargePerturbed = -1, nTypePerturbed = 0, status;
-    gmx_wallcycle_t           wcycle;
-    gmx_bool                  bReadEkin;
-    gmx_walltime_accounting_t walltime_accounting = NULL;
-    int                       rc;
-    gmx_int64_t               reset_counters;
-    gmx_edsam_t               ed           = NULL;
-    int                       nthreads_pme = 1;
-    int                       nthreads_pp  = 1;
-    gmx_membed_t              membed       = NULL;
-    gmx_hw_info_t            *hwinfo       = NULL;
-    /* The master rank decides early on bUseGPU and broadcasts this later */
-    gmx_bool                  bUseGPU      = FALSE;
-
-    /* CAUTION: threads may be started later on in this function, so
-       cr doesn't reflect the final parallel state right now */
-    snew(inputrec, 1);
-    snew(mtop, 1);
-
-    if (Flags & MD_APPENDFILES)
-    {
-        fplog = NULL;
-    }
-
-    bRerunMD     = (Flags & MD_RERUN);
-    bForceUseGPU = (strncmp(nbpu_opt, "gpu", 3) == 0);
-    bTryUseGPU   = (strncmp(nbpu_opt, "auto", 4) == 0) || bForceUseGPU;
-
-    /* Detect hardware, gather information. This is an operation that is
-     * global for this process (MPI rank). */
-    hwinfo = gmx_detect_hardware(fplog, cr, bTryUseGPU);
-
-    gmx_print_detected_hardware(fplog, cr, hwinfo);
-
-    if (fplog != NULL)
-    {
-        /* Print references after all software/hardware printing */
-        please_cite(fplog, "Abraham2015");
-        please_cite(fplog, "Pall2015");
-        please_cite(fplog, "Pronk2013");
-        please_cite(fplog, "Hess2008b");
-        please_cite(fplog, "Spoel2005a");
-        please_cite(fplog, "Lindahl2001a");
-        please_cite(fplog, "Berendsen95a");
-    }
-
-    snew(state, 1);
-    if (SIMMASTER(cr))
-    {
-        /* Read (nearly) all data required for the simulation */
-        read_tpx_state(ftp2fn(efTPR, nfile, fnm), inputrec, state, NULL, mtop);
-
-        if (inputrec->cutoff_scheme == ecutsVERLET)
-        {
-            /* Here the master rank decides if all ranks will use GPUs */
-            bUseGPU = (hwinfo->gpu_info.n_dev_compatible > 0 ||
-                       getenv("GMX_EMULATE_GPU") != NULL);
-
-            /* TODO add GPU kernels for this and replace this check by:
-             * (bUseGPU && (ir->vdwtype == evdwPME &&
-             *               ir->ljpme_combination_rule == eljpmeLB))
-             * update the message text and the content of nbnxn_acceleration_supported.
-             */
-            if (bUseGPU &&
-                !nbnxn_gpu_acceleration_supported(fplog, cr, inputrec, bRerunMD))
-            {
-                /* Fallback message printed by nbnxn_acceleration_supported */
-                if (bForceUseGPU)
-                {
-                    gmx_fatal(FARGS, "GPU acceleration requested, but not supported with the given input settings");
-                }
-                bUseGPU = FALSE;
-            }
-
-            prepare_verlet_scheme(fplog, cr,
-                                  inputrec, nstlist_cmdline, mtop, state->box,
-                                  bUseGPU);
-        }
-        else
-        {
-            if (nstlist_cmdline > 0)
-            {
-                gmx_fatal(FARGS, "Can not set nstlist with the group cut-off scheme");
-            }
-
-            if (hwinfo->gpu_info.n_dev_compatible > 0)
-            {
-                md_print_warn(cr, fplog,
-                              "NOTE: GPU(s) found, but the current simulation can not use GPUs\n"
-                              "      To use a GPU, set the mdp option: cutoff-scheme = Verlet\n");
-            }
-
-            if (bForceUseGPU)
-            {
-                gmx_fatal(FARGS, "GPU requested, but can't be used without cutoff-scheme=Verlet");
-            }
-
-#ifdef GMX_TARGET_BGQ
-            md_print_warn(cr, fplog,
-                          "NOTE: There is no SIMD implementation of the group scheme kernels on\n"
-                          "      BlueGene/Q. You will observe better performance from using the\n"
-                          "      Verlet cut-off scheme.\n");
-#endif
-        }
-
-        if (inputrec->eI == eiSD2)
-        {
-            md_print_warn(cr, fplog, "The stochastic dynamics integrator %s is deprecated, since\n"
-                          "it is slower than integrator %s and is slightly less accurate\n"
-                          "with constraints. Use the %s integrator.",
-                          ei_names[inputrec->eI], ei_names[eiSD1], ei_names[eiSD1]);
-        }
-    }
-
-    /* Check and update the hardware options for internal consistency */
-    check_and_update_hw_opt_1(hw_opt, cr);
-
-    /* Early check for externally set process affinity. */
-    gmx_check_thread_affinity_set(fplog, cr,
-                                  hw_opt, hwinfo->nthreads_hw_avail, FALSE);
-
-#ifdef GMX_THREAD_MPI
-    if (SIMMASTER(cr))
-    {
-        if (cr->npmenodes > 0 && hw_opt->nthreads_tmpi <= 0)
-        {
-            gmx_fatal(FARGS, "You need to explicitly specify the number of MPI threads (-ntmpi) when using separate PME ranks");
-        }
-
-        /* Since the master knows the cut-off scheme, update hw_opt for this.
-         * This is done later for normal MPI and also once more with tMPI
-         * for all tMPI ranks.
-         */
-        check_and_update_hw_opt_2(hw_opt, inputrec->cutoff_scheme);
-
-        /* NOW the threads will be started: */
-        hw_opt->nthreads_tmpi = get_nthreads_mpi(hwinfo,
-                                                 hw_opt,
-                                                 inputrec, mtop,
-                                                 cr, fplog, bUseGPU);
-
-        if (hw_opt->nthreads_tmpi > 1)
-        {
-            t_commrec *cr_old       = cr;
-            /* now start the threads. */
-            cr = mdrunner_start_threads(hw_opt, fplog, cr_old, nfile, fnm,
-                                        oenv, bVerbose, bCompact, nstglobalcomm,
-                                        ddxyz, dd_node_order, rdd, rconstr,
-                                        dddlb_opt, dlb_scale, ddcsx, ddcsy, ddcsz,
-                                        nbpu_opt, nstlist_cmdline,
-                                        nsteps_cmdline, nstepout, resetstep, nmultisim,
-                                        repl_ex_nst, repl_ex_nex, repl_ex_seed, pforce,
-                                        cpt_period, max_hours,
-                                        Flags);
-            /* the main thread continues here with a new cr. We don't deallocate
-               the old cr because other threads may still be reading it. */
-            if (cr == NULL)
-            {
-                gmx_comm("Failed to spawn threads");
-            }
-        }
-    }
-#endif
-    /* END OF CAUTION: cr is now reliable */
-
-    /* g_membed initialisation *
-     * Because we change the mtop, init_membed is called before the init_parallel *
-     * (in case we ever want to make it run in parallel) */
-    if (opt2bSet("-membed", nfile, fnm))
-    {
-        if (MASTER(cr))
-        {
-            fprintf(stderr, "Initializing membed");
-        }
-        membed = init_membed(fplog, nfile, fnm, mtop, inputrec, state, cr, &cpt_period);
-    }
-
-    if (PAR(cr))
-    {
-        /* now broadcast everything to the non-master nodes/threads: */
-        init_parallel(cr, inputrec, mtop);
-
-        /* The master rank decided on the use of GPUs,
-         * broadcast this information to all ranks.
-         */
-        gmx_bcast_sim(sizeof(bUseGPU), &bUseGPU, cr);
-    }
-
-    if (fplog != NULL)
-    {
-        pr_inputrec(fplog, 0, "Input Parameters", inputrec, FALSE);
-        fprintf(fplog, "\n");
-    }
-
-    /* now make sure the state is initialized and propagated */
-    set_state_entries(state, inputrec);
-
-    /* A parallel command line option consistency check that we can
-       only do after any threads have started. */
-    if (!PAR(cr) &&
-        (ddxyz[XX] > 1 || ddxyz[YY] > 1 || ddxyz[ZZ] > 1 || cr->npmenodes > 0))
-    {
-        gmx_fatal(FARGS,
-                  "The -dd or -npme option request a parallel simulation, "
-#ifndef GMX_MPI
-                  "but %s was compiled without threads or MPI enabled"
-#else
-#ifdef GMX_THREAD_MPI
-                  "but the number of MPI-threads (option -ntmpi) is not set or is 1"
-#else
-                  "but %s was not started through mpirun/mpiexec or only one rank was requested through mpirun/mpiexec"
-#endif
-#endif
-                  , output_env_get_program_display_name(oenv)
-                  );
-    }
-
-    if (bRerunMD &&
-        (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
-    {
-        gmx_fatal(FARGS, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun");
-    }
-
-    if (can_use_allvsall(inputrec, TRUE, cr, fplog) && DOMAINDECOMP(cr))
-    {
-        gmx_fatal(FARGS, "All-vs-all loops do not work with domain decomposition, use a single MPI rank");
-    }
-
-    if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)))
-    {
-        if (cr->npmenodes > 0)
-        {
-            gmx_fatal_collective(FARGS, cr, NULL,
-                                 "PME-only ranks are requested, but the system does not use PME for electrostatics or LJ");
-        }
-
-        cr->npmenodes = 0;
-    }
-
-    if (bUseGPU && cr->npmenodes < 0)
-    {
-        /* With GPUs we don't automatically use PME-only ranks. PME ranks can
-         * improve performance with many threads per GPU, since our OpenMP
-         * scaling is bad, but it's difficult to automate the setup.
-         */
-        cr->npmenodes = 0;
-    }
-
-#ifdef GMX_FAHCORE
-    if (MASTER(cr))
-    {
-        fcRegisterSteps(inputrec->nsteps, inputrec->init_step);
-    }
-#endif
-
-    /* NMR restraints must be initialized before load_checkpoint,
-     * since with time averaging the history is added to t_state.
-     * For proper consistency check we therefore need to extend
-     * t_state here.
-     * So the PME-only nodes (if present) will also initialize
-     * the distance restraints.
-     */
-    snew(fcd, 1);
-
-    /* This needs to be called before read_checkpoint to extend the state */
-    init_disres(fplog, mtop, inputrec, cr, fcd, state, repl_ex_nst > 0);
-
-    init_orires(fplog, mtop, state->x, inputrec, cr, &(fcd->orires),
-                state);
-
-    if (DEFORM(*inputrec))
-    {
-        /* Store the deform reference box before reading the checkpoint */
-        if (SIMMASTER(cr))
-        {
-            copy_mat(state->box, box);
-        }
-        if (PAR(cr))
-        {
-            gmx_bcast(sizeof(box), box, cr);
-        }
-        /* Because we do not have the update struct available yet
-         * in which the reference values should be stored,
-         * we store them temporarily in static variables.
-         * This should be thread safe, since they are only written once
-         * and with identical values.
-         */
-        tMPI_Thread_mutex_lock(&deform_init_box_mutex);
-        deform_init_init_step_tpx = inputrec->init_step;
-        copy_mat(box, deform_init_box_tpx);
-        tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
-    }
-
-    if (opt2bSet("-cpi", nfile, fnm))
-    {
-        /* Check if checkpoint file exists before doing continuation.
-         * This way we can use identical input options for the first and subsequent runs...
-         */
-        if (gmx_fexist_master(opt2fn_master("-cpi", nfile, fnm, cr), cr) )
-        {
-            load_checkpoint(opt2fn_master("-cpi", nfile, fnm, cr), &fplog,
-                            cr, ddxyz,
-                            inputrec, state, &bReadEkin,
-                            (Flags & MD_APPENDFILES),
-                            (Flags & MD_APPENDFILESSET));
-
-            if (bReadEkin)
-            {
-                Flags |= MD_READ_EKIN;
-            }
-        }
-    }
-
-    if (MASTER(cr) && (Flags & MD_APPENDFILES))
-    {
-        gmx_log_open(ftp2fn(efLOG, nfile, fnm), cr,
-                     Flags, &fplog);
-    }
-
-    /* override nsteps with value from cmdline */
-    override_nsteps_cmdline(fplog, nsteps_cmdline, inputrec, cr);
-
-    if (SIMMASTER(cr))
-    {
-        copy_mat(state->box, box);
-    }
-
-    if (PAR(cr))
-    {
-        gmx_bcast(sizeof(box), box, cr);
-    }
-
-    /* Essential dynamics */
-    if (opt2bSet("-ei", nfile, fnm))
-    {
-        /* Open input and output files, allocate space for ED data structure */
-        ed = ed_open(mtop->natoms, &state->edsamstate, nfile, fnm, Flags, oenv, cr);
-    }
-
-    if (PAR(cr) && !(EI_TPI(inputrec->eI) ||
-                     inputrec->eI == eiNM))
-    {
-        cr->dd = init_domain_decomposition(fplog, cr, Flags, ddxyz, rdd, rconstr,
-                                           dddlb_opt, dlb_scale,
-                                           ddcsx, ddcsy, ddcsz,
-                                           mtop, inputrec,
-                                           box, state->x,
-                                           &ddbox, &npme_major, &npme_minor);
-
-        make_dd_communicators(fplog, cr, dd_node_order);
-
-        /* Set overallocation to avoid frequent reallocation of arrays */
-        set_over_alloc_dd(TRUE);
-    }
-    else
-    {
-        /* PME, if used, is done on all nodes with 1D decomposition */
-        cr->npmenodes = 0;
-        cr->duty      = (DUTY_PP | DUTY_PME);
-        npme_major    = 1;
-        npme_minor    = 1;
-
-        if (inputrec->ePBC == epbcSCREW)
-        {
-            gmx_fatal(FARGS,
-                      "pbc=%s is only implemented with domain decomposition",
-                      epbc_names[inputrec->ePBC]);
-        }
-    }
-
-    if (PAR(cr))
-    {
-        /* After possible communicator splitting in make_dd_communicators.
-         * we can set up the intra/inter node communication.
-         */
-        gmx_setup_nodecomm(fplog, cr);
-    }
-
-    /* Initialize per-physical-node MPI process/thread ID and counters. */
-    gmx_init_intranode_counters(cr);
-#ifdef GMX_MPI
-    if (MULTISIM(cr))
-    {
-        md_print_info(cr, fplog,
-                      "This is simulation %d out of %d running as a composite GROMACS\n"
-                      "multi-simulation job. Setup for this simulation:\n\n",
-                      cr->ms->sim, cr->ms->nsim);
-    }
-    md_print_info(cr, fplog, "Using %d MPI %s\n",
-                  cr->nnodes,
-#ifdef GMX_THREAD_MPI
-                  cr->nnodes == 1 ? "thread" : "threads"
-#else
-                  cr->nnodes == 1 ? "process" : "processes"
-#endif
-                  );
-    fflush(stderr);
-#endif
-
-    /* Check and update hw_opt for the cut-off scheme */
-    check_and_update_hw_opt_2(hw_opt, inputrec->cutoff_scheme);
-
-    /* Check and update hw_opt for the number of MPI ranks */
-    check_and_update_hw_opt_3(hw_opt);
-
-    gmx_omp_nthreads_init(fplog, cr,
-                          hwinfo->nthreads_hw_avail,
-                          hw_opt->nthreads_omp,
-                          hw_opt->nthreads_omp_pme,
-                          (cr->duty & DUTY_PP) == 0,
-                          inputrec->cutoff_scheme == ecutsVERLET);
-
-#ifndef NDEBUG
-    if (integrator[inputrec->eI].func != do_tpi &&
-        inputrec->cutoff_scheme == ecutsVERLET)
-    {
-        gmx_feenableexcept();
-    }
-#endif
-
-    if (bUseGPU)
-    {
-        /* Select GPU id's to use */
-        gmx_select_gpu_ids(fplog, cr, &hwinfo->gpu_info, bForceUseGPU,
-                           &hw_opt->gpu_opt);
-    }
-    else
-    {
-        /* Ignore (potentially) manually selected GPUs */
-        hw_opt->gpu_opt.n_dev_use = 0;
-    }
-
-    /* check consistency across ranks of things like SIMD
-     * support and number of GPUs selected */
-    gmx_check_hw_runconf_consistency(fplog, hwinfo, cr, hw_opt, bUseGPU);
-
-    /* Now that we know the setup is consistent, check for efficiency */
-    check_resource_division_efficiency(hwinfo, hw_opt, Flags & MD_NTOMPSET,
-                                       cr, fplog);
-
-    if (DOMAINDECOMP(cr))
-    {
-        /* When we share GPUs over ranks, we need to know this for the DLB */
-        dd_setup_dlb_resource_sharing(cr, hwinfo, hw_opt);
-    }
-
-    /* getting number of PP/PME threads
-       PME: env variable should be read only on one node to make sure it is
-       identical everywhere;
-     */
-    /* TODO nthreads_pp is only used for pinning threads.
-     * This is a temporary solution until we have a hw topology library.
-     */
-    nthreads_pp  = gmx_omp_nthreads_get(emntNonbonded);
-    nthreads_pme = gmx_omp_nthreads_get(emntPME);
-
-    wcycle = wallcycle_init(fplog, resetstep, cr, nthreads_pp, nthreads_pme);
-
-    if (PAR(cr))
-    {
-        /* Master synchronizes its value of reset_counters with all nodes
-         * including PME only nodes */
-        reset_counters = wcycle_get_reset_counters(wcycle);
-        gmx_bcast_sim(sizeof(reset_counters), &reset_counters, cr);
-        wcycle_set_reset_counters(wcycle, reset_counters);
-    }
-
-    snew(nrnb, 1);
-    if (cr->duty & DUTY_PP)
-    {
-        bcast_state(cr, state);
-
-        /* Initiate forcerecord */
-        fr          = mk_forcerec();
-        fr->hwinfo  = hwinfo;
-        fr->gpu_opt = &hw_opt->gpu_opt;
-        init_forcerec(fplog, oenv, fr, fcd, inputrec, mtop, cr, box,
-                      opt2fn("-table", nfile, fnm),
-                      opt2fn("-tabletf", nfile, fnm),
-                      opt2fn("-tablep", nfile, fnm),
-                      getFilenm("-tableb", nfile, fnm),
-                      nbpu_opt,
-                      FALSE,
-                      pforce);
-
-        /* version for PCA_NOT_READ_NODE (see md.c) */
-        /*init_forcerec(fplog,fr,fcd,inputrec,mtop,cr,box,FALSE,
-           "nofile","nofile","nofile","nofile",FALSE,pforce);
-         */
-
-        /* Initialize QM-MM */
-        if (fr->bQMMM)
-        {
-            init_QMMMrec(cr, mtop, inputrec, fr);
-        }
-
-        /* Initialize the mdatoms structure.
-         * mdatoms is not filled with atom data,
-         * as this can not be done now with domain decomposition.
-         */
-        mdatoms = init_mdatoms(fplog, mtop, inputrec->efep != efepNO);
-
-        /* Initialize the virtual site communication */
-        vsite = init_vsite(mtop, cr, FALSE);
-
-        calc_shifts(box, fr->shift_vec);
-
-        /* With periodic molecules the charge groups should be whole at start up
-         * and the virtual sites should not be far from their proper positions.
-         */
-        if (!inputrec->bContinuation && MASTER(cr) &&
-            !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
-        {
-            /* Make molecules whole at start of run */
-            if (fr->ePBC != epbcNONE)
-            {
-                do_pbc_first_mtop(fplog, inputrec->ePBC, box, mtop, state->x);
-            }
-            if (vsite)
-            {
-                /* Correct initial vsite positions are required
-                 * for the initial distribution in the domain decomposition
-                 * and for the initial shell prediction.
-                 */
-                construct_vsites_mtop(vsite, mtop, state->x);
-            }
-        }
-
-        if (EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype))
-        {
-            ewaldcoeff_q  = fr->ewaldcoeff_q;
-            ewaldcoeff_lj = fr->ewaldcoeff_lj;
-            pmedata       = &fr->pmedata;
-        }
-        else
-        {
-            pmedata = NULL;
-        }
-    }
-    else
-    {
-        /* This is a PME only node */
-
-        /* We don't need the state */
-        done_state(state);
-
-        ewaldcoeff_q  = calc_ewaldcoeff_q(inputrec->rcoulomb, inputrec->ewald_rtol);
-        ewaldcoeff_lj = calc_ewaldcoeff_lj(inputrec->rvdw, inputrec->ewald_rtol_lj);
-        snew(pmedata, 1);
-    }
-
-    if (hw_opt->thread_affinity != threadaffOFF)
-    {
-        /* Before setting affinity, check whether the affinity has changed
-         * - which indicates that probably the OpenMP library has changed it
-         * since we first checked).
-         */
-        gmx_check_thread_affinity_set(fplog, cr,
-                                      hw_opt, hwinfo->nthreads_hw_avail, TRUE);
-
-        /* Set the CPU affinity */
-        gmx_set_thread_affinity(fplog, cr, hw_opt, hwinfo);
-    }
-
-    /* Initiate PME if necessary,
-     * either on all nodes or on dedicated PME nodes only. */
-    if (EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype))
-    {
-        if (mdatoms)
-        {
-            nChargePerturbed = mdatoms->nChargePerturbed;
-            if (EVDW_PME(inputrec->vdwtype))
-            {
-                nTypePerturbed   = mdatoms->nTypePerturbed;
-            }
-        }
-        if (cr->npmenodes > 0)
-        {
-            /* The PME only nodes need to know nChargePerturbed(FEP on Q) and nTypePerturbed(FEP on LJ)*/
-            gmx_bcast_sim(sizeof(nChargePerturbed), &nChargePerturbed, cr);
-            gmx_bcast_sim(sizeof(nTypePerturbed), &nTypePerturbed, cr);
-        }
-
-        if (cr->duty & DUTY_PME)
-        {
-            status = gmx_pme_init(pmedata, cr, npme_major, npme_minor, inputrec,
-                                  mtop ? mtop->natoms : 0, nChargePerturbed, nTypePerturbed,
-                                  (Flags & MD_REPRODUCIBLE), nthreads_pme);
-            if (status != 0)
-            {
-                gmx_fatal(FARGS, "Error %d initializing PME", status);
-            }
-        }
-    }
-
-
-    if (integrator[inputrec->eI].func == do_md)
-    {
-        /* Turn on signal handling on all nodes */
-        /*
-         * (A user signal from the PME nodes (if any)
-         * is communicated to the PP nodes.
-         */
-        signal_handler_install();
-    }
-
-    if (cr->duty & DUTY_PP)
-    {
-        /* Assumes uniform use of the number of OpenMP threads */
-        walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault));
-
-        if (inputrec->bPull)
-        {
-            /* Initialize pull code */
-            inputrec->pull_work =
-                init_pull(fplog, inputrec->pull, inputrec, nfile, fnm,
-                          mtop, cr, oenv, inputrec->fepvals->init_lambda,
-                          EI_DYNAMICS(inputrec->eI) && MASTER(cr), Flags);
-        }
-
-        if (inputrec->bRot)
-        {
-            /* Initialize enforced rotation code */
-            init_rot(fplog, inputrec, nfile, fnm, cr, state->x, box, mtop, oenv,
-                     bVerbose, Flags);
-        }
-
-        if (inputrec->eSwapCoords != eswapNO)
-        {
-            /* Initialize ion swapping code */
-            init_swapcoords(fplog, bVerbose, inputrec, opt2fn_master("-swap", nfile, fnm, cr),
-                            mtop, state->x, state->box, &state->swapstate, cr, oenv, Flags);
-        }
-
-        constr = init_constraints(fplog, mtop, inputrec, ed, state, cr);
-
-        if (DOMAINDECOMP(cr))
-        {
-            GMX_RELEASE_ASSERT(fr, "fr was NULL while cr->duty was DUTY_PP");
-            dd_init_bondeds(fplog, cr->dd, mtop, vsite, inputrec,
-                            Flags & MD_DDBONDCHECK, fr->cginfo_mb);
-
-            set_dd_parameters(fplog, cr->dd, dlb_scale, inputrec, &ddbox);
-
-            setup_dd_grid(fplog, cr->dd);
-        }
-
-        /* Now do whatever the user wants us to do (how flexible...) */
-        integrator[inputrec->eI].func(fplog, cr, nfile, fnm,
-                                      oenv, bVerbose, bCompact,
-                                      nstglobalcomm,
-                                      vsite, constr,
-                                      nstepout, inputrec, mtop,
-                                      fcd, state,
-                                      mdatoms, nrnb, wcycle, ed, fr,
-                                      repl_ex_nst, repl_ex_nex, repl_ex_seed,
-                                      membed,
-                                      cpt_period, max_hours,
-                                      imdport,
-                                      Flags,
-                                      walltime_accounting);
-
-        if (inputrec->bPull)
-        {
-            finish_pull(inputrec->pull_work);
-        }
-
-        if (inputrec->bRot)
-        {
-            finish_rot(inputrec->rot);
-        }
-
-    }
-    else
-    {
-        GMX_RELEASE_ASSERT(pmedata, "pmedata was NULL while cr->duty was not DUTY_PP");
-        /* do PME only */
-        walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME));
-        gmx_pmeonly(*pmedata, cr, nrnb, wcycle, walltime_accounting, ewaldcoeff_q, ewaldcoeff_lj, inputrec);
-    }
-
-    wallcycle_stop(wcycle, ewcRUN);
-
-    /* Finish up, write some stuff
-     * if rerunMD, don't write last frame again
-     */
-    finish_run(fplog, cr,
-               inputrec, nrnb, wcycle, walltime_accounting,
-               fr ? fr->nbv : NULL,
-               EI_DYNAMICS(inputrec->eI) && !MULTISIM(cr));
-
-
-    /* Free GPU memory and context */
-    free_gpu_resources(fr, cr, &hwinfo->gpu_info, fr ? fr->gpu_opt : NULL);
-
-    if (opt2bSet("-membed", nfile, fnm))
-    {
-        sfree(membed);
-    }
-
-    gmx_hardware_info_free(hwinfo);
-
-    /* Does what it says */
-    print_date_and_time(fplog, cr->nodeid, "Finished mdrun", gmx_gettime());
-    walltime_accounting_destroy(walltime_accounting);
-
-    /* Close logfile already here if we were appending to it */
-    if (MASTER(cr) && (Flags & MD_APPENDFILES))
-    {
-        gmx_log_close(fplog);
-    }
-
-    rc = (int)gmx_get_stop_condition();
-
-    done_ed(&ed);
-
-#ifdef GMX_THREAD_MPI
-    /* we need to join all threads. The sub-threads join when they
-       exit this function, but the master thread needs to be told to
-       wait for that. */
-    if (PAR(cr) && MASTER(cr))
-    {
-        tMPI_Finalize();
-    }
-#endif
-
-    return rc;
-}