From 1d0a9981445eb60258d55f38e6600cae89406cbf Mon Sep 17 00:00:00 2001 From: Gareth Tribello <gt@eider.phy.qub.ac.uk> Date: Thu, 2 Jul 2015 18:24:33 +0100 Subject: [PATCH] Added documentation for analysis tools --- src/analysis/AnalysisBase.cpp | 2 +- src/analysis/AnalysisWithDataCollection.cpp | 10 +- src/analysis/EuclideanDissimilarityMatrix.cpp | 9 + src/analysis/FarthestPointSampling.cpp | 10 + src/analysis/LandmarkSelectionBase.cpp | 7 + src/analysis/LandmarkSelectionBase.h | 1 + src/analysis/OutputColvarFile.cpp | 9 +- src/analysis/OutputPDBFile.cpp | 7 + src/analysis/PrintDissimilarityMatrix.cpp | 10 + src/analysis/ReadDissimilarityMatrix.cpp | 9 + src/analysis/SelectWithStride.cpp | 9 + .../ClassicalMultiDimensionalScaling.cpp | 2 +- src/dimred/ProjectNonLandmarkPoints.cpp | 9 + src/dimred/SketchMapPointwise.cpp | 8 + src/dimred/SketchMapSmacof.cpp | 9 + src/dimred/SmacoffMDS.cpp | 5 +- src/multicolvar/DumpMultiColvar.cpp | 2 +- src/multicolvar/MultiColvarDensity.cpp | 2 +- src/tools/Keywords.cpp | 11 +- src/tools/Keywords.h | 6 +- user-doc/Analysis.txt | 213 +++++++++++++++++- user-doc/Colvar.txt | 8 + user-doc/extract | 7 +- 23 files changed, 342 insertions(+), 23 deletions(-) diff --git a/src/analysis/AnalysisBase.cpp b/src/analysis/AnalysisBase.cpp index 26fb94e17..0b088daf4 100644 --- a/src/analysis/AnalysisBase.cpp +++ b/src/analysis/AnalysisBase.cpp @@ -31,7 +31,7 @@ void AnalysisBase::registerKeywords( Keywords& keys ){ ActionPilot::registerKeywords( keys ); ActionAtomistic::registerKeywords( keys ); ActionWithArguments::registerKeywords( keys ); - ActionWithVessel::registerKeywords( keys ); keys.remove("TOL"); keys.remove("LOWMEM"); + ActionWithVessel::registerKeywords( keys ); keys.remove("TOL"); keys.remove("LOWMEM"); keys.remove("TIMINGS"); keys.isAnalysis(); keys.add("atoms-2","USE_OUTPUT_DATA_FROM","use the ouput of the analysis performed by this object as input to your new analysis object"); } diff --git 
a/src/analysis/AnalysisWithDataCollection.cpp b/src/analysis/AnalysisWithDataCollection.cpp index 100e6d302..61ddf9edd 100644 --- a/src/analysis/AnalysisWithDataCollection.cpp +++ b/src/analysis/AnalysisWithDataCollection.cpp @@ -40,12 +40,14 @@ void AnalysisWithDataCollection::registerKeywords( Keywords& keys ){ keys.add("atoms-1","STRIDE","the frequency with which data should be stored for analysis. By default data is collected on every step"); keys.add("atoms-1","RUN","the frequency with which to run the analysis algorithms."); keys.addFlag("USE_ALL_DATA",false,"just analyse all the data in the trajectory. This option should be used in tandem with ATOMS/ARG + STRIDE"); - keys.addFlag("REWEIGHT_BIAS",false,"reweight the data using all the biases acting on the dynamics. This option must be used in tandem with ATOMS/ARG + STRIDE. For more information see \\ref reweighting"); - keys.add("optional","REWEIGHT_TEMP","reweight data from a trajectory at one temperature and output the probability distribution at a second temperature. This option must be used in tandem with ATOMS/ARG + STRIDE. For more information see \\ref reweighting"); + keys.addFlag("REWEIGHT_BIAS",false,"reweight the data using all the biases acting on the dynamics. This option must be used in tandem with ATOMS/ARG + STRIDE + RUN/USE_ALL_DATA. " + "For more information see \\ref analysisbas"); + keys.add("optional","REWEIGHT_TEMP","reweight data from a trajectory at one temperature and output the probability distribution at a second temperature. This option must be used in tandem with ATOMS/ARG + STRIDE + RUN/USE_ALL_DATA. " + "For more information see \\ref analysisbas"); keys.add("optional","TEMP","the system temperature. This is required if you are reweighting (REWEIGHT_BIAS/REWEIGHT_TEMP) or if you are calculating free energies. 
You are not required to specify the temperature if this is passed by the underlying MD code."); keys.add("atoms-3","REUSE_INPUT_DATA_FROM","do a second form of analysis on the data stored by a previous analysis object"); - keys.addFlag("WRITE_CHECKPOINT",false,"write out a checkpoint so that the analysis can be restarted in a later run. This option must be used in tandem with ATOMS/ARG + STRIDE."); - keys.addFlag("NOMEMORY",false,"do a block averaging i.e. analyse each block of data separately. This option must be used in tandem with ATOMS/ARG + STRIDE."); + keys.addFlag("WRITE_CHECKPOINT",false,"write out a checkpoint so that the analysis can be restarted in a later run. This option must be used in tandem with ATOMS/ARG + STRIDE + RUN."); + keys.addFlag("NOMEMORY",false,"do a block averaging i.e. analyse each block of data separately. This option must be used in tandem with ATOMS/ARG + STRIDE + RUN."); keys.use("RESTART"); keys.use("UPDATE_FROM"); keys.use("UPDATE_UNTIL"); } diff --git a/src/analysis/EuclideanDissimilarityMatrix.cpp b/src/analysis/EuclideanDissimilarityMatrix.cpp index bbe1545d7..246c0a8b4 100644 --- a/src/analysis/EuclideanDissimilarityMatrix.cpp +++ b/src/analysis/EuclideanDissimilarityMatrix.cpp @@ -23,6 +23,15 @@ #include "core/ActionRegister.h" #include "reference/ReferenceConfiguration.h" +//+PLUMEDOC ANALYSIS EUCLIDEAN_DISSIMILARITIES +/* +Calculate the matrix of dissimilarities between a trajectory of atomic configurations. + +\par Examples + +*/ +//+ENDPLUMEDOC + namespace PLMD { namespace analysis { diff --git a/src/analysis/FarthestPointSampling.cpp b/src/analysis/FarthestPointSampling.cpp index 7b2465af7..6461cb90c 100644 --- a/src/analysis/FarthestPointSampling.cpp +++ b/src/analysis/FarthestPointSampling.cpp @@ -23,6 +23,15 @@ #include "core/ActionRegister.h" #include "tools/Random.h" +//+PLUMEDOC LANDMARKS LANDMARK_SELECT_FPS +/* +Select a set of landmarks using farthest point sampling. 
+ +\par Examples + +*/ +//+ENDPLUMEDOC + namespace PLMD { namespace analysis { @@ -39,6 +48,7 @@ PLUMED_REGISTER_ACTION(FarthestPointSampling,"LANDMARK_SELECT_FPS") void FarthestPointSampling::registerKeywords( Keywords& keys ){ LandmarkSelectionBase::registerKeywords(keys); + LandmarkSelectionBase::removeDataCollectionKeywords( keys ); keys.add("compulsory","SEED","1234","a random number seed"); } diff --git a/src/analysis/LandmarkSelectionBase.cpp b/src/analysis/LandmarkSelectionBase.cpp index d694abf17..542f1fa01 100644 --- a/src/analysis/LandmarkSelectionBase.cpp +++ b/src/analysis/LandmarkSelectionBase.cpp @@ -24,6 +24,13 @@ namespace PLMD { namespace analysis { +void LandmarkSelectionBase::removeDataCollectionKeywords( Keywords& keys ){ + keys.remove("ATOMS"); keys.remove("STRIDE"); keys.remove("RUN"); keys.remove("USE_ALL_DATA"); + keys.remove("REWEIGHT_BIAS"); keys.remove("REWEIGHT_TEMP"); keys.remove("TEMP"); + keys.remove("REUSE_INPUT_DATA_FROM"); keys.remove("WRITE_CHECKPOINT"); keys.remove("NOMEMORY"); + keys.remove("RESTART"); keys.remove("UPDATE_FROM"); keys.remove("UPDATE_UNTIL"); keys.remove("ARG"); +} + void LandmarkSelectionBase::registerKeywords( Keywords& keys ){ AnalysisWithDataCollection::registerKeywords( keys ); keys.add("compulsory","NLANDMARKS","the number of landmarks that you would like to select"); diff --git a/src/analysis/LandmarkSelectionBase.h b/src/analysis/LandmarkSelectionBase.h index 2a54eb53b..2b9cd41af 100644 --- a/src/analysis/LandmarkSelectionBase.h +++ b/src/analysis/LandmarkSelectionBase.h @@ -42,6 +42,7 @@ protected: void selectFrame( const unsigned& ); public: static void registerKeywords( Keywords& keys ); + static void removeDataCollectionKeywords( Keywords& keys ); LandmarkSelectionBase( const ActionOptions& ao ); /// Return the number of data points unsigned getNumberOfDataPoints() const ; diff --git a/src/analysis/OutputColvarFile.cpp b/src/analysis/OutputColvarFile.cpp index 754426519..df589e265 100644 --- 
a/src/analysis/OutputColvarFile.cpp +++ b/src/analysis/OutputColvarFile.cpp @@ -61,7 +61,14 @@ void OutputColvarFile::registerKeywords( Keywords& keys ){ AnalysisWithDataCollection::registerKeywords( keys ); keys.add("compulsory","FILE","the name of the file to output to"); keys.add("optional","FMT","the format to output the data using"); -} + keys.reset_style("ATOMS","hidden"); keys.reset_style("STRIDE","hidden"); + keys.reset_style("RUN","hidden"); keys.reset_style("USE_ALL_DATA","hidden"); + keys.reset_style("REWEIGHT_BIAS","hidden"); keys.reset_style("REWEIGHT_TEMP","hidden"); + keys.reset_style("TEMP","hidden"); keys.reset_style("REUSE_INPUT_DATA_FROM","hidden"); + keys.reset_style("WRITE_CHECKPOINT","hidden"); keys.reset_style("NOMEMORY","hidden"); + keys.reset_style("RESTART","hidden"); keys.reset_style("UPDATE_FROM","hidden"); + keys.reset_style("UPDATE_UNTIL","hidden"); keys.reset_style("ARG","hidden"); +} OutputColvarFile::OutputColvarFile( const ActionOptions& ao ): Action(ao), diff --git a/src/analysis/OutputPDBFile.cpp b/src/analysis/OutputPDBFile.cpp index dbcf8d2ed..6112115e1 100644 --- a/src/analysis/OutputPDBFile.cpp +++ b/src/analysis/OutputPDBFile.cpp @@ -55,6 +55,13 @@ void OutputPDBFile::registerKeywords( Keywords& keys ){ AnalysisWithDataCollection::registerKeywords( keys ); keys.add("compulsory","FILE","the name of the file to output to"); keys.add("optional","FMT","the format to use in the output file"); + keys.reset_style("ATOMS","hidden"); keys.reset_style("STRIDE","hidden"); + keys.reset_style("RUN","hidden"); keys.reset_style("USE_ALL_DATA","hidden"); + keys.reset_style("REWEIGHT_BIAS","hidden"); keys.reset_style("REWEIGHT_TEMP","hidden"); + keys.reset_style("TEMP","hidden"); keys.reset_style("REUSE_INPUT_DATA_FROM","hidden"); + keys.reset_style("WRITE_CHECKPOINT","hidden"); keys.reset_style("NOMEMORY","hidden"); + keys.reset_style("RESTART","hidden"); keys.reset_style("UPDATE_FROM","hidden"); + keys.reset_style("UPDATE_UNTIL","hidden"); 
keys.reset_style("ARG","hidden"); } OutputPDBFile::OutputPDBFile( const ActionOptions& ao ): diff --git a/src/analysis/PrintDissimilarityMatrix.cpp b/src/analysis/PrintDissimilarityMatrix.cpp index 1b76c3bb6..f90f0a339 100644 --- a/src/analysis/PrintDissimilarityMatrix.cpp +++ b/src/analysis/PrintDissimilarityMatrix.cpp @@ -23,6 +23,16 @@ #include "AnalysisBase.h" #include "core/ActionRegister.h" +//+PLUMEDOC ANALYSIS PRINT_DISSIMILARITY_MATRIX +/* +Print the matrix of dissimilarities between a trajectory of atomic configurations. + +\par Examples + +*/ +//+ENDPLUMEDOC + + namespace PLMD { namespace analysis { diff --git a/src/analysis/ReadDissimilarityMatrix.cpp b/src/analysis/ReadDissimilarityMatrix.cpp index dfc4dcb6c..03df61abb 100644 --- a/src/analysis/ReadDissimilarityMatrix.cpp +++ b/src/analysis/ReadDissimilarityMatrix.cpp @@ -25,6 +25,15 @@ #include "core/ActionRegister.h" #include "tools/IFile.h" +//+PLUMEDOC ANALYSIS READ_DISSIMILARITY_MATRIX +/* +Read a matrix of dissimilarities between a trajectory of atomic configurations from a file. + +\par Examples + +*/ +//+ENDPLUMEDOC + namespace PLMD { namespace analysis { diff --git a/src/analysis/SelectWithStride.cpp b/src/analysis/SelectWithStride.cpp index 12a2ef970..4062dfdd0 100644 --- a/src/analysis/SelectWithStride.cpp +++ b/src/analysis/SelectWithStride.cpp @@ -22,6 +22,15 @@ #include "LandmarkSelectionBase.h" #include "core/ActionRegister.h" +//+PLUMEDOC LANDMARKS LANDMARK_SELECT_STRIDE +/* +Select every \f$k\f$th landmark from the trajectory. 
+ +\par Examples + +*/ +//+ENDPLUMEDOC + namespace PLMD { namespace analysis { diff --git a/src/dimred/ClassicalMultiDimensionalScaling.cpp b/src/dimred/ClassicalMultiDimensionalScaling.cpp index 9416579d8..e971e6891 100644 --- a/src/dimred/ClassicalMultiDimensionalScaling.cpp +++ b/src/dimred/ClassicalMultiDimensionalScaling.cpp @@ -22,7 +22,7 @@ #include "DimensionalityReductionBase.h" #include "core/ActionRegister.h" -//+PLUMEDOC ANALYSIS CLASSICAL_MDS +//+PLUMEDOC DIMRED CLASSICAL_MDS /* Create a low-dimensional projection of a trajectory using the classical multidimensional scaling algorithm. diff --git a/src/dimred/ProjectNonLandmarkPoints.cpp b/src/dimred/ProjectNonLandmarkPoints.cpp index 5b4a2cf96..a606ffb31 100644 --- a/src/dimred/ProjectNonLandmarkPoints.cpp +++ b/src/dimred/ProjectNonLandmarkPoints.cpp @@ -28,6 +28,15 @@ #include "analysis/AnalysisBase.h" #include "DimensionalityReductionBase.h" +//+PLUMEDOC DIMRED PROJECT_ALL_ANALYSIS_DATA +/* +Find projections of all non-landmark points using the embedding calculated by a dimensionality reduction optimisation calculation. + +\par Examples + +*/ +//+ENDPLUMEDOC + namespace PLMD { namespace dimred { diff --git a/src/dimred/SketchMapPointwise.cpp b/src/dimred/SketchMapPointwise.cpp index b9f18fca5..66253d942 100644 --- a/src/dimred/SketchMapPointwise.cpp +++ b/src/dimred/SketchMapPointwise.cpp @@ -24,6 +24,14 @@ #include "tools/ConjugateGradient.h" #include "tools/GridSearch.h" +//+PLUMEDOC DIMRED SKETCHMAP_POINTWISE +/* +Optimise the sketch-map stress function using a pointwise global optimisation algorithm. 
+ +\par Examples + +*/ +//+ENDPLUMEDOC namespace PLMD { namespace dimred { diff --git a/src/dimred/SketchMapSmacof.cpp b/src/dimred/SketchMapSmacof.cpp index 591df4e5c..cf0a7bf29 100644 --- a/src/dimred/SketchMapSmacof.cpp +++ b/src/dimred/SketchMapSmacof.cpp @@ -23,6 +23,15 @@ #include "SketchMapBase.h" #include "SMACOF.h" +//+PLUMEDOC DIMRED SKETCHMAP_SMACOF +/* +Optimise the sketch-map stress function using the SMACOF algorithm. + +\par Examples + +*/ +//+ENDPLUMEDOC + namespace PLMD { namespace dimred { diff --git a/src/dimred/SmacoffMDS.cpp b/src/dimred/SmacoffMDS.cpp index 3c41163eb..f3685da23 100644 --- a/src/dimred/SmacoffMDS.cpp +++ b/src/dimred/SmacoffMDS.cpp @@ -23,10 +23,9 @@ #include "core/ActionRegister.h" #include "SMACOF.h" -//+PLUMEDOC ANALYSIS SMACOFF_MDS +//+PLUMEDOC DIMRED SMACOF_MDS /* -Create a low-dimensional projection of a trajectory using the classical multidimensional -scaling algorithm. +Optimise the multidimensional scaling stress function using the SMACOF algorithm. \par Examples diff --git a/src/multicolvar/DumpMultiColvar.cpp b/src/multicolvar/DumpMultiColvar.cpp index 3d1b8bba5..6215c6e6c 100644 --- a/src/multicolvar/DumpMultiColvar.cpp +++ b/src/multicolvar/DumpMultiColvar.cpp @@ -40,7 +40,7 @@ namespace PLMD { namespace multicolvar { -//+PLUMEDOC ANALYSIS DUMPMULTICOLVAR +//+PLUMEDOC MCOLVARA DUMPMULTICOLVAR /* Dump atom positions and multicolvar on a file. diff --git a/src/multicolvar/MultiColvarDensity.cpp b/src/multicolvar/MultiColvarDensity.cpp index 8222be59f..639172ee6 100644 --- a/src/multicolvar/MultiColvarDensity.cpp +++ b/src/multicolvar/MultiColvarDensity.cpp @@ -42,7 +42,7 @@ namespace PLMD { namespace multicolvar { -//+PLUMEDOC ANALYSIS MULTICOLVARDENS +//+PLUMEDOC MCOLVARA MULTICOLVARDENS /* Dump atom positions and multicolvar on a file. 
diff --git a/src/tools/Keywords.cpp b/src/tools/Keywords.cpp index 9f276efd1..255a8cb0e 100644 --- a/src/tools/Keywords.cpp +++ b/src/tools/Keywords.cpp @@ -362,18 +362,21 @@ void Keywords::print_html() const { if ( (types.find(keys[i])->second).isAtomList() ) nkeys++; } if( nkeys>0 ){ - if(isaction) std::cout<<"\\par The atoms involved can be specified using\n\n"; + if(isaction && isatoms) std::cout<<"\\par The atoms involved can be specified using\n\n"; + else if(isaction) std::cout<<"\\par The data to analyse can be the output from another analysis algorithm\n\n"; else std::cout<<"\\par The input trajectory is specified using one of the following\n\n"; std::cout<<" <table align=center frame=void width=95%% cellpadding=5%%> \n"; - std::string prevtag="start"; + std::string prevtag="start"; unsigned counter=0; for(unsigned i=0;i<keys.size();++i){ if ( (types.find(keys[i])->second).isAtomList() ){ plumed_massert( atomtags.count(keys[i]), "keyword " + keys[i] + " allegedly specifies atoms but no tag has been specified. 
Please email Gareth Tribello"); if( prevtag!="start" && prevtag!=atomtags.find(keys[i])->second && isaction ){ std::cout<<"</table>\n\n"; - std::cout<<"\\par Or alternatively by using\n\n"; + if( isatoms ) std::cout<<"\\par Or alternatively by using\n\n"; + else if( counter==0 ){ std::cout<<"\\par Alternatively data can be collected from the trajectory using \n\n"; counter++; } + else std::cout<<"\\par Lastly data collected in a previous analysis action can be reanalysed by using the keyword \n\n"; std::cout<<" <table align=center frame=void width=95%% cellpadding=5%%> \n"; - } + } print_html_item( keys[i] ); prevtag=atomtags.find(keys[i])->second; } diff --git a/src/tools/Keywords.h b/src/tools/Keywords.h index 109005406..f52956511 100644 --- a/src/tools/Keywords.h +++ b/src/tools/Keywords.h @@ -58,6 +58,8 @@ friend class Action; private: /// Is this an action or driver (this bool affects what style==atoms does in print) bool isaction; +/// This allows us to overwrite the behavior of the atoms type in analysis actions + bool isatoms; /// The names of the allowed keywords std::vector<std::string> keys; /// The names of the reserved keywords @@ -90,9 +92,11 @@ private: void printKeyword( const std::string& j, FILE* out ) const ; public: /// Constructor - Keywords() : isaction(true) {} + Keywords() : isaction(true), isatoms(true) {} /// void isDriver(){ isaction=false; } +/// + void isAnalysis(){ isatoms=false; } /// find out whether flag key is on or off by default. bool getLogicalDefault( std::string key, bool& def ) const ; /// Get the value of the default for the keyword named key diff --git a/user-doc/Analysis.txt b/user-doc/Analysis.txt index 5e03d3a2e..b00dadfae 100644 --- a/user-doc/Analysis.txt +++ b/user-doc/Analysis.txt @@ -2,11 +2,216 @@ \page Analysis Analysis -PLUMED can be used to analyse trajectories either on the fly during an MD run or via -postprocessing a trajectory using \ref driver. 
The following is a list of the various -methods for analysing trajectories contained in PLUMED. +\section analysisbas Introduction + +A molecular dynamics trajectory is in essence an ordered set of configurations of atoms. Trajectory +analysis algorithms are methods that allow us to extract meaningful information from this extremely +high-dimensionality information. In extracting this information much of the information in the trajectory +will be discarded and assumed to be irrelevant to the problem at hand. For example, when we calculate a +histogram from a trajectory we throw away all information on the order the frames were visited during the +trajectory. We instead opt to display a time average that shows the parts of configuration space that were +visited most frequently. There are many situations in which this is a reasonable thing to do as we know that +time averages are equivalent to ensemble averages in the long timescale limit and that these average probabilities +of being in different parts of configuration space, \f$P(s)\f$, are thus related to the underlying free +energy, \f$F(s)\f$, via: +\f[ +F(s) = - k_B T \ln P(s) +\f] +In fact we can even exploit our understanding of statistical thermodynamics and basic algebra and derive expressions +that connect the time average of the probability of being in a particular configuration in a simulation at one temperature, +\f$T_1\f$, with the free energy at a second different temperature, \f$T_2\f$: + +\f[ +P(s',t) = \frac{ \sum_{t'=0}^t \delta( s(x) - s' ) \exp\left( +\left[\frac{1}{T_1} - \frac{1}{T_2}\right] \frac{U(x,t')}{k_B} \right) }{ \sum_{t'=0}^t \exp\left( +\left[\frac{1}{T_1} - \frac{1}{T_2}\right] \frac{U(x,t')}{k_B} \right) } +\f] + +Similarly, we can exploit statistical thermodynamics to calculate the free energy of a configuration from the probability +of being in that configuration that was calculated in a biased simulation. 
+\f[ +P(s',t) = \frac{ \sum_{t'=0}^t \delta( s(x) - s' ) \exp\left( +\frac{V(x,t')}{k_B T} \right) }{ \sum_{t'=0}^t \exp\left( +\frac{V(x,t')}{k_B T} \right) } +\f] + +In both of these expressions we have a number of frames from a trajectory of length \f$t\f$. In the first of these expressions +\f$U(x,t')\f$ is the potential energy of the system (including any biases) at time \f$t'\f$. Meanwhile, in the second of +these expressions \f$V(x,t')\f$ is the value of the simulation bias at time \f$t'\f$. The Dirac delta function, \f$\delta\f$, +in the numerators of these expressions indicates that we are going to calculate a \ref HISTOGRAM as a function of some +collective variable \f$s(x)\f$ that can be calculated from the atomic positions \f$x\f$. Herein lies the central problem: +what collective variable function should we use in this expression? What \f$s(x)\f$ will give us the most meaningful +interpretation of the trajectory data? Obviously, we can use any one of the \ref colvarintro implemented in tandem +with \ref HISTOGRAM. We can also, however, use a number of other algorithms that essentially "learn" from our simulation +trajectory and that thus provide alternative ways of displaying the probability information that can be calculated +using the various formulae above. These techniques are described in the following sections. + +\section basanal Basic analysis + +PLUMED contains a suite of tools that can be used to analyse simulation trajectories. +These tools can be employed on the fly during an MD run or through post-processing using the +\ref driver tool. The simplest of these tools are: + +<table align=center frame=void width=95%% cellpadding=5%%> +<tr> <td width=5%> \subpage PRINT </td> <td>Print quantities to a file.</td> </tr> +<tr> <td width=5%> \subpage DUMPATOMS </td> <td>Dump selected atoms on a file.</td> </tr> +</table> + +These allow you to print colvar values or the positions of atoms to a file. 
+ +A more complex way of analysing the data in your trajectory is to calculate a histogram +as a function of a small number of collective variables. Alternatively, by running a large +number of simulations you can calculate committor probabilities. The commands for doing these +sorts of analyses within PLUMED are as follows: + +<table align=center frame=void width=95%% cellpadding=5%%> +<tr> <td width=5%> \subpage HISTOGRAM </td> <td>Calculate the probability density as a function of a few CVs either using kernel density estimation, or a discrete histogram estimation. </td> </tr> +<tr> <td width=5%> \subpage COMMITTOR </td> <td>Does a committor analysis.</td> </tr> +</table> + +PLUMED then has a number of other tools for doing more sophisticated forms of analysis that are +described in the sections that follow. + +\section diag Diagnostic tools + +PLUMED has a number of diagnostic tools that can be used to check that new Actions are working correctly: + +<table align=center frame=void width=95%% cellpadding=5%%> +<tr> <td width=5%> \subpage DUMPFORCES </td> <td>Dump the force acting on one of the values in a file. </td> </tr> +<tr> <td width=5%> \subpage DUMPDERIVATIVES </td> <td>Dump the derivatives with respect to the input parameters for one or more objects (generally CVs, functions or biases).</td> </tr> +<tr> <td width=5%> \subpage DUMPMASSCHARGE </td> <td>Dump masses and charges on a selected file.</td> </tr> +<tr> <td width=5%> \subpage DUMPPROJECTIONS </td> <td>Dump the derivatives with respect to the input parameters for one or more objects (generally CVs, functions or biases).</td> </tr> +</table> + +These commands allow you to test that derivatives and forces are calculated correctly +within colvars and functions. One place where this is very useful is when you are testing whether or +not you have implemented the derivatives of a new collective variable correctly. 
So for example if +we wanted to do such a test on the distance CV we would employ an input file something like this: + +\verbatim +d1: DISTANCE ATOMS=1,2 +d1n: DISTANCE ATOMS=1,2 NUMERICAL_DERIVATIVES +DUMPDERIVATIVES ARG=d1,d1n FILE=derivatives +\endverbatim + +The first of these two distance commands calculates the analytical derivatives of the distance +while the second calculates these derivatives numerically. Obviously, if your CV is implemented +correctly these two sets of quantities should be nearly identical. + +\section dissimilaritym Calculating dissimilarity matrices + +One of the simplest things that we can do with a trajectory is that we can calculate the dissimilarity between +every pair of frames within it. When using the \ref dimred "dimensionality reduction" algorithms described in +the sections that follow the first step is to calculate this matrix. Consequently, within PLUMED the following +command will collect the trajectory data as your simulation progresses and calculate the dissimilarities: + +<table align=center frame=void width=95%% cellpadding=5%%> +<tr> <td width=5%> \subpage EUCLIDEAN_DISSIMILARITIES </td> <td> Calculate the matrix of dissimilarities between a trajectory of atomic configurations. </td> </tr> +</table> + +By exploiting the functionality described in \ref dists you can calculate these dissimilarities in +a wide variety of different ways (e.g. you can use \ref RMSD, or you can use a collection of collective variable +values, see \ref TARGET). If you wish to view this dissimilarity information you can print these quantities +to a file using: + +<table align=center frame=void width=95%% cellpadding=5%%> +<tr> <td width=5%> \subpage PRINT_DISSIMILARITY_MATRIX </td> <td> Print the matrix of dissimilarities between a trajectory of atomic configurations. 
</td> </tr> +</table> + +In addition, if PLUMED does not calculate the dissimilarities you need you can read this information from an +external file: + +<table align=center frame=void width=95%% cellpadding=5%%> +<tr> <td width=5%> \subpage READ_DISSIMILARITY_MATRIX </td> <td> Read a matrix of dissimilarities between a trajectory of atomic configurations from a file. </td> </tr> +</table> + +N.B. You can only use the \ref READ_DISSIMILARITY_MATRIX command when you are doing post-processing. + +\section landmarks Landmark Selection + +Many of the techniques described in the following sections are very computationally expensive to run on large trajectories. +A common strategy is thus to use a landmark selection algorithm to pick a particularly representative subset of trajectory +frames and to only apply the expensive analysis algorithm on these configurations. The various landmark selection algorithms +that are available in PLUMED are as follows: + +@LANDMARKS@ + +Some of these algorithms (e.g. \ref LANDMARK_SELECT_STRIDE) can collect data from the trajectory themselves. Others such as +\ref LANDMARK_SELECT_FPS must take a dissimilarity matrix action as input. That is to say they must be used as follows: + +\verbatim +ss1: EUCLIDEAN_DISSIMILARITIES STRIDE=1 USE_ALL_DATA ARG=d1 +ll2: LANDMARK_SELECT_FPS USE_OUTPUT_DATA_FROM=ss1 NLANDMARKS=300 +\endverbatim + +When landmark selection is performed in this way a weight is ascribed to each of the landmark configurations. This weight is +calculated by summing the weights of all the trajectory frames in each of the landmarks' Voronoi polyhedra +(https://en.wikipedia.org/wiki/Voronoi_diagram). The weight of each trajectory frame is one unless you are reweighting using the +formulae described in the \ref analysisbas to counteract the effect of a simulation bias or an elevated temperature. If you are reweighting +using these formulae the weight of each of the points is equal to the exponential term in the numerator of these expressions. 
+ +\section dimred Dimensionality Reduction + +Many dimensionality reduction algorithms work in a manner similar to the way we make maps. You start with distances +between London, Belfast, Paris and Dublin and then you try to arrange points on a piece of paper so that the (suitably transformed) +distances between the points in your map representing each of those cities are related to the true distances between the cities. +Stating this more mathematically, multidimensional scaling (MDS) endeavors to find an <a href="http://en.wikipedia.org/wiki/Isometry">isometry</a> +between points distributed in a high-dimensional space and a set of points distributed in a low-dimensional plane. +In other words, if we have \f$M\f$ \f$D\f$-dimensional points, \f$\mathbf{X}\f$, +and we can calculate dissimilarities between pairs of them, \f$D_{ij}\f$, we can, with an MDS calculation, try to create \f$M\f$ projections, +\f$\mathbf{x}\f$, of the high dimensionality points in a \f$d\f$-dimensional linear space by trying to arrange the projections so that the +Euclidean distances between pairs of them, \f$d_{ij}\f$, resemble the dissimilarities between the high dimensional points. In short we minimize: + +\f[ +\chi^2 = \sum_{i \ne j} w_i w_j \left( F(D_{ij}) - f(d_{ij}) \right)^2 +\f] + +where \f$F(D_{ij})\f$ is some transformation of the distance between point \f$X^{i}\f$ and point \f$X^{j}\f$ and \f$f(d_{ij})\f$ is some transformation +of the distance between the projection of \f$X^{i}\f$, \f$x^i\f$, and the projection of \f$X^{j}\f$, \f$x^j\f$. \f$w_i\f$ and \f$w_j\f$ are the weights +of configurations \f$X^i\f$ and \f$X^j\f$ respectively. These weights are calculated using the reweighting and Voronoi polyhedra approaches described in +previous sections. 
A tutorial on dimensionality reduction and how it can be used to analyse simulations can be found in the tutorial \ref belfast-3 and in +the following <a href="https://www.youtube.com/watch?v=ofC2qz0_9_A&feature=youtu.be" > short video.</a> + +Within PLUMED an input to run a dimensionality reduction algorithm can be as simple as: + +\verbatim +ss1: EUCLIDEAN_DISSIMILARITIES STRIDE=1 USE_ALL_DATA ARG=d1 +mds: CLASSICAL_MDS USE_OUTPUT_DATA_FROM=ss1 NLOW_DIM=2 +\endverbatim + +Where we have to use the \ref EUCLIDEAN_DISSIMILARITIES action here in order to calculate the matrix of dissimilarities between trajectory frames. +We can even throw some landmark selection into this procedure and perform + +\verbatim +ss1: EUCLIDEAN_DISSIMILARITIES STRIDE=1 USE_ALL_DATA ARG=d1 +ll2: LANDMARK_SELECT_FPS USE_OUTPUT_DATA_FROM=ss1 NLANDMARKS=300 +mds: CLASSICAL_MDS USE_OUTPUT_DATA_FROM=ll2 NLOW_DIM=2 +osample: PROJECT_ALL_ANALYSIS_DATA USE_OUTPUT_DATA_FROM=ss1 PROJECTION=mds +\endverbatim + +Notice here that the final command allows us to calculate the projections of all the non-landmark points that were collected by the action with +label ss1. + +Dimensionality reduction can be more complicated, however, because the stress function that calculates \f$\chi^2\f$ has to be optimised rather carefully using +a number of different algorithms. The various algorithms that can be used to optimise this function are described below: + +@DIMRED@ + +\section output Outputting the results from analysis algorithms + +The following methods are available for printing the result output by the various analysis algorithms: + +<table align=center frame=void width=95%% cellpadding=5%%> +<tr> <td width=5%> \subpage OUTPUT_ANALYSIS_DATA_TO_COLVAR </td> <td> Output the results from an analysis using the PLUMED colvar file format. 
</td> </tr> +<tr> <td width=5%> \subpage OUTPUT_ANALYSIS_DATA_TO_PDB </td> <td> Output the results from an analysis using the PDB file format.</td> </tr> +</table> + +If you use the above commands to output data from one of the \ref landmarks algorithms then only the second will give you information on the +atomic positions in your landmark configurations and their associated weights. The first of these commands will give the values of the colvars +in the landmark configurations only. If you use the above commands to output data from one of the \ref dimred algorithms then +\ref OUTPUT_ANALYSIS_DATA_TO_COLVAR will give you an output file that contains the projection for each of your input points. \ref OUTPUT_ANALYSIS_DATA_TO_PDB +will give you a PDB that contains the position of the input point, the projections and the weight of the configuration. + +A nice feature of PLUMED is that when you use \ref landmarks algorithms or \ref dimred algorithms the output information is just a vector of +variables. As such you can use \ref HISTOGRAM to construct a histogram of the information generated by these algorithms. -@ANALYSIS@ */ diff --git a/user-doc/Colvar.txt b/user-doc/Colvar.txt index 137be9afa..03d640f59 100644 --- a/user-doc/Colvar.txt +++ b/user-doc/Colvar.txt @@ -138,5 +138,13 @@ The list of biases of this type are as follows: Notice that (in theory) you could also use this functionality to add additional terms to your forcefield or to implement your forcefield. +\section usingbase Extracting all the base quantities + +There may be occasions where you want to get information on all the individual colvar values that you have calculated. +For example you might want to output the values of all the coordination numbers calculated by a \ref COORDINATIONNUMBER +action. You can thus use the following commands to extract this sort of information. 
+ +@MCOLVARA@ + */ diff --git a/user-doc/extract b/user-doc/extract index f1b81225f..da89727f3 100755 --- a/user-doc/extract +++ b/user-doc/extract @@ -92,7 +92,7 @@ awk 'BEGIN{gfile="automatic/GLOSSARY.list"; dfile="automatic/DICTIONARY.list"; } if(inside==3){ printf "%s", $0 > dfile; } } if($1=="//+PLUMEDOC"){ - if( $2=="TOPOLOGY" || $2=="COLVAR" || $2=="MCOLVAR" || $2=="DCOLVAR"|| $2=="MCOLVARF" || $2=="MCOLVARB" || $2=="FUNCTION" || $2=="ANALYSIS" || $2=="BIAS" || $2=="GENERIC" || $2=="VATOM" || $2=="TOOLS" ){ + if( $2=="TOPOLOGY" || $2=="COLVAR" || $2=="MCOLVAR" || $2=="DCOLVAR"|| $2=="MCOLVARF" || $2=="MCOLVARB" || $2=="MCOLVARA" || $2=="FUNCTION" || $2=="ANALYSIS" || $2=="LANDMARKS" || $2=="DIMRED" || $2=="BIAS" || $2=="GENERIC" || $2=="VATOM" || $2=="TOOLS" ){ lfile="automatic/"$2".list" printf "<tr> <td width=%s> \\subpage %s </td> <td>","5%", $3 > lfile @@ -236,7 +236,7 @@ done sed -ie 's/<b> --/<b> \\c \\-\\-/g' automatic/*.txt rm automatic/*.txte -for file in COLVAR MCOLVAR DCOLVAR MCOLVARF MCOLVARB ANALYSIS BIAS GLOSSARY \ +for file in COLVAR MCOLVAR DCOLVAR MCOLVARF MCOLVARB MCOLVARA ANALYSIS LANDMARKS DIMRED BIAS GLOSSARY \ DICTIONARY TUTORIALS WEBSITES FUNCTION TOPOLOGY VATOM \ TOOLS do @@ -269,7 +269,10 @@ cat $file.txt | /^ *@DCOLVAR@ *$/r automatic/DCOLVAR.list /^ *@MCOLVARF@ *$/r automatic/MCOLVARF.list /^ *@MCOLVARB@ *$/r automatic/MCOLVARB.list + /^ *@MCOLVARA@ *$/r automatic/MCOLVARA.list /^ *@ANALYSIS@ *$/r automatic/ANALYSIS.list + /^ *@LANDMARKS@ *$/r automatic/LANDMARKS.list + /^ *@DIMRED@ *$/r automatic/DIMRED.list /^ *@BIAS@ *$/r automatic/BIAS.list /^ *@GLOSSARY@ *$/r automatic/GLOSSARY.list /^ *@DICTIONARY@ *$/r automatic/DICTIONARY.list -- GitLab