diff --git a/src/core/Analysis.cpp b/src/core/Analysis.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3a7ac8006efb0b85148575f7c2348a2d2250b24a
--- /dev/null
+++ b/src/core/Analysis.cpp
@@ -0,0 +1,351 @@
+/* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+   Copyright (c) 2012 The plumed team
+   (see the PEOPLE file at the root of the distribution for a list of names)
+
+   See http://www.plumed-code.org for more information.
+
+   This file is part of plumed, version 2.0.
+
+   plumed is free software: you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   plumed is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with plumed. If not, see <http://www.gnu.org/licenses/>.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+
+#include "Analysis.h"
+#include "ActionSet.h"
+#include "ActionWithValue.h"
+#include "PlumedMain.h"
+#include "Atoms.h"
+
+namespace PLMD {
+
+//+PLUMEDOC INTERNAL reweighting
+/*
+We can use our knowledge of the Boltzmann distribution in the canonical ensemble to reweight the data
+contained in trajectories. Using this procedure we can take a trajectory at temperature \f$T_1\f$ and use it to
+extract probabilities at a different temperature, \f$T_2\f$, using:
+
+\f[
+P(s',t) = \frac{ \sum_{t'}^t \delta( s(x) - s' ) \exp\left( \left[\frac{1}{T_1} - \frac{1}{T_2}\right] \frac{U(x,t')}{k_B} \right) }{ \sum_{t'}^t \exp\left( \left[\frac{1}{T_1} - \frac{1}{T_2}\right] \frac{U(x,t')}{k_B} \right) }
+\f]
+
+where \f$U(x,t')\f$ is the potential energy of the system. Alternatively, if a static or pseudo-static bias \f$V(x,t')\f$ is acting on
+the system we can remove this bias and get the unbiased probability distribution using:
+
+\f[
+P(s',t) = \frac{ \sum_{t'}^t \delta( s(x) - s' ) \exp\left( \frac{V(x,t')}{k_B T} \right) }{ \sum_{t'}^t \exp\left( \frac{V(x,t')}{k_B T} \right) }
+\f]
+
+*/
+//+ENDPLUMEDOC
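+
+// A minimal sketch (not part of the original sources) of how the two formulas
+// above turn into a per-frame log-weight. Analysis::calculate() below does the
+// equivalent with values taken from Atoms; here U is the potential energy, V
+// the total bias, kB Boltzmann's constant, T1 the simulation temperature and
+// T2 the temperature we reweight to -- all hypothetical parameters of this
+// sketch, which is not used anywhere in the class.
+namespace {
+inline double exampleLogWeight( double U, double V, double kB, double T1, double T2 ){
+  double logw = ( 1.0/T1 - 1.0/T2 )*U/kB;   // reweighting to a second temperature
+  logw += V/( kB*T1 );                      // removing a static bias
+  return logw;
+}
+}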
" + "This is not possible during postprocessing."); + keys.addFlag("WRITE_CHECKPOINT",false,"write out a checkpoint so that the analysis can be restarted in a later run"); + keys.add("hidden","REUSE_DATA_FROM","eventually this will allow you to analyse the same set of data multiple times"); + keys.add("hidden","IGNORE_REWEIGHTING","this allows you to ignore any reweighting factors"); + keys.reserveFlag("NOMEMORY",false,"analyse each block of data separately"); +} + +Analysis::Analysis(const ActionOptions&ao): +Action(ao), +ActionPilot(ao), +ActionWithArguments(ao), +single_run(false), +nomemory(true), +write_chq(false), +reusing_data(false), +ignore_reweight(false), +needeng(false), +idata(0), +firstAnalysisDone(false), +old_norm(0.0) +{ + std::string prev_analysis; parse("REUSE_DATA_FROM",prev_analysis); + if( prev_analysis.length()>0 ){ + reusing_data=true; + mydatastash=plumed.getActionSet().selectWithLabel<Analysis*>( prev_analysis ); + if( !mydatastash ) error("could not find analysis action named " + prev_analysis ); + parseFlag("IGNORE_REWEIGHTING",ignore_reweight); + if( ignore_reweight ) log.printf(" reusing data stored by %s but ignoring all reweighting\n",prev_analysis.c_str() ); + else log.printf(" reusing data stored by %s\n",prev_analysis.c_str() ); + } else { + if( keywords.exists("REWEIGHT_BIAS") ){ + bool dobias; parseFlag("REWEIGHT_BIAS",dobias); + if( dobias ){ + std::vector<ActionWithValue*> all=plumed.getActionSet().select<ActionWithValue*>(); + if( all.empty() ) error("your input file is not telling plumed to calculate anything"); + std::vector<Value*> arg( getArguments() ); + log.printf(" reweigting using the following biases "); + for(unsigned j=0;j<all.size();j++){ + std::string flab; flab=all[j]->getLabel() + ".bias"; + if( all[j]->exists(flab) ){ + biases.push_back( all[j]->copyOutput(flab) ); + arg.push_back( all[j]->copyOutput(flab) ); + log.printf(" %s",flab.c_str()); + } + } + log.printf("\n"); + if( biases.size()==0 ) error("you are asking to reweight bias but there does not appear to be a bias acting on your system"); + requestArguments( arg ); + } + } + + simtemp=0.; parse("TEMP",simtemp); + if( simtemp==0 && biases.size()>0 ) error("to reweight you must specify a temperature use TEMP"); + rtemp=0; + if( keywords.exists("REWEIGHT_TEMP") ) parse("REWEIGHT_TEMP",rtemp); + if( rtemp!=0 ){ + if( simtemp==0 ) error("to reweight you must specify a temperature use TEMP"); + needeng=true; + log.printf(" reweighting simulation at %f to probabilities at temperature %f\n",simtemp,rtemp); + } + + parseFlag("USE_ALL_DATA",single_run); + if( !single_run ){ + std::string fstr; parse("RUN",freq ); + log.printf(" running analysis every %d steps\n",freq); + if( freq%getStride()!= 0 ) error("Frequncy of running is not a multiple of the stride"); + ndata=std::floor(freq/getStride() ); + data.resize( ndata ); + for(unsigned i=0;i<ndata;++i){ data[i].resize( getNumberOfArguments() ); } + logweights.resize( ndata ); + weights.resize( ndata ); + } else { + log.printf(" analyzing all data in trajectory\n"); + args.resize( getNumberOfArguments() ); + } + if( keywords.exists("NOMEMORY") ){ nomemory=false; parseFlag("NOMEMORY",nomemory); } + if(nomemory) log.printf(" doing a separate analysis for each block of data\n"); + parseFlag("WRITE_CHECKPOINT",write_chq); + if( write_chq && single_run ){ + write_chq=false; + warning("ignoring WRITE_CHECKPOINT flag because we are analyzing all data"); + } + + // We need no restart file if we are just collecting data and analyzing all of it + 
+      std::string filename = getName() + "_" + getLabel() + ".chkpnt";
+      if( write_chq ) rfile.link(*this);
+      if( plumed.getRestart() ){
+          if( single_run ) error("cannot restart histogram when using the USE_ALL_DATA option");
+          if( !write_chq ) warning("restarting without writing a checkpoint file is somewhat strange");
+          // Read in data from input file
+          readDataFromFile( filename );
+          // Setup the restart file (append mode)
+          if( write_chq ) rfile.open( filename.c_str(), "aw" );
+          // Run the analysis if we stopped in the middle of it last time
+          if( idata==ndata ) runAnalysis();
+          log.printf("  restarting analysis with %u points read from restart file\n",idata);
+      } else if( write_chq ){
+          // Setup the restart file (delete any old one)
+          remove( filename.c_str() );
+          rfile.open( filename.c_str(), "w+" );
+      }
+      if( write_chq ){
+          rfile.addConstantField("old_normalization");
+          for(unsigned i=0;i<getNumberOfArguments();++i) rfile.setupPrintValue( getPntrToArgument(i) );
+      }
+  }
+}
+
+void Analysis::readDataFromFile( const std::string& filename ){
+  double tstep, oldtstep=0.; PlumedIFile ifile;
+  if( !ifile.FileExist(filename) ) error("failed to find required restart file " + filename );
+  ifile.open(filename);
+
+  bool first=true;
+  while(ifile.scanField("time",tstep)){
+      if( !first && ((tstep-oldtstep) - getStride()*plumed.getAtoms().getTimeStep())>plumed.getAtoms().getTimeStep() ){
+          error("frequency of data storage in " + filename + " is not equal to the frequency of data storage in the plumed.dat file");
+      }
+      for(unsigned j=0;j<getNumberOfArguments();++j){
+          ifile.scanField( getPntrToArgument(j)->getName(), data[idata][j] );
+      }
+      ifile.scanField("log_weight",logweights[idata]);
+      ifile.scanField("old_normalization",old_norm);
+      ifile.scanField();
+      idata++; first=false; oldtstep=tstep;
+  }
+  ifile.close();
+  if(old_norm>0) firstAnalysisDone=true;
+}
+
+void Analysis::prepare(){
+  if(needeng) plumed.getAtoms().setCollectEnergy(true);
+}
+
+void Analysis::calculate(){
+  // Don't store the first step (also don't store if we are getting data from elsewhere)
+  if( getStep()==0 || reusing_data ) return;
+  // Retrieve the bias
+  double bias=0.0; for(unsigned i=0;i<biases.size();++i) bias+=biases[i]->get();
+
+  double ww=0;
+  if(needeng){
+      // The energy used for temperature reweighting already includes the bias
+      double energy=plumed.getAtoms().getEnergy()+bias;
+      // Reweighting because of temperature difference
+      ww=-( (1.0/rtemp) - (1.0/simtemp) )*energy / plumed.getAtoms().getKBoltzmann();
+  }
+  // Reweighting because of biases (this must happen even when we are not
+  // reweighting to a second temperature)
+  if( biases.size()>0 ) ww += bias/( plumed.getAtoms().getKBoltzmann()*simtemp );
+
+  if(single_run){
+      // Get the arguments and store them in a vector of vectors
+      for(unsigned i=0;i<getNumberOfArguments();++i) args[i]=getArgument(i);
+      data.push_back( args );
+      logweights.push_back(ww);
+  } else {
+      // Get the arguments and store them in a vector of vectors
+      for(unsigned i=0;i<getNumberOfArguments();++i) data[idata][i]=getArgument(i);
+      logweights[idata] = ww;
+  }
+  // Write data to checkpoint file
+  if( write_chq ){
+      rfile.printField("time",getTime()); rfile.printField("old_normalization",old_norm);
+      for(unsigned i=0;i<getNumberOfArguments();++i) rfile.printField( getPntrToArgument(i), getArgument(i) );
+      rfile.printField("log_weight",logweights[idata]); rfile.printField();
+  }
+  // Increment data counter
+  idata++;
+}
+
+Analysis::~Analysis(){
+  if( write_chq ) rfile.close();
+}
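+
+// Illustrative helper (not used by the class): the NOMEMORY branch of
+// finalizeWeights() below applies the standard log-sum-exp shift, subtracting
+// the largest log-weight before exponentiating so that exp() cannot overflow;
+// any common shift cancels once each weight is divided by the total.
+namespace {
+inline double exampleNormalization( const std::vector<double>& logw ){
+  double maxw=logw[0];
+  for(unsigned i=1;i<logw.size();++i){ if( logw[i]>maxw ) maxw=logw[i]; }
+  double norm=0.0;
+  for(unsigned i=0;i<logw.size();++i) norm+=exp( logw[i]-maxw );
+  return norm;   // the i-th normalized weight is exp( logw[i]-maxw ) / norm
+}
+}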
error("something has gone wrong. Am trying to run analysis but I don't have sufficient data"); + if( weights.size()!=logweights.size() ) weights.resize( logweights.size() ); + + norm=0; // Reset normalization constant + if( ignore_weights ){ + for(unsigned i=0;i<logweights.size();++i){ + weights[i]=1.0; norm+=1.0; + } + } else if( nomemory ){ + // Find the maximum weight + double maxweight=logweights[0]; + for(unsigned i=1;i<getNumberOfDataPoints();++i){ + if(logweights[i]>maxweight) maxweight=logweights[i]; + } + // Calculate normalization constant + for(unsigned i=0;i<logweights.size();++i){ + norm+=exp( logweights[i]-maxweight ); + } + // Calculate weights (no memory) + for(unsigned i=0;i<logweights.size();++i){ + weights[i]=exp( logweights[i]-maxweight ); + } + // Calculate normalized weights (with memory) + } else { + // Calculate normalization constant + for(unsigned i=0;i<logweights.size();++i){ + norm+=exp( logweights[i] ); + } + if( !firstAnalysisDone ) old_norm=1.0; + // Calculate weights (with memory) + for(unsigned i=0;i<logweights.size();++i){ + weights[i] = exp( logweights[i] ) / old_norm; + } + if( !firstAnalysisDone ) old_norm=0.0; + } +} + +void Analysis::runAnalysis(){ + + // close the restart file so it is flushed + if( write_chq ) rfile.close(); + + // Note : could add multiple walkers here - simply read in the data from all + // other walkers here if we are writing the check points. + + // Calculate the final weights from the log weights + if( !reusing_data ){ + finalizeWeights( ignore_reweight ); + } else { + mydatastash->finalizeWeights( ignore_reweight ); + norm=mydatastash->retrieveNorm(); + } + // And run the analysis + performAnalysis(); idata=0; + // Update total normalization constant + old_norm+=norm; firstAnalysisDone=true; + + // Delete the checkpoint file + if( write_chq ){ + std::string filename = getName() + "_" + getLabel() + ".chkpnt"; + if( remove( filename.c_str() )!=0 ) warning("problem deleting checkpoint file " + filename ); + // If we are running more than one calculation only reopen the restart file + if( !single_run ) rfile.open( filename.c_str(), "w+" ); + } +} + +double Analysis::getNormalization() const { + if( nomemory || !firstAnalysisDone ) return norm; + return ( 1. + norm/old_norm ); +} + +void Analysis::update(){ + if( !single_run ){ + if( getStep()>0 && getStep()%freq==0 ) runAnalysis(); + else if( idata==logweights.size() ) error("something has gone wrong. Probably a wrong initial time on restart"); + } +} + +bool Analysis::getPeriodicityInformation(const unsigned& i, std::string& dmin, std::string& dmax){ + bool isperiodic=getPntrToArgument(i)->isPeriodic(); + if(isperiodic) getPntrToArgument(i)->getDomain(dmin,dmax); + return isperiodic; +} + +void Analysis::runFinalJobs() { + if( !single_run ) return; + if( getNumberOfDataPoints()==0 ) error("no data is available for analysis"); + runAnalysis(); +} + +std::string Analysis::saveResultsFromPreviousAnalyses( const std::string filename ){ + FILE* ff=std::fopen( filename.c_str() ,"r"); + // Perhaps replace this with an warning and a backup at some stage + if(ff && !firstAnalysisDone) error("found file named " + filename + " from previous calculation"); + FILE* fff=NULL; std::string num, backup; + if( ff ){ + FILE* fff=NULL; + for(unsigned i=1;;i++){ + Tools::convert(i,num); + backup=filename + "." 
+
+std::string Analysis::saveResultsFromPreviousAnalyses( const std::string filename ){
+  FILE* ff=std::fopen( filename.c_str(), "r" );
+  if( !ff ) return "";
+  // Perhaps replace this with a warning and a backup at some stage
+  if( !firstAnalysisDone ) error("found file named " + filename + " from a previous calculation");
+  // Find the first backup name that is not yet in use
+  std::string num, backup; FILE* fff=NULL;
+  for(unsigned i=1;;i++){
+      Tools::convert(i,num);
+      backup=filename + "." + num;
+      fff=std::fopen( backup.c_str(), "r" );
+      if(!fff) break;
+      std::fclose(fff);
+  }
+  std::fclose(ff);
+  int check=rename( filename.c_str(), backup.c_str() );
+  plumed_massert(check==0,"renaming " + filename + " to " + backup + " failed");
+  return backup;
+}
+
+}
diff --git a/src/core/Analysis.h b/src/core/Analysis.h
new file mode 100644
index 0000000000000000000000000000000000000000..68264128f57e1e7c75b6ff01e4ad06a0bc7eafb1
--- /dev/null
+++ b/src/core/Analysis.h
@@ -0,0 +1,162 @@
+/* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+   Copyright (c) 2012 The plumed team
+   (see the PEOPLE file at the root of the distribution for a list of names)
+
+   See http://www.plumed-code.org for more information.
+
+   This file is part of plumed, version 2.0.
+
+   plumed is free software: you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   plumed is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with plumed. If not, see <http://www.gnu.org/licenses/>.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+#ifndef __PLUMED_Analysis_h
+#define __PLUMED_Analysis_h
+
+#include "ActionPilot.h"
+#include "ActionWithArguments.h"
+
+#define PLUMED_ANALYSIS_INIT(ao) Action(ao),Analysis(ao)
+
+namespace PLMD {
+
+/**
+\ingroup INHERIT
+This is the abstract base class to use for implementing new methods for analyzing the trajectory.
+Within it there is information on how to go about implementing a new analysis method; a minimal
+sketch of a derived class is shown below.
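+
+A minimal (hypothetical) derived class might look as follows; MyAnalysis and
+MYANALYSIS are placeholder names used only for this sketch:
+
+\verbatim
+class MyAnalysis : public Analysis {
+public:
+  static void registerKeywords( Keywords& keys ){ Analysis::registerKeywords( keys ); }
+  MyAnalysis( const ActionOptions& ao ) : PLUMED_ANALYSIS_INIT(ao) { checkRead(); }
+  void performAnalysis(){
+    std::vector<double> point( getNumberOfArguments() ); double weight;
+    for(unsigned i=0;i<getNumberOfDataPoints();++i){
+      getDataPoint( i, point, weight );   // the weighted data collected for you
+      // ... analyze the weighted point here ...
+    }
+  }
+};
+PLUMED_REGISTER_ACTION(MyAnalysis,"MYANALYSIS")
+\endverbatim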
+
+*/
+
+class Analysis :
+  public ActionPilot,
+  public ActionWithArguments
+  {
+private:
+/// Are we running only once for the whole trajectory
+  bool single_run;
+/// Are we treating each block of data separately
+  bool nomemory;
+/// Are we writing a checkpoint file
+  bool write_chq;
+/// Are we reusing data stored by another analysis action
+  bool reusing_data;
+/// If we are reusing data are we ignoring the reweighting in that data
+  bool ignore_reweight;
+/// The Analysis action that we are reusing data from
+  Analysis* mydatastash;
+/// The frequency with which we are performing analysis
+  unsigned freq;
+/// Number of data points we need to run the analysis
+  unsigned ndata;
+/// The temperature at which we are running the calculation
+  double simtemp;
+/// The temperature at which we want the histogram
+  double rtemp;
+/// Do we need the energy (are we reweighting at a different temperature)
+  bool needeng;
+/// The biases we are using in reweighting
+  std::vector<Value*> biases;
+/// The piece of data we are inserting
+  unsigned idata;
+/// Temporary vector to store the values of the arguments
+  std::vector<double> args;
+/// The data we are going to analyze
+  std::vector<std::vector<double> > data;
+/// The log-weights and weights of all the data points
+  std::vector<double> logweights, weights;
+/// Have we analyzed the data for the first time
+  bool firstAnalysisDone;
+/// The current and old values of the normalization constant
+  double norm, old_norm;
+/// The checkpoint file
+  PlumedOFile rfile;
+/// Read in data from a file
+  void readDataFromFile( const std::string& filename );
+/// This retrieves the value of norm from the analysis action.
+/// It is only used to transfer data from one analysis action to
+/// another. You should never need to use it. If you think you need it
+/// you probably need getNormalization()
+  double retrieveNorm() const ;
+protected:
+/// Return the number of arguments (this overrides the one in ActionWithArguments)
+  unsigned getNumberOfArguments() const;
+/// Return the number of data points
+  unsigned getNumberOfDataPoints() const;
+/// Retrieve the ith data point
+  void getDataPoint( const unsigned& idata, std::vector<double>& point, double& weight ) const ;
+/// Returns true if argument i is periodic together with the domain
+  bool getPeriodicityInformation(const unsigned& i, std::string& dmin, std::string& dmax);
+/// Return the normalization constant
+  double getNormalization() const;
+/// Are we analyzing each data block separately
+  bool usingMemory() const;
+/// Save the results in files from previous runs of the analysis algorithm
+  std::string saveResultsFromPreviousAnalyses( const std::string filename );
+/// Convert the stored log weights to proper weights
+  void finalizeWeights( const bool& ignore_weights );
+public:
+  static void registerKeywords( Keywords& keys );
+  Analysis(const ActionOptions&);
+  ~Analysis();
+  void prepare();
+  void calculate();
+  void update();
+  virtual void performAnalysis()=0;
+  void apply(){}
+  void runFinalJobs();
+  void runAnalysis();
+};
+
+inline
+unsigned Analysis::getNumberOfDataPoints() const {
+  if( !reusing_data ){
+      plumed_assert( data.size()==logweights.size() );
+      return data.size();
+  } else {
+      return mydatastash->getNumberOfDataPoints();
+  }
+}
+
+inline
+void Analysis::getDataPoint( const unsigned& idata, std::vector<double>& point, double& weight ) const {
+  if( !reusing_data ){
+      plumed_assert( idata<weights.size() && point.size()==getNumberOfArguments() );
+      for(unsigned i=0;i<point.size();++i) point[i]=data[idata][i];
+      weight=weights[idata];
+  } else {
+      mydatastash->getDataPoint( idata, point, weight );
+  }
+}
+
+inline
+bool Analysis::usingMemory() const {
+  if( !reusing_data ){
+      return !nomemory;
+  } else {
+      return mydatastash->usingMemory();
+  }
+}
+
+inline
+unsigned Analysis::getNumberOfArguments() const {
+  unsigned nargs=ActionWithArguments::getNumberOfArguments();
+  return nargs - biases.size();
+}
+
+inline
+double Analysis::retrieveNorm() const {
+  return norm;
+}
+
+}
+
+#endif
diff --git a/src/core/AnalysisHistogram.cpp b/src/core/AnalysisHistogram.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..15167b1a5dc76546f76c938883bf96fd6345d377
--- /dev/null
+++ b/src/core/AnalysisHistogram.cpp
@@ -0,0 +1,183 @@
+/* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+   Copyright (c) 2012 The plumed team
+   (see the PEOPLE file at the root of the distribution for a list of names)
+
+   See http://www.plumed-code.org for more information.
+
+   This file is part of plumed, version 2.0.
+
+   plumed is free software: you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   plumed is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with plumed. If not, see <http://www.gnu.org/licenses/>.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+#include "Analysis.h"
+#include "ActionRegister.h"
+#include "tools/Grid.h"
+#include "KernelFunctions.h"
+
+namespace PLMD{
+
+//+PLUMEDOC ANALYSIS HISTOGRAM
+/*
+Calculate the probability density as a function of a few CVs using kernel density estimation.
+
+\par Examples
+
+The following input monitors two torsional angles during a simulation
+and outputs a histogram as a function of them at the end of the simulation.
+\verbatim
+TORSION ATOMS=1,2,3,4 LABEL=r1
+TORSION ATOMS=2,3,4,5 LABEL=r2
+HISTOGRAM ...
+  ARG=r1,r2
+  USE_ALL_DATA
+  GRID_MIN=-3.14,-3.14
+  GRID_MAX=3.14,3.14
+  GRID_BIN=200,200
+  BANDWIDTH=0.05,0.05
+  GRID_WFILE=histo
+... HISTOGRAM
+\endverbatim
+
+The following input monitors two torsional angles during a simulation
+and outputs the histogram accumulated thus far every 100000 steps.
+\verbatim
+TORSION ATOMS=1,2,3,4 LABEL=r1
+TORSION ATOMS=2,3,4,5 LABEL=r2
+HISTOGRAM ...
+  ARG=r1,r2
+  RUN=100000
+  GRID_MIN=-3.14,-3.14
+  GRID_MAX=3.14,3.14
+  GRID_BIN=200,200
+  BANDWIDTH=0.05,0.05
+  GRID_WFILE=histo
+... HISTOGRAM
+\endverbatim
+
+The following input monitors two torsional angles during a simulation
+and outputs a separate histogram for each 100000-step block of the trajectory.
+\verbatim
+TORSION ATOMS=1,2,3,4 LABEL=r1
+TORSION ATOMS=2,3,4,5 LABEL=r2
+HISTOGRAM ...
+  ARG=r1,r2
+  RUN=100000 NOMEMORY
+  GRID_MIN=-3.14,-3.14
+  GRID_MAX=3.14,3.14
+  GRID_BIN=200,200
+  BANDWIDTH=0.05,0.05
+  GRID_WFILE=histo
+... HISTOGRAM
+\endverbatim
+
+*/
+//+ENDPLUMEDOC
+
+class AnalysisHistogram : public Analysis {
+private:
+  std::vector<std::string> gmin, gmax;
+  std::vector<double> point, bw;
+  std::vector<unsigned> gbin;
+  unsigned nfiles;
+  std::string gridfname;
+  std::string kerneltype;
+public:
+  static void registerKeywords( Keywords& keys );
+  AnalysisHistogram(const ActionOptions&ao);
+  void performAnalysis();
+};
+
+PLUMED_REGISTER_ACTION(AnalysisHistogram,"HISTOGRAM")
+
+void AnalysisHistogram::registerKeywords( Keywords& keys ){
+  Analysis::registerKeywords( keys );
+  keys.add("compulsory","GRID_MIN","the lower bounds for the grid");
+  keys.add("compulsory","GRID_MAX","the upper bounds for the grid");
+  keys.add("compulsory","GRID_BIN","the number of bins for the grid");
+  keys.add("compulsory","KERNEL","gaussian","the kernel function you are using. More details on the kernels available in plumed can be found in \\ref kernelfunctions.");
+  keys.add("compulsory","BANDWIDTH","the bandwidth for kernel density estimation");
+  keys.add("compulsory","GRID_WFILE","histogram","the file on which to write the grid");
+  keys.use("NOMEMORY");
+}
+
+AnalysisHistogram::AnalysisHistogram(const ActionOptions&ao):
+PLUMED_ANALYSIS_INIT(ao),
+gmin(getNumberOfArguments()),
+gmax(getNumberOfArguments()),
+point(getNumberOfArguments()),
+bw(getNumberOfArguments()),
+gbin(getNumberOfArguments()),
+nfiles(0)
+{
+  // Read stuff for Grid
+  parseVector("GRID_MIN",gmin);
+  parseVector("GRID_MAX",gmax);
+  parseVector("GRID_BIN",gbin);
+  parse("GRID_WFILE",gridfname);
+  // Read stuff for window functions
+  parseVector("BANDWIDTH",bw);
+  // Read the type of kernel we are using
+  parse("KERNEL",kerneltype);
+  checkRead();
+
+  log.printf("  Using %s kernel functions\n",kerneltype.c_str() );
+  log.printf("  Grid min");
+  for(unsigned i=0;i<gmin.size();++i) log.printf(" %s",gmin[i].c_str() );
+  log.printf("\n");
+  log.printf("  Grid max");
+  for(unsigned i=0;i<gmax.size();++i) log.printf(" %s",gmax[i].c_str() );
+  log.printf("\n");
+  log.printf("  Grid bin");
+  for(unsigned i=0;i<gbin.size();++i) log.printf(" %u",gbin[i]);
+  log.printf("\n");
+}
+
+void AnalysisHistogram::performAnalysis(){
+  // Back up old histogram files
+  std::string oldfname=saveResultsFromPreviousAnalyses( gridfname );
+
+  // Get pbc stuff for grid
+  std::vector<bool> pbc; std::string dmin,dmax;
+  for(unsigned i=0;i<getNumberOfArguments();++i){
+      pbc.push_back( getPeriodicityInformation(i,dmin,dmax) );
+      if(pbc[i]){ Tools::convert(dmin,gmin[i]); Tools::convert(dmax,gmax[i]); }
+  }
+
+  Grid* gg;
+  if( oldfname.length()>0 && usingMemory() ){
+      // Continue accumulating on the grid written during the previous analysis
+      PlumedIFile oldf; oldf.link(*this); oldf.open(oldfname);
+      gg = Grid::create( "probs", getArguments(), oldf, gmin, gmax, gbin, false, false, false );
+      oldf.close();
+  } else {
+      gg = new Grid( "probs", getArguments(), gmin, gmax, gbin, false, false );
+  }
+
+  // Now build the histogram
+  double weight; std::vector<double> point( getNumberOfArguments() );
+  for(unsigned i=0;i<getNumberOfDataPoints();++i){
+      getDataPoint( i, point, weight );
+      KernelFunctions kernel( point, bw, kerneltype, weight, true );
+      gg->addKernel( kernel );
+  }
+  // Normalize the histogram
+  gg->scaleAllValuesAndDerivatives( 1.0 / getNormalization() );
+
+  // Write the grid to a file
+  PlumedOFile gridfile; gridfile.link(*this); gridfile.open( gridfname );
+  gg->writeToFile( gridfile );
+  // Close the file and free the grid
+  gridfile.close(); delete gg;
+}
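+
+// Note (illustrative; an assumption about the Grid API rather than something
+// established in this patch): Grid::addKernel is expected to use
+// KernelFunctions::getSupport() so that each kernel only touches the grid
+// points within its cutoff, e.g. for a one-dimensional grid of spacing dx:
+//
+//   std::vector<double> dx( 1, 0.01 );
+//   std::vector<unsigned> neighbours = kernel.getSupport( dx );  // points per side
+//
+// which keeps the cost of adding a kernel independent of the total grid size.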
+
+}
diff --git a/src/core/KernelFunctions.cpp b/src/core/KernelFunctions.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..395014e06d67bf35c1c5ab07c746fd54af1ddb28
--- /dev/null
+++ b/src/core/KernelFunctions.cpp
@@ -0,0 +1,224 @@
+#include "KernelFunctions.h"
+
+namespace PLMD {
+
+//+PLUMEDOC INTERNAL kernelfunctions
+/*
+Constructing histograms is something you learnt to do relatively early in life. You perform an experiment a number of times,
+count the number of times each result comes up and then draw a bar graph that describes how often each of the results came up.
+This only works when there is a finite number of possible results. If the result is a number between 0 and 1 the bar chart is
+less easy to draw, as there are as many possible results as there are numbers between zero and one - an infinite number.
+To resolve this problem we replace the probability, \f$P\f$, with the probability density, \f$\pi\f$, and write the probability
+of getting a number between \f$a\f$ and \f$b\f$ as:
+
+\f[
+P = \int_{a}^b \textrm{d}x \, \pi(x)
+\f]
+
+To calculate probability densities from a set of results we use a process called kernel density estimation.
+Histograms are accumulated by adding up kernel functions, \f$K\f$, with finite spatial extent, that integrate to one.
+These functions are centered on each of the \f$n\f$-dimensional data points, \f$\mathbf{x}_i\f$. The overall effect of this
+is that each result we obtain in our experiments contributes to the probability density in a finite-sized region of the space.
+
+Expressing all this mathematically, in kernel density estimation we write the probability density as:
+
+\f[
+\pi(\mathbf{x}) = \sum_i K\left[ (\mathbf{x} - \mathbf{x}_i)^T \Sigma (\mathbf{x} - \mathbf{x}_i) \right]
+\f]
+
+where \f$\Sigma\f$ is an \f$n \times n\f$ matrix called the bandwidth that controls the spatial extent of
+the kernel. Whenever we accumulate a histogram (e.g. in \ref HISTOGRAM or in \ref METAD) we use this
+technique.
+
+There is thus some flexibility in the particular function we use for \f$K[\mathbf{r}]\f$ in the above.
+The following variants are available.
+
+<table align=center frame=void width=95%% cellpadding=5%%>
+<tr>
+<td> TYPE </td> <td> FUNCTION </td>
+</tr> <tr>
+<td> gaussian </td> <td> \f$f(r) = \frac{1}{(2 \pi)^{\frac{n}{2}} \sqrt{|\Sigma^{-1}|}} \exp\left(-0.5 r^2 \right)\f$ </td>
+</tr> <tr>
+<td> triangular </td> <td> \f$f(r) = \frac{3}{V} ( 1 - | r | )H(1-|r|) \f$ </td>
+</tr> <tr>
+<td> uniform </td> <td> \f$f(r) = \frac{1}{V}H(1-|r|)\f$ </td>
+</tr>
+</table>
+
+In the above \f$H(y)\f$ is a function that is equal to one when \f$y>0\f$ and zero when \f$y \le 0\f$, \f$n\f$ is
+the dimensionality of the vector \f$\mathbf{x}\f$ and \f$V\f$ is the volume of an ellipsoid in an \f$n\f$-dimensional
+space, which is given by:
+
+\f{eqnarray*}{
+V &=& \sqrt{|\Sigma^{-1}|} \frac{ \pi^{\frac{n}{2}} }{\left( \frac{n}{2} \right)! } \qquad \textrm{for even} \quad n \\
+V &=& \sqrt{|\Sigma^{-1}|} \frac{ 2^{\frac{n+1}{2}} \pi^{\frac{n-1}{2}} }{ n!! } \qquad \textrm{for odd} \quad n
+\f}
+
+In \ref METAD the normalization constants are ignored so that the value of the function at \f$r=0\f$ is equal
+to one. In addition, in \ref METAD we must be able to differentiate the bias in order to get forces. This limits
+the kernels we can use in this method.
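+
+As a concrete illustration of the normalization (consistent with the constructor
+below), for a two-dimensional Gaussian kernel with a diagonal bandwidth with
+elements \f$\sigma_1\f$ and \f$\sigma_2\f$, a kernel of weight \f$w\f$ that is
+asked to integrate to one is given the height:
+
+\f[
+h = \frac{w}{ (2\pi)^{\frac{2}{2}} \sqrt{ \sigma_1^2 \sigma_2^2 } } = \frac{w}{ 2\pi \sigma_1 \sigma_2 }
+\f]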
+*/
+//+ENDPLUMEDOC
+
+KernelFunctions::KernelFunctions( const std::vector<double>& at, const std::vector<double>& sig, const std::string& type, const double& w, const bool& norm ):
+center(at),
+width(sig)
+{
+  unsigned ncv=center.size();
+  if( width.size()==ncv ) diagonal=true;
+  else if( width.size()==(ncv*(ncv+1))/2 ) diagonal=false;
+  else plumed_massert(0,"specified sigma is neither a diagonal nor a full covariance matrix");
+
+  // Setup the kernel type
+  if(type=="GAUSSIAN" || type=="gaussian"){
+      ktype=gaussian;
+  } else if(type=="UNIFORM" || type=="uniform"){
+      ktype=uniform;
+  } else if(type=="TRIANGULAR" || type=="triangular"){
+      ktype=triangular;
+  } else {
+      // Without this branch ktype would be left uninitialized for unknown types
+      plumed_merror(type + " is not a valid kernel type");
+  }
+
+  if( norm ){
+      double det;
+      if(diagonal){
+          det=1; for(unsigned i=0;i<width.size();++i) det*=width[i];
+      } else {
+          Matrix<double> mymatrix( getMatrix() ), myinv( ncv, ncv );
+          Invert(mymatrix,myinv); double logd;
+          logdet( myinv, logd );
+          det=exp(logd);
+      }
+      double volume;
+      if( ktype==gaussian ){
+          for(unsigned i=0;i<width.size();++i) det*=width[i];
+          volume=pow( 2*pi, 0.5*ncv ) * pow( det, 0.5 );
+      } else if( ktype==uniform || ktype==triangular ){
+          if( ncv%2==1 ){
+              double dfact=1;
+              for(unsigned i=1;i<ncv;i+=2) dfact*=static_cast<double>(i);
+              volume=( pow( pi, (ncv-1)/2 ) ) * ( pow( 2., (ncv+1)/2 ) ) / dfact;
+          } else {
+              double fact=1.;
+              for(unsigned i=1;i<ncv/2;++i) fact*=static_cast<double>(i);
+              volume=pow( pi, ncv/2 ) / fact;
+          }
+          if(ktype==uniform) volume*=det;
+          else if(ktype==triangular) volume*=det / 3.;
+      } else {
+          plumed_merror("not a valid kernel type");
+      }
+      height=w / volume;
+  } else {
+      height=w;
+  }
+}
+
+double KernelFunctions::getCutoff( const double& width ) const {
+  double DP2CUTOFF=6.25;
+  if( ktype==gaussian ) return sqrt(2.0*DP2CUTOFF)*width;
+  else if(ktype==triangular ) return width;
+  else if(ktype==uniform) return width;
+  else plumed_merror("no valid kernel type");
+  return 0.0;
+}
+
+std::vector<unsigned> KernelFunctions::getSupport( const std::vector<double>& dx ) const {
+  plumed_assert( ndim()==dx.size() );
+  std::vector<unsigned> support( dx.size() );
+  if(diagonal){
+      for(unsigned i=0;i<dx.size();++i) support[i]=static_cast<unsigned>(ceil( getCutoff(width[i])/dx[i] ));
+  } else {
+      unsigned ncv=ndim();
+      Matrix<double> mymatrix( getMatrix() ), myinv( ncv, ncv );
+      Invert(mymatrix,myinv);
+      Matrix<double> myautovec(ncv,ncv); std::vector<double> myautoval(ncv);
+      diagMat(myinv,myautoval,myautovec);
+      for(unsigned i=0;i<dx.size();++i){
+          double extent=fabs(sqrt(myautoval[0])*myautovec(i,0));
+          support[i]=static_cast<unsigned>(ceil( getCutoff( extent )/dx[i] ));
+      }
+  }
+  return support;
+}
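+
+// In evaluate() below the scaled displacements that build up r^2 are reused for
+// the derivatives. For the normalized Gaussian, K = h*exp(-r^2/2) with
+// r^2 = sum_i d_i^2, where d_i is the bandwidth-scaled difference from the
+// center, so
+//
+//   dK/dd_i = -h*exp(-r^2/2)*d_i = kderiv*derivatives[i]
+//
+// which is why derivatives[i] is first set to d_i and only multiplied by
+// kderiv at the very end.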
+
+double KernelFunctions::evaluate( const std::vector<Value*>& pos, std::vector<double>& derivatives, bool usederiv ) const {
+  plumed_assert( pos.size()==ndim() && derivatives.size()==ndim() );
+  if( usederiv ) plumed_massert( ktype!=uniform, "step function cannot be differentiated" );
+
+  double r2=0;
+  if(diagonal){
+      for(unsigned i=0;i<ndim();++i){
+          derivatives[i]=pos[i]->difference( center[i] ) / width[i];
+          r2+=derivatives[i]*derivatives[i];
+      }
+  } else {
+      Matrix<double> mymatrix( getMatrix() );
+      for(unsigned i=0;i<mymatrix.nrows();++i){
+          double dp_i, dp_j; derivatives[i]=0;
+          dp_i=pos[i]->difference( center[i] );
+          for(unsigned j=0;j<mymatrix.ncols();++j){
+              if(i==j) dp_j=dp_i;
+              else dp_j=pos[j]->difference( center[j] );
+
+              derivatives[i]+=mymatrix(i,j)*dp_j;
+              r2+=dp_i*dp_j*mymatrix(i,j);
+          }
+      }
+  }
+  double kderiv, kval;
+  if(ktype==gaussian){
+      kval=height*exp(-0.5*r2); kderiv=-kval;
+  } else {
+      double r=sqrt(r2);
+      if(ktype==triangular){
+          if( r<1.0 ){
+              kval=height*( 1. - fabs(r) );
+              kderiv=-height;
+          } else {
+              kval=0.; kderiv=0.;
+          }
+      } else if(ktype==uniform){
+          kderiv=0.;
+          if(r<1.0) kval=height;
+          else kval=0.;
+      } else {
+          plumed_merror("not a valid kernel type");
+      }
+      // Guard against dividing by zero at the kernel center
+      if( r>0 ) kderiv/=r;
+      else kderiv=0.;
+  }
+  for(unsigned i=0;i<ndim();++i) derivatives[i]*=kderiv;
+  return kval;
+}
+
+KernelFunctions* KernelFunctions::read( PlumedIFile* ifile, const std::vector<std::string>& valnames ){
+  std::string sss; ifile->scanField("multivariate",sss);
+  std::vector<double> cc( valnames.size() ), sig;
+  if( sss=="true" ){
+      sig.resize( valnames.size() );
+      for(unsigned i=0;i<valnames.size();++i){
+          ifile->scanField(valnames[i],cc[i]);
+          ifile->scanField("sigma_"+valnames[i],sig[i]);
+      }
+  } else if( sss=="false" ){
+      unsigned ncv=valnames.size();
+      sig.resize( (ncv*(ncv+1))/2 );
+      Matrix<double> upper(ncv,ncv), lower(ncv,ncv);
+      for(unsigned i=0;i<ncv;++i){
+          ifile->scanField(valnames[i],cc[i]);
+          for(unsigned j=0;j<ncv-i;j++){ ifile->scanField("sigma_" +valnames[j+i] + "_" + valnames[j], lower(j+i,j) ); upper(j,j+i)=lower(j+i,j); }
+      }
+      Matrix<double> mymult( ncv, ncv ), invmatrix(ncv,ncv);
+      mult(lower,upper,mymult); Invert( mymult, invmatrix );
+      unsigned k=0;
+      for(unsigned i=0;i<ncv;i++){
+          for(unsigned j=i;j<ncv;j++){ sig[k]=invmatrix(i,j); k++; }
+      }
+  } else {
+      plumed_merror("multivariate flag should equal true or false");
+  }
+  double h; ifile->scanField("height",h);
+  return new KernelFunctions( cc, sig, "gaussian", h, false );
+}
+
+}
diff --git a/src/core/KernelFunctions.h b/src/core/KernelFunctions.h
new file mode 100644
index 0000000000000000000000000000000000000000..9ce0efedfbad43e9e5b86ff66f29bc1b5db7264a
--- /dev/null
+++ b/src/core/KernelFunctions.h
@@ -0,0 +1,88 @@
+/* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+   Copyright (c) 2012 The plumed team
+   (see the PEOPLE file at the root of the distribution for a list of names)
+
+   See http://www.plumed-code.org for more information.
+
+   This file is part of plumed, version 2.0.
+
+   plumed is free software: you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   plumed is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with plumed. If not, see <http://www.gnu.org/licenses/>.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+#ifndef __PLUMED_KernelFunctions_h
+#define __PLUMED_KernelFunctions_h
+
+#include "tools/Tools.h"
+#include "tools/Matrix.h"
+#include "Value.h"
+#include <vector>
+#include <math.h>
+
+namespace PLMD {
+
+class KernelFunctions {
+private:
+/// Is the metric matrix diagonal
+  bool diagonal;
+/// What type of kernel are we using
+  enum {gaussian,uniform,triangular} ktype;
+/// The center of the kernel function
+  std::vector<double> center;
+/// The width of the kernel
+  std::vector<double> width;
+/// The height of the kernel
+  double height;
+/// Convert the width into matrix form
+  Matrix<double> getMatrix() const;
+/// Get the cutoff for a kernel
+  double getCutoff( const double& width ) const ;
+public:
+/// Does the kernel have derivatives
+  bool hasderivatives;
+  KernelFunctions( const std::vector<double>& at, const std::vector<double>& sig, const std::string& type, const double& w, const bool& norm );
+/// Get the dimensionality of the kernel
+  unsigned ndim() const;
+/// Get the position of the center
+  std::vector<double> getCenter() const;
+/// Get the support
+  std::vector<unsigned> getSupport( const std::vector<double>& dx ) const;
+/// Evaluate the kernel function
+  double evaluate( const std::vector<Value*>& pos, std::vector<double>& derivatives, bool usederiv=true ) const;
+/// Read a kernel function from a file
+  static KernelFunctions* read( PlumedIFile* ifile, const std::vector<std::string>& valnames );
+};
+
+inline
+Matrix<double> KernelFunctions::getMatrix() const {
+  unsigned k=0, ncv=ndim(); Matrix<double> mymatrix(ncv,ncv);
+  for(unsigned i=0;i<ncv;i++){
+      for(unsigned j=i;j<ncv;j++){
+          mymatrix(i,j)=mymatrix(j,i)=width[k];  // recompose the full symmetric matrix
+          k++;
+      }
+  }
+  return mymatrix;
+}
+
+inline
+unsigned KernelFunctions::ndim() const {
+  return center.size();
+}
+
+inline
+std::vector<double> KernelFunctions::getCenter() const {
+  return center;
+}
+
+}
+#endif