diff --git a/src/crystallization/VectorMultiColvar.cpp b/src/crystallization/VectorMultiColvar.cpp index 39927875ecbfe2509d1e8ddc1422031701a62e01..7f542c62cac65a16a02587c7df331d7798bb94a1 100644 --- a/src/crystallization/VectorMultiColvar.cpp +++ b/src/crystallization/VectorMultiColvar.cpp @@ -28,6 +28,23 @@ namespace crystallization { void VectorMultiColvar::registerKeywords( Keywords& keys ){ MultiColvar::registerKeywords( keys ); + keys.setComponentsIntroduction("When the label of this action is used as the input for a second you are not referring to a scalar quantity as you are in " + "regular collective variables. The label is used to reference the full set of vectors calculated by " + "the action. This is usual when using \\ref multicolvarfunction. Generally when doing this the previously calculated " + "multicolvar will be referenced using the DATA keyword rather than ARG.\n\n" + "This Action can be used to calculate the following scalar quantities directly. These quantities are calculated by " + "employing the keywords listed below. " + "These quantities can then be referenced elsewhere in the input file by using this Action's label " + "followed by a dot and the name of the quantity. All of them can be calculated multiple times " + "with different parameters. In this case the quantities calculated can be referenced elsewhere in the " + "input by using the name of the quantity followed by a numerical identifier " + "e.g. <em>label</em>.lessthan-1, <em>label</em>.lessthan-2 etc. When doing this and, for clarity we have " + "made the label of the components customizable. As such by using the LABEL keyword in the description of the keyword " + "input you can customize the component name. In addition, you can calculate all of these scalar functions for " + "one particular component of the calculated vector by making use of the COMPONENT keyword. The first component is used to " + "refer to the norm of the vector. The individual components can then be referenced using the numbers 2, 3, and so on. So " + "as an example MEAN1={COMPONET=1} calculates the average vector norm. MEAN2={COMPONENT=2} by contrast calculates the mean " + "for all of the first components of the vectors."); } VectorMultiColvar::VectorMultiColvar(const ActionOptions& ao): diff --git a/src/multicolvar/PAMM.cpp b/src/multicolvar/PAMM.cpp index 1d35ed7b80113846491f32e01593747c607fe44b..a7b51a67a048fd481bc74ca1a7e5593fd7387948 100644 --- a/src/multicolvar/PAMM.cpp +++ b/src/multicolvar/PAMM.cpp @@ -28,12 +28,76 @@ /* Probabilistic analysis of molecular mofifs. -This is an implementation of the methods discussed in \cite{pamm} +Probabilistic analysis of molecular motifs (PAMM) was introduced in this paper \cite{pamm}. +The essence of this approach involves calculating some large set of collective variables +for a set of atoms in a short trajectory and fitting this data using a Gaussian Mixture Model. +The idea is that modes in these distributions can be used to identify features such as hydrogen bonds or +secondary structure types. -\warning Mixing periodic and aperiodic variables in input has not been tested +The assumption within this implementation is that the fitting of the Gaussian mixture model has been +done elsewhere by a separate code. You thus provide an input file to this action which contains the +means, covariances and weights for a set of Gaussian kernels, \f$\{ \phi \}\f$. The values and +derivatives for the following set of quantities is then computed: + +\f[ +s_k = \frac{ \phi_k}{ \sum_i \phi_i } +\f] + +Each of the \f$\phi_k\f$ is a Gaussian function that acts on a set of quantities calculated within +a \ref mcolv. These might be \ref TORSIONS, \ref DISTANCES, \ref ANGLES or any one of the many +symmetry functions that are available within \ref mcolv actions. These quantities are then inserted into +the set of \f$n\f$ kernels that are in the the input file. This will be done for multiple sets of values +for the input quantities and a final quantity will be calculated by summing the above \f$s_k\f$ values or +some transformation of the above. This sounds less complicated than it is and is best understood by +looking through the example given below. + +\warning Mixing periodic and aperiodic \ref mcolv actions has not been tested \par Examples +In this example I will explain in detail what the following input is computing: + +\verbatim +MOLINFO MOLTYPE=protein STRUCTURE=M1d.pdb +psi: TORSIONS ATOMS1=@psi-2 ATOMS2=@psi-3 ATOMS3=@psi-4 +phi: TORSIONS ATOMS1=@phi-2 ATOMS2=@phi-3 ATOMS3=@phi-4 +p: PAMM DATA=phi,psi CLUSTERS=clusters.dat MEAN1={COMPONENT=1} MEAN2={COMPONENT=2} +PRINT ARG=p.mean-1,mean-2 FILE=colvar +\endverbatim + +The best place to start our explanation is to look at the contents of the clusters.dat file + +\verbatim +#! FIELDS weight center-phi center-psi width-phi-phi width-phi-psi width-psi-phi width-psi-psi + 0.4 -1.0 -1.0 0.2 -0.1 -0.1 0.2 + 0.6 1.0 +1.0 0.1 -0.03 -0.03 0.1 +\endverbatim + +This files contains the parameters of two two-dimensional Gaussian functions. Each of these Gaussians has a weight, \f$w_k\f$, +a vector that specifies the position of its centre, \f$\mathbf{c}_\f$, and a covariance matrix, \f$\Sigma_k\f$. The \f$\phi_k\f$ functions that +we use to calculate our PAMM components are thus: + +\f[ +\phi_k = \frac{w_k}{N_k} \exp\left( -(\mathbf{s} - \mathbf{c}_k)^T \Sigma^{-1}_k (\mathbf{s} - \mathbf{c}_k) \right) +\f] + +In the above \f$N_k\f$ is a normalisation factor that is calculated based on \f$\Sigma\f$. The vector \f$\mathbf{s}\f$ is a vector of quantities +that are calculated by the \ref TORSIONS actions. This vector must be two dimensional and in this case each component is the value of a +torsion angle. If we look at the two \ref TORSIONS actions in the above we are calculating the \f$\phi\f$ and \f$\psi\f$ backbone torsional +angles in a protein (Note the use of \ref MOLFINTO to make specification of atoms straightforward). We thus calculate the values of our +2 \f$ \{ \phi \} \f$ kernels 3 times. The first time we use the \f$\phi\f$ and \f$\psi\f$ angles in the 2nd resiude of the protein, +the second time it is the \f$\phi\f$ and \f$\psi\f$ angles of the 3rd residue of the protein and the third time it is the \f$\phi\f$ and \f$\psi\f$ angles +of the 4th residue in the protein. The final two quantities that are output by the print command, p.mean-1 and p.mean-2, are the averages +over these three residues for the quantities: +\f[ +s_1 = \frac{ \phi_1}{ \phi_1 + \phi_2 } +\f] +and +\f[ +s_2 = \frac{ \phi_2}{ \phi_1 + \phi_2 } +\f] +There is a great deal of flexibility in this input. We can work with, and examine, any number of components, we can use any set of collective variables +and compute these PAMM variables and we can transform the PAMM variables themselves in a large number of different ways when computing these sums. */ //+ENDPLUMEDOC @@ -68,6 +132,21 @@ void PAMM::registerKeywords( Keywords& keys ){ keys.add("compulsory","CLUSTERS","the name of the file that contains the definitions of all the clusters"); keys.add("compulsory","REGULARISE","0.001","don't allow the denominator to be smaller then this value"); keys.use("MEAN"); keys.use("MORE_THAN"); keys.use("SUM"); keys.use("LESS_THAN"); + keys.setComponentsIntroduction("When the label of this action is used as the input for a second you are not referring to a scalar quantity as you are in " + "regular collective variables. The label is used to reference the full set of quantities calculated by " + "the action. This is usual when using \\ref multicolvarfunction. Generally when doing this the set of PAMM variables " + "will be referenced using the DATA keyword rather than ARG.\n\n" + "This Action can be used to calculate the following scalar quantities directly from the underlying set of PAMM variables. " + "These quantities are calculated by employing the keywords listed below and they can be referenced elsewhere in the input " + "file by using this Action's label followed by a dot and the name of the quantity. The particular PAMM variable that should " + "be averaged in a MEAN command or transformed by a swiching function in a LESS_THAN command is specified using the COMPONENT " + "keyword. COMPONENT=1 refers to the PAMM variable in which the first kernel in your input file is on the numerator, COMPONENT=2 refers to " + "PAMM variable in which the second kernel in the input file is on the numerator and so on. The same quantity can be calculated " + "multiple times for different PAMM components by a single PAMM action. In this case the relevant keyword must appear multiple " + "times on the input line followed by a numerical identifier i.e. MEAN1, MEAN2, ... The quantities calculated when multiple " + "MEAN commands appear on the input line can be referenece elsewhere in the input file by using the name of the quantity followed " + "followed by a numerical identifier e.g. <em>label</em>.lessthan-1, <em>label</em>.lessthan-2 etc. Alternatively, you can " + "customize the labels of the quantities by using the LABEL keyword in the description of the keyword."); } PAMM::PAMM(const ActionOptions& ao): diff --git a/src/vesselbase/Histogram.cpp b/src/vesselbase/Histogram.cpp index b8129d451f90a7878f66a03b267b07cea4c7b51a..a3ebc2d24ecfa6ee6e38e8d161b932d482d8d3e1 100644 --- a/src/vesselbase/Histogram.cpp +++ b/src/vesselbase/Histogram.cpp @@ -40,6 +40,7 @@ void Histogram::registerKeywords( Keywords& keys ){ HistogramBead::registerKeywords( keys ); keys.add("compulsory","NBINS","The number of equal width bins you want to divide the range into"); keys.addFlag("NORM",false,"calculate the fraction of values rather than the number"); + keys.add("compulsory","COMPONENT","1","the component of the vector for which to calculate this quantity"); } void Histogram::reserveKeyword( Keywords& keys ){ @@ -52,6 +53,8 @@ ShortcutVessel(da) { bool norm; parseFlag("NORM",norm); std::string normstr=""; if(norm) normstr=" NORM"; + std::string compstr; parse("COMPONENT",compstr); + normstr+=" COMPONENT=" + compstr; std::vector<std::string> bins; HistogramBead::generateBins( getAllInput(), bins ); for(unsigned i=0;i<bins.size();++i) addVessel("BETWEEN",bins[i] + normstr); } diff --git a/src/vesselbase/ValueVessel.cpp b/src/vesselbase/ValueVessel.cpp index a0bc9ade325468aaddd135c96a43f25a096c35bc..1bf7440f8c3aa0693fc3f07811f1c42cbb9d1944 100644 --- a/src/vesselbase/ValueVessel.cpp +++ b/src/vesselbase/ValueVessel.cpp @@ -26,7 +26,7 @@ namespace vesselbase{ void ValueVessel::registerKeywords( Keywords& keys ){ Vessel::registerKeywords( keys ); - keys.add("compulsory","COMPONENT","1","the component of the vector for which to calculate this quantity"); + keys.add("compulsory","COMPONENT","1","the component of the vector for which to calculate this quantity"); } ValueVessel::ValueVessel( const VesselOptions& da ):