diff --git a/src/analysis/LandmarkSelectionBase.cpp b/src/analysis/LandmarkSelectionBase.cpp index d6ecce52ed847d9c2546992089641df9a6178bb1..cff31bf575ee5ab8c4ae6b8b85b3cff53702370f 100644 --- a/src/analysis/LandmarkSelectionBase.cpp +++ b/src/analysis/LandmarkSelectionBase.cpp @@ -57,21 +57,29 @@ void LandmarkSelectionBase::performAnalysis(){ if( !novoronoi ){ lweights.assign(lweights.size(),0.0); - unsigned rank=comm.Get_rank(), size=comm.Get_size(); - for(unsigned i=rank;i<mydata->getNumberOfDataPoints();i+=size){ - unsigned closest=0; - double mindist=mydata->getDissimilarity( i, landmark_indices[0] ); - for(unsigned j=1;j<nlandmarks;++j){ - double dist=mydata->getDissimilarity( i, landmark_indices[j] ); - if( dist<mindist ){ mindist=dist; closest=j; } - } - lweights[closest] += mydata->getWeight(i); - } - comm.Sum( &lweights[0], lweights.size() ); + std::vector<unsigned> tmpass( mydata->getNumberOfDataPoints() ); + voronoiAnalysis( landmark_indices, lweights, tmpass ); } else { for(unsigned i=0;i<nlandmarks;++i) lweights[i]=getWeight( landmark_indices[i] ); } } +void LandmarkSelectionBase::voronoiAnalysis( const std::vector<unsigned>& myindices, std::vector<double>& lweights, std::vector<unsigned>& assignments ) const { + plumed_dbg_assert( myindices.size()==lweights.size() && assignments.size()==mydata->getNumberOfDataPoints() ); + lweights.assign( lweights.size(), 0 ); + unsigned rank=comm.Get_rank(), size=comm.Get_size(); + for(unsigned i=rank;i<mydata->getNumberOfDataPoints();i+=size){ + assignments[i]=0; + double mindist=mydata->getDissimilarity( i, myindices[0] ); + for(unsigned j=1;j<nlandmarks;++j){ + double dist=mydata->getDissimilarity( i, myindices[j] ); + if( dist<mindist ){ mindist=dist; assignments[i]=j; } + } + lweights[ assignments[i] ] += mydata->getWeight(i); + } + comm.Sum( &lweights[0], lweights.size() ); + comm.Sum( &assignments[0], assignments.size() ); +} + } } diff --git a/src/analysis/LandmarkSelectionBase.h b/src/analysis/LandmarkSelectionBase.h index 801c39ee14228c730eddea80dcbb120d03707e38..cf4bab5b97fe541213244de01c8cc861c47e2bb7 100644 --- a/src/analysis/LandmarkSelectionBase.h +++ b/src/analysis/LandmarkSelectionBase.h @@ -41,6 +41,8 @@ private: protected: /// Transfer frame i in the underlying action to the object we are going to analyze void selectFrame( const unsigned& ); +/// Do a voronoi analysis + void voronoiAnalysis( const std::vector<unsigned>& myindices, std::vector<double>& lweights, std::vector<unsigned>& assignments ) const ; public: static void registerKeywords( Keywords& keys ); LandmarkSelectionBase( const ActionOptions& ao ); diff --git a/src/analysis/LandmarkStaged.cpp b/src/analysis/LandmarkStaged.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ffc72c78363c06b9caa2e49ed4a0941c6f7dc778 --- /dev/null +++ b/src/analysis/LandmarkStaged.cpp @@ -0,0 +1,135 @@ +/* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + Copyright (c) 2012-2014 The plumed team + (see the PEOPLE file at the root of the distribution for a list of names) + + See http://www.plumed-code.org for more information. + + This file is part of plumed, version 2. + + plumed is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + plumed is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with plumed. If not, see <http://www.gnu.org/licenses/>. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ +#include "LandmarkSelectionBase.h" +#include "core/ActionRegister.h" +#include "tools/Random.h" +#include <iostream> + +//+PLUMEDOC LANDMARKS LANDMARK_SELECT_STAGED +/* +Select a set of landmarks using the staged algorithm. + +\par Examples + +*/ +//+ENDPLUMEDOC + + +namespace PLMD { +namespace analysis { + +class LandmarkStaged : public LandmarkSelectionBase { +private: + unsigned seed; + double gamma; +public: + static void registerKeywords( Keywords& keys ); + LandmarkStaged( const ActionOptions& ao ); + void selectLandmarks(); +}; + +PLUMED_REGISTER_ACTION(LandmarkStaged,"LANDMARK_SELECT_STAGED") + +void LandmarkStaged::registerKeywords( Keywords& keys ){ + LandmarkSelectionBase::registerKeywords(keys); + keys.add("compulsory","GAMMA","the gamma parameter to be used in weights"); + keys.add("compulsory","SEED","1234","a random number seed"); +} + +LandmarkStaged::LandmarkStaged( const ActionOptions& ao ): +Action(ao), +LandmarkSelectionBase(ao) +{ + parse("SEED",seed); parse("GAMMA",gamma); + log.printf(" probability of selecting voronoi polyhedra equal to exp(-weight/%f) \n", gamma ); +} + +void LandmarkStaged::selectLandmarks(){ + unsigned int n = getNumberOfDataPoints(); // The number of landmarks to pick + unsigned int N = mydata->getNumberOfDataPoints(); // The total number of frames we can choose from + unsigned int m = static_cast<int>( sqrt(n*N) ); + std::vector<unsigned> fpslandmarks(m); + // Select first point at random + Random random; random.setSeed(-seed); double rand=random.RandU01(); + fpslandmarks[0] = std::floor( N*rand ); + + // using FPS we want to find m landmarks where m = sqrt(nN) + // Now find distance to all other points + Matrix<double> distances( m, N ); + for(unsigned int i=0;i<N;++i) { + distances(0,i) = mydata->getDissimilarity( fpslandmarks[0], i ); + } + + // Now find all other landmarks + for(unsigned i=1;i<m;++i){ + // Find point that has the largest minimum distance from the landmarks selected thus far + double maxd=0; + for(unsigned j=0;j<N;++j){ + double mind=distances(0,j); + for(unsigned k=1;k<i;++k){ + if( distances(k,j)<mind ){ mind=distances(k,j); } + } + if( mind>maxd ){ maxd=mind; fpslandmarks[i]=j; } + } + for(unsigned k=0;k<N;++k) distances(i,k) = mydata->getDissimilarity( fpslandmarks[i], k ); + } + + // Initial FPS selection of m landmarks completed + // Now find voronoi weights of these m points + std::vector<unsigned> poly_assign( N ); + std::vector<double> weights( m, 0 ); + voronoiAnalysis( fpslandmarks, weights, poly_assign ); + + //Calulate total weight of voronoi polyhedras + double vweight=0; for(unsigned i=0;i<m;i++) vweight += exp( -weights[i] / gamma ); + + std::vector<bool> selected(N, false); unsigned ncount=0; + while ( ncount<n){ +// generate random number and check which point it belongs to. select only it was not selected before + double rand = vweight*random.RandU01(); + double running_vweight=0; + for(unsigned jpoly=0;jpoly<m;++jpoly){ + running_vweight+=exp( -weights[jpoly] / gamma ); + if( running_vweight>=rand ){ + double tweight=0; + for(unsigned i=0;i<poly_assign.size();++i){ + if( poly_assign[i]==jpoly ) tweight += getWeight( i ); + } + double rand_poly = tweight*random.RandU01(); + double running_tweight=0; + for(unsigned i=0;i<N;++i){ + if( poly_assign[i]==jpoly ){ + running_tweight += getWeight( i ); + if( running_tweight>=rand_poly && !selected[i] ){ + selectFrame(i); selected[i]=true; ncount++; break; + } else if( running_tweight>=rand_poly ){ + break; + } + } + } + } + } + } +} + +} +} diff --git a/src/analysis/ReadDissimilarityMatrix.cpp b/src/analysis/ReadDissimilarityMatrix.cpp index 3f78848c326bcf0e1fea40ff1b63fee0bfe148ab..d6ab575e4a970ff42dea5ccd2faad2315dd317ff 100644 --- a/src/analysis/ReadDissimilarityMatrix.cpp +++ b/src/analysis/ReadDissimilarityMatrix.cpp @@ -109,8 +109,12 @@ void ReadDissimilarityMatrix::update(){ if(!mydata) plumed.stop(); } void ReadDissimilarityMatrix::performAnalysis(){ IFile mfile; mfile.open(fname); // Read in first line - std::vector<std::string> words; Tools::getParsedLine( mfile, words ); - nnodes=words.size(); dissimilarities.resize( nnodes, nnodes ); + std::vector<std::string> words; nnodes=0; + while( nnodes==0 ){ + Tools::getParsedLine( mfile, words ); + nnodes=words.size(); + } + dissimilarities.resize( nnodes, nnodes ); for(unsigned j=0;j<nnodes;++j) Tools::convert( words[j], dissimilarities(0,j) ); for(unsigned i=1;i<nnodes;++i){ diff --git a/src/analysis/SelectRandomFrames.cpp b/src/analysis/SelectRandomFrames.cpp new file mode 100644 index 0000000000000000000000000000000000000000..05834cd28714abbe0f22b85116c20ab6bafb6570 --- /dev/null +++ b/src/analysis/SelectRandomFrames.cpp @@ -0,0 +1,80 @@ +/* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + Copyright (c) 2012-2014 The plumed team + (see the PEOPLE file at the root of the distribution for a list of names) + + See http://www.plumed-code.org for more information. + + This file is part of plumed, version 2. + + plumed is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + plumed is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with plumed. If not, see <http://www.gnu.org/licenses/>. ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ +#include "LandmarkSelectionBase.h" +#include "core/ActionRegister.h" +#include "../tools/Random.h" + +//+PLUMEDOC LANDMARKS LANDMARK_SELECT_RANDOM +/* +Select a random set of landmarks from a large set of configurations. + +\par Examples + +*/ +//+ENDPLUMEDOC + +namespace PLMD { +namespace analysis { + +class SelectRandomFrames : public LandmarkSelectionBase { +private: + unsigned seed; +public: + static void registerKeywords( Keywords& keys ); + SelectRandomFrames( const ActionOptions& ao ); + void selectLandmarks(); +}; + +PLUMED_REGISTER_ACTION(SelectRandomFrames,"LANDMARK_SELECT_RANDOM") + +void SelectRandomFrames::registerKeywords( Keywords& keys ){ + LandmarkSelectionBase::registerKeywords(keys); + keys.add("compulsory","SEED","1234","a random number seed"); +} + +SelectRandomFrames::SelectRandomFrames( const ActionOptions& ao ): +Action(ao), +LandmarkSelectionBase(ao) +{ + parse("SEED",seed); +} + +void SelectRandomFrames::selectLandmarks(){ + Random r; r.setSeed(-seed); + unsigned nframe=mydata->getNumberOfDataPoints(); + unsigned nland=getNumberOfDataPoints(); + + std::vector<bool> selected( nframe, false ); + + unsigned fcount=0; + while (fcount<nland){ + unsigned iframe = std::floor( r.U01()*nframe ); + if (!selected[iframe]){ + selected[iframe]=true; + selectFrame( iframe ); + ++fcount; + } + } +} + +} +} diff --git a/src/dimred/SketchMapPointwise.cpp b/src/dimred/SketchMapPointwise.cpp index bb168366560febda6b112ab4c007239bc99c4052..645d046a713916d935bb17acf8295b5daf91f917 100644 --- a/src/dimred/SketchMapPointwise.cpp +++ b/src/dimred/SketchMapPointwise.cpp @@ -39,9 +39,8 @@ namespace dimred { class SketchMapPointwise : public SketchMapBase { private: unsigned ncycles; - bool nointerpolation; double cgtol, gbuf; - std::vector<unsigned> npoints; + std::vector<unsigned> npoints, nfgrid; public: static void registerKeywords( Keywords& keys ); SketchMapPointwise( const ActionOptions& ao ); @@ -57,26 +56,33 @@ void SketchMapPointwise::registerKeywords( Keywords& keys ){ keys.add("compulsory","NCYCLES","5","the number of cycles of global optimisation to attempt"); keys.add("compulsory","CGTOL","1E-6","the tolerance for the conjugate gradient minimisation"); keys.add("compulsory","BUFFER","1.1","grid extent for search is (max projection - minimum projection) multiplied by this value"); - keys.add("compulsory","NGRID","10","number of points to use in each grid direction"); - keys.addFlag("NOINTERPOLATION",false,"do not use any interpolation to make the optimisation faster"); + keys.add("compulsory","CGRID_SIZE","10","number of points to use in each grid direction"); + keys.add("compulsory","FGRID_SIZE","0","interpolate the grid onto this number of points -- only works in 2D"); } SketchMapPointwise::SketchMapPointwise( const ActionOptions& ao ): Action(ao), SketchMapBase(ao), -npoints(nlow) +npoints(nlow), +nfgrid(nlow) { - parseVector("NGRID",npoints); parse("BUFFER",gbuf); + parseVector("CGRID_SIZE",npoints); parse("BUFFER",gbuf); if( npoints.size()!=nlow ) error("vector giving number of grid point in each direction has wrong size"); parse("NCYCLES",ncycles); parse("CGTOL",cgtol); - parseFlag("NOINTERPOLATION",nointerpolation); + + parseVector("FGRID_SIZE",nfgrid); + if( nfgrid[0]!=0 && nlow!=2 ) error("interpolation only works in two dimensions"); log.printf(" doing %d cycles of global optimisation sweeps\n",ncycles); - log.printf(" using grid of points that is %d",npoints[0]); - for(unsigned j=1;j<npoints.size();++j) log.printf(" by %d",npoints[j]); + log.printf(" using coarse grid of points that is %d",npoints[0]); log.printf(" and that is %f larger than the difference between the position of the minimum and maximum projection \n",gbuf); + for(unsigned j=1;j<npoints.size();++j) log.printf(" by %d",npoints[j]); + if( nfgrid[0]>0 ){ + log.printf(" interpolating stress onto grid of points that is %d",nfgrid[0]); + for(unsigned j=1;j<nfgrid.size();++j) log.printf(" by %d",nfgrid[j]); + log.printf("\n"); + } log.printf(" tolerance for conjugate gradient algorithm equals %f \n",cgtol); - if( nointerpolation ) log.printf(" not using any interpolation in grid search\n"); } void SketchMapPointwise::minimise( Matrix<double>& projections ){ @@ -97,23 +103,23 @@ void SketchMapPointwise::minimise( Matrix<double>& projections ){ // And do the search ConjugateGradient<SketchMapPointwise> mycgminimise( this ); - GridSearch<SketchMapPointwise> mygridsearch( gmin, gmax, npoints, this ); + GridSearch<SketchMapPointwise> mygridsearch( gmin, gmax, npoints, nfgrid, this ); // Run multiple loops over all projections for(unsigned i=0;i<ncycles;++i){ for(unsigned j=0;j<getNumberOfDataPoints();++j){ // Setup target distances and target functions for calculate stress for(unsigned k=0;k<getNumberOfDataPoints();++k) setTargetDistance( k, distances(j,k) ); - if( nointerpolation ){ - // Minimise using grid search - mygridsearch.minimise( mypoint, &SketchMapPointwise::calculateStress ); + // Find current projection of jth point + for(unsigned k=0;k<mypoint.size();++k) mypoint[k]=projections(j,k); + // Minimise using grid search + bool moved=mygridsearch.minimise( mypoint, &SketchMapPointwise::calculateStress ); + if( moved ){ + // Reassign the new projection + for(unsigned k=0;k<mypoint.size();++k) projections(j,k)=mypoint[k]; // Minimise output using conjugate gradient mycgminimise.minimise( cgtol, projections.getVector(), &SketchMapPointwise::calculateFullStress ); - } else { - plumed_merror("use a class here that Sandip will implement that allows you to do this with some interpolation"); } - // Save new projection - for(unsigned k=0;k<nlow;++k) projections(j,k)=mypoint[k]; } } } diff --git a/src/tools/GridSearch.h b/src/tools/GridSearch.h index 7d52ab226c8671b8cc38a5aef80707a6991a699b..44276df6a7b3990209645a42ca67fec59654ca8c 100644 --- a/src/tools/GridSearch.h +++ b/src/tools/GridSearch.h @@ -23,6 +23,7 @@ #define __PLUMED_tools_GridSearch_h #include "MinimiseBase.h" +#include "Grid.h" #include <iostream> #include <math.h> namespace PLMD{ @@ -34,50 +35,61 @@ private: /// calculating class that calculates the energy typedef double(FCLASS::*engf_pointer)( const std::vector<double>& p, std::vector<double>& der ); FCLASS* myclass_func; - std::vector<unsigned> ngrid; - std::vector<double> min, delr; - unsigned np; + Grid* mygrid; + Grid* myfgrid; public: - GridSearch( const std::vector<double>& mmin, const std::vector<double>& mmax, const std::vector<unsigned>& ng, FCLASS* funcc ) : + GridSearch( const std::vector<double>& mmin, const std::vector<double>& mmax, const std::vector<unsigned>& ng, const std::vector<unsigned>& nfg, FCLASS* funcc ) : myclass_func( funcc ), - ngrid(ng), - min(mmin), - delr(mmin.size()) + myfgrid(NULL) { - // Work out the stride - for(unsigned i=0;i<delr.size();++i) delr[i] = ( mmax[i] - mmin[i] ) / static_cast<double>( ng[i] ); - // And the number of poitns in the grid - np=1; for(unsigned i=0;i<ngrid.size();++i) np*=ngrid[i]; + std::vector<std::string> fake_args( nfg.size() ), gmin( nfg.size() ), gmax( nfg.size() ); + for(unsigned i=0;i<nfg.size();++i){ + Tools::convert(i+1,fake_args[i]); + Tools::convert(mmin[i],gmin[i]); + Tools::convert(mmax[i],gmax[i]); + } + std::vector<bool> isperiodic( nfg.size(), false ); + mygrid = new Grid("searcher",fake_args,gmin,gmax,ng,true,true,true,isperiodic,gmin,gmax); + if( nfg[0]>0 ) myfgrid = new Grid("searcher",fake_args,gmin,gmax,nfg,false,false,true,isperiodic,gmin,gmax); } - void minimise( std::vector<double>& p, engf_pointer myfunc ); + ~GridSearch(){ delete mygrid; if(myfgrid) delete myfgrid; } + bool minimise( std::vector<double>& p, engf_pointer myfunc ); }; template <class FCLASS> -void GridSearch<FCLASS>::minimise( std::vector<double>& p, engf_pointer myfunc ){ +bool GridSearch<FCLASS>::minimise( std::vector<double>& p, engf_pointer myfunc ){ + std::vector<double> der( p.size() ); + double initial_eng = (myclass_func->*myfunc)( p, der ); - std::vector<double> fake_der( p.size() ); std::vector<unsigned> ind( p.size() ); - unsigned pmin=0; double emin=(myclass_func->*myfunc)( min, fake_der ); - for(unsigned i=1;i<np;++i){ - unsigned myind=i; ind[0]=myind%ngrid[0]; - for(unsigned j=1;j<p.size()-1;++j){ - myind=(myind-ind[j-1])/ngrid[j-1]; - ind[j]=myind%ngrid[j]; - } - if( p.size()>=2 ) ind[p.size()-1] = (myind - ind[p.size()-2])/ngrid[p.size()-2]; - for(unsigned j=0;j<p.size();++j) p[j] = min[j] + ind[j]*delr[j]; - - double eng = (myclass_func->*myfunc)( p, fake_der ); + double emin=(myclass_func->*myfunc)( mygrid->getPoint(0), der ); + mygrid->setValueAndDerivatives( 0, emin, der ); unsigned pmin=0; + for(unsigned i=1;i<mygrid->getSize();++i){ + double eng = (myclass_func->*myfunc)( mygrid->getPoint(i), der ); + mygrid->setValueAndDerivatives( i, eng, der ); if( eng<emin ){ emin=eng; pmin=i; } } - // Now recover lowest energy point - ind[0]=pmin%ngrid[0]; - for(unsigned j=1;j<p.size()-1;++j){ - pmin=(pmin-ind[j-1])/ngrid[j-1]; - ind[j]=pmin%ngrid[j]; + if( myfgrid ){ + pmin=0; double emin=mygrid->getValueAndDerivatives( myfgrid->getPoint(0), der ); + for(unsigned i=1;i<myfgrid->getSize();++i){ + double eng = mygrid->getValueAndDerivatives( myfgrid->getPoint(i), der ); + if( eng<emin ){ emin=eng; pmin=i; } + } + double checkEng = (myclass_func->*myfunc)( myfgrid->getPoint(pmin), der ); + if( checkEng<initial_eng ){ + p=myfgrid->getPoint(pmin); + return true; + } else { + return false; + } + } + + if( emin<initial_eng ){ + p=mygrid->getPoint(pmin); + return true; + } else { + return false; } - if( p.size()>=2 ) ind[p.size()-1] = (pmin - ind[p.size()-2])/ngrid[p.size()-2]; - for(unsigned j=0;j<p.size();++j) p[j] = min[j] + ind[j]*delr[j]; } }