From 2758843f957bee5065e44c71bf4699f03820b030 Mon Sep 17 00:00:00 2001 From: Giovanni Bussi <giovanni.bussi@gmail.com> Date: Tue, 12 Jul 2011 17:30:59 +0200 Subject: [PATCH] Optimized MPI communication I do not understand why, but on sp6 asynchronous communication is not very efficient. For more than 10 processes, I switch on an old-style allgather based atom sharing. --- src/Atoms.cpp | 69 +++++++++++++++++++++++++++++----------- src/PlumedCommunicator.h | 22 +++++++++++++ 2 files changed, 72 insertions(+), 19 deletions(-) diff --git a/src/Atoms.cpp b/src/Atoms.cpp index 8dc5bd1a8..46dfb6b98 100644 --- a/src/Atoms.cpp +++ b/src/Atoms.cpp @@ -81,12 +81,16 @@ void Atoms::share(){ mdatoms->getCharges(gatindex,charges); mdatoms->getPositions(gatindex,positions); if(dd && int(gatindex.size())<natoms){ + bool async=dd.Get_size()<10; +// async=true; std::set<int> unique; for(unsigned i=0;i<requestset.size();i++){ if(requestset[i]->isActive()) unique.insert(requestset[i]->unique.begin(),requestset[i]->unique.end()); } - for(unsigned i=0;i<dd.mpi_request_positions.size();i++) dd.mpi_request_positions[i].wait(); - for(unsigned i=0;i<dd.mpi_request_index.size();i++) dd.mpi_request_index[i].wait(); + if(async){ + for(unsigned i=0;i<dd.mpi_request_positions.size();i++) dd.mpi_request_positions[i].wait(); + for(unsigned i=0;i<dd.mpi_request_index.size();i++) dd.mpi_request_index[i].wait(); + } int count=0; for(std::set<int>::const_iterator p=unique.begin();p!=unique.end();++p){ if(dd.g2l[*p]>=0){ @@ -99,11 +103,34 @@ void Atoms::share(){ count++; } } - dd.mpi_request_positions.resize(dd.Get_size()); - dd.mpi_request_index.resize(dd.Get_size()); - for(int i=0;i<dd.Get_size();i++){ - dd.mpi_request_index[i]=dd.Isend(&dd.indexToBeSent[0],count,i,666); - dd.mpi_request_positions[i]=dd.Isend(&dd.positionsToBeSent[0],5*count,i,667); + if(async){ + dd.mpi_request_positions.resize(dd.Get_size()); + dd.mpi_request_index.resize(dd.Get_size()); + for(int i=0;i<dd.Get_size();i++){ + 
dd.mpi_request_index[i]=dd.Isend(&dd.indexToBeSent[0],count,i,666); + dd.mpi_request_positions[i]=dd.Isend(&dd.positionsToBeSent[0],5*count,i,667); + } + }else{ + const int n=(dd.Get_size()); + vector<int> counts(n); + vector<int> displ(n); + vector<int> counts5(n); + vector<int> displ5(n); + dd.Allgather(&count,1,&counts[0],1); + displ[0]=0; + for(int i=1;i<n;++i) displ[i]=displ[i-1]+counts[i-1]; + for(int i=1;i<n;++i) counts5[i]=counts[i]*5; + for(int i=1;i<n;++i) displ5[i]=displ[i]*5; + dd.Allgatherv(&dd.indexToBeSent[0],count,&dd.indexToBeReceived[0],&counts[0],&displ[0]); + dd.Allgatherv(&dd.positionsToBeSent[0],5*count,&dd.positionsToBeReceived[0],&counts5[0],&displ5[0]); + int tot=displ[n-1]+counts[n-1]; + for(int i=0;i<tot;i++){ + positions[dd.indexToBeReceived[i]][0]=dd.positionsToBeReceived[5*i+0]; + positions[dd.indexToBeReceived[i]][1]=dd.positionsToBeReceived[5*i+1]; + positions[dd.indexToBeReceived[i]][2]=dd.positionsToBeReceived[5*i+2]; + masses[dd.indexToBeReceived[i]] =dd.positionsToBeReceived[5*i+3]; + charges[dd.indexToBeReceived[i]] =dd.positionsToBeReceived[5*i+4]; + } } } } @@ -114,18 +141,22 @@ void Atoms::wait(){ // receive toBeReceived int count=0; PlumedCommunicator::Status status; - for(int i=0;i<dd.Get_size();i++){ - dd.Recv(&dd.indexToBeReceived[count],dd.indexToBeReceived.size()-count,i,666,status); - int c=status.Get_count<int>(); - dd.Recv(&dd.positionsToBeReceived[5*count],dd.positionsToBeReceived.size()-5*count,i,667); - count+=c; - } - for(int i=0;i<count;i++){ - positions[dd.indexToBeReceived[i]][0]=dd.positionsToBeReceived[5*i+0]; - positions[dd.indexToBeReceived[i]][1]=dd.positionsToBeReceived[5*i+1]; - positions[dd.indexToBeReceived[i]][2]=dd.positionsToBeReceived[5*i+2]; - masses[dd.indexToBeReceived[i]] =dd.positionsToBeReceived[5*i+3]; - charges[dd.indexToBeReceived[i]] =dd.positionsToBeReceived[5*i+4]; + bool async=dd.Get_size()<10; +// async=true; + if(async){ + for(int i=0;i<dd.Get_size();i++){ + 
dd.Recv(&dd.indexToBeReceived[count],dd.indexToBeReceived.size()-count,i,666,status); + int c=status.Get_count<int>(); + dd.Recv(&dd.positionsToBeReceived[5*count],dd.positionsToBeReceived.size()-5*count,i,667); + count+=c; + } + for(int i=0;i<count;i++){ + positions[dd.indexToBeReceived[i]][0]=dd.positionsToBeReceived[5*i+0]; + positions[dd.indexToBeReceived[i]][1]=dd.positionsToBeReceived[5*i+1]; + positions[dd.indexToBeReceived[i]][2]=dd.positionsToBeReceived[5*i+2]; + masses[dd.indexToBeReceived[i]] =dd.positionsToBeReceived[5*i+3]; + charges[dd.indexToBeReceived[i]] =dd.positionsToBeReceived[5*i+4]; + } } if(collectEnergy) dd.Sum(&energy,1); forceOnEnergy=0.0; diff --git a/src/PlumedCommunicator.h b/src/PlumedCommunicator.h index 1b1472f0e..73809c8cc 100644 --- a/src/PlumedCommunicator.h +++ b/src/PlumedCommunicator.h @@ -70,6 +70,8 @@ public: template <class T> void Allgatherv(const T*,int,T*,const int*,const int*); template <class T> + void Allgather(const T*,int,T*,int); + template <class T> Request Isend(T*,int,int,int); template <class T> void Recv(T*,int,int,int,Status&); @@ -122,6 +124,26 @@ void PlumedCommunicator::Allgatherv(const T*sendbuf,int sendcount,T*recvbuf,cons #endif } +template<class T> +void PlumedCommunicator::Allgather(const T*sendbuf,int sendcount,T*recvbuf,int recvcount){ +#if defined(__PLUMED_MPI) + if(initialized()){ + void*s=const_cast<void*>((const void*)sendbuf); + void*r=const_cast<void*>((const void*)recvbuf); + if(s==NULL)s=MPI_IN_PLACE; + MPI_Allgather(s,sendcount,getMPIType<T>(),r,recvcount,getMPIType<T>(),communicator); + } +#else + (void) sendbuf; + (void) sendcount; + (void) recvbuf; + (void) recvcounts; + (void) displs; + assert(0); +#endif +} + + template <class T> PlumedCommunicator::Request PlumedCommunicator::Isend(T*buf,int count,int source,int tag){ Request req; -- GitLab