Commit 2758843f authored by Giovanni Bussi's avatar Giovanni Bussi

Optimized MPI communication

I do not understand why, but on sp6 asynchronous communication is not very
efficient. For 10 or more processes, I switch to old-style
allgather-based atom sharing.
parent 3acdfc8f
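For context, a minimal standalone sketch (not PLUMED code; names, tags, and the plain-int payload are illustrative) of the pattern this commit introduces: below a size threshold, share data with non-blocking point-to-point messages; at or above it, fall back to a single collective allgather.

#include <mpi.h>
#include <vector>

// Each rank shares `count` ints with every other rank; the transport is
// chosen from the communicator size, as in the commit.
std::vector<int> share_all(MPI_Comm comm, const std::vector<int>& mine) {
  int size;
  MPI_Comm_size(comm, &size);
  const int count = (int)mine.size();      // assume equal counts per rank
  std::vector<int> all((size_t)size * count);
  if (size < 10) {                         // same threshold as the commit
    // non-blocking path: one Irecv/Isend pair per peer, then wait for all
    std::vector<MPI_Request> reqs(2 * size);
    for (int i = 0; i < size; ++i) {
      MPI_Irecv(all.data() + (size_t)i * count, count, MPI_INT, i, 666, comm, &reqs[2*i]);
      MPI_Isend(mine.data(), count, MPI_INT, i, 666, comm, &reqs[2*i+1]);
    }
    MPI_Waitall(2 * size, reqs.data(), MPI_STATUSES_IGNORE);
  } else {
    // collective path: a single allgather replaces all point-to-point traffic
    MPI_Allgather(mine.data(), count, MPI_INT, all.data(), count, MPI_INT, comm);
  }
  return all;
}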
......@@ -81,12 +81,16 @@ void Atoms::share(){
mdatoms->getCharges(gatindex,charges);
mdatoms->getPositions(gatindex,positions);
if(dd && int(gatindex.size())<natoms){
bool async=dd.Get_size()<10; // non-blocking sends proved slow on sp6 beyond ~10 ranks
// async=true; // uncomment to force the non-blocking path
std::set<int> unique;
for(unsigned i=0;i<requestset.size();i++){
if(requestset[i]->isActive()) unique.insert(requestset[i]->unique.begin(),requestset[i]->unique.end());
}
for(unsigned i=0;i<dd.mpi_request_positions.size();i++) dd.mpi_request_positions[i].wait();
for(unsigned i=0;i<dd.mpi_request_index.size();i++) dd.mpi_request_index[i].wait();
if(async){
for(unsigned i=0;i<dd.mpi_request_positions.size();i++) dd.mpi_request_positions[i].wait();
for(unsigned i=0;i<dd.mpi_request_index.size();i++) dd.mpi_request_index[i].wait();
}
int count=0;
for(std::set<int>::const_iterator p=unique.begin();p!=unique.end();++p){
if(dd.g2l[*p]>=0){
......@@ -99,11 +103,34 @@ void Atoms::share(){
count++;
}
}
dd.mpi_request_positions.resize(dd.Get_size());
dd.mpi_request_index.resize(dd.Get_size());
for(int i=0;i<dd.Get_size();i++){
dd.mpi_request_index[i]=dd.Isend(&dd.indexToBeSent[0],count,i,666);
dd.mpi_request_positions[i]=dd.Isend(&dd.positionsToBeSent[0],5*count,i,667);
if(async){
dd.mpi_request_positions.resize(dd.Get_size());
dd.mpi_request_index.resize(dd.Get_size());
for(int i=0;i<dd.Get_size();i++){
dd.mpi_request_index[i]=dd.Isend(&dd.indexToBeSent[0],count,i,666);
dd.mpi_request_positions[i]=dd.Isend(&dd.positionsToBeSent[0],5*count,i,667);
}
}else{
const int n=dd.Get_size();
vector<int> counts(n);
vector<int> displ(n);
vector<int> counts5(n);
vector<int> displ5(n);
dd.Allgather(&count,1,&counts[0],1); // share per-rank atom counts
displ[0]=0;
for(int i=1;i<n;++i) displ[i]=displ[i-1]+counts[i-1];
// each atom's payload packs 5 doubles: x, y, z, mass, charge
for(int i=0;i<n;++i) counts5[i]=counts[i]*5;
for(int i=0;i<n;++i) displ5[i]=displ[i]*5;
dd.Allgatherv(&dd.indexToBeSent[0],count,&dd.indexToBeReceived[0],&counts[0],&displ[0]);
dd.Allgatherv(&dd.positionsToBeSent[0],5*count,&dd.positionsToBeReceived[0],&counts5[0],&displ5[0]);
int tot=displ[n-1]+counts[n-1];
// unpack the flattened payload: 3 position components, then mass and charge
for(int i=0;i<tot;i++){
positions[dd.indexToBeReceived[i]][0]=dd.positionsToBeReceived[5*i+0];
positions[dd.indexToBeReceived[i]][1]=dd.positionsToBeReceived[5*i+1];
positions[dd.indexToBeReceived[i]][2]=dd.positionsToBeReceived[5*i+2];
masses[dd.indexToBeReceived[i]] =dd.positionsToBeReceived[5*i+3];
charges[dd.indexToBeReceived[i]] =dd.positionsToBeReceived[5*i+4];
}
}
}
}
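The counts/displacements bookkeeping above is the standard recipe for a variable-size allgather; the factor of 5 reflects the per-atom payload of three position components plus mass and charge. A minimal self-contained sketch of that recipe (illustrative names, plain MPI rather than the PlumedCommunicator wrappers):

#include <mpi.h>
#include <vector>

// Variable-size allgather: first collect every rank's count, then derive
// each rank's displacement as the running sum of the earlier counts.
std::vector<int> allgatherv_ints(MPI_Comm comm, const int* buf, int count) {
  int n;
  MPI_Comm_size(comm, &n);
  std::vector<int> counts(n), displ(n, 0);
  MPI_Allgather(&count, 1, MPI_INT, counts.data(), 1, MPI_INT, comm);
  for (int i = 1; i < n; ++i) displ[i] = displ[i-1] + counts[i-1];
  std::vector<int> recv(displ[n-1] + counts[n-1]); // total across all ranks
  MPI_Allgatherv(buf, count, MPI_INT,
                 recv.data(), counts.data(), displ.data(), MPI_INT, comm);
  return recv;
}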
......@@ -114,18 +141,22 @@ void Atoms::wait(){
// receive toBeReceived
int count=0;
PlumedCommunicator::Status status;
for(int i=0;i<dd.Get_size();i++){
dd.Recv(&dd.indexToBeReceived[count],dd.indexToBeReceived.size()-count,i,666,status);
int c=status.Get_count<int>();
dd.Recv(&dd.positionsToBeReceived[5*count],dd.positionsToBeReceived.size()-5*count,i,667);
count+=c;
}
for(int i=0;i<count;i++){
positions[dd.indexToBeReceived[i]][0]=dd.positionsToBeReceived[5*i+0];
positions[dd.indexToBeReceived[i]][1]=dd.positionsToBeReceived[5*i+1];
positions[dd.indexToBeReceived[i]][2]=dd.positionsToBeReceived[5*i+2];
masses[dd.indexToBeReceived[i]] =dd.positionsToBeReceived[5*i+3];
charges[dd.indexToBeReceived[i]] =dd.positionsToBeReceived[5*i+4];
bool async=dd.Get_size()<10; // must match the transport chosen in share()
// async=true; // keep consistent with share() if forced there
if(async){
for(int i=0;i<dd.Get_size();i++){
dd.Recv(&dd.indexToBeReceived[count],dd.indexToBeReceived.size()-count,i,666,status);
int c=status.Get_count<int>();
dd.Recv(&dd.positionsToBeReceived[5*count],dd.positionsToBeReceived.size()-5*count,i,667);
count+=c;
}
for(int i=0;i<count;i++){
positions[dd.indexToBeReceived[i]][0]=dd.positionsToBeReceived[5*i+0];
positions[dd.indexToBeReceived[i]][1]=dd.positionsToBeReceived[5*i+1];
positions[dd.indexToBeReceived[i]][2]=dd.positionsToBeReceived[5*i+2];
masses[dd.indexToBeReceived[i]] =dd.positionsToBeReceived[5*i+3];
charges[dd.indexToBeReceived[i]] =dd.positionsToBeReceived[5*i+4];
}
}
if(collectEnergy) dd.Sum(&energy,1);
forceOnEnergy=0.0;
......
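The async branch of wait() receives a message of unknown length from each rank and reads the actual element count back from the MPI status. A minimal sketch of that pattern in plain MPI (function name is illustrative):

#include <mpi.h>

// Receive into a buffer with spare capacity, then ask the status how many
// elements actually arrived (the pattern behind status.Get_count<int>()).
int recv_counted(MPI_Comm comm, int* buf, int capacity, int source, int tag) {
  MPI_Status status;
  MPI_Recv(buf, capacity, MPI_INT, source, tag, comm, &status);
  int received = 0;
  MPI_Get_count(&status, MPI_INT, &received);
  return received;
}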
......@@ -70,6 +70,8 @@ public:
template <class T>
void Allgatherv(const T*,int,T*,const int*,const int*);
template <class T>
void Allgather(const T*,int,T*,int);
template <class T>
Request Isend(T*,int,int,int);
template <class T>
void Recv(T*,int,int,int,Status&);
......@@ -122,6 +124,26 @@ void PlumedCommunicator::Allgatherv(const T*sendbuf,int sendcount,T*recvbuf,cons
#endif
}
template<class T>
void PlumedCommunicator::Allgather(const T*sendbuf,int sendcount,T*recvbuf,int recvcount){
#if defined(__PLUMED_MPI)
if(initialized()){
void*s=const_cast<void*>((const void*)sendbuf);
void*r=const_cast<void*>((const void*)recvbuf);
if(s==NULL)s=MPI_IN_PLACE;
MPI_Allgather(s,sendcount,getMPIType<T>(),r,recvcount,getMPIType<T>(),communicator);
}
#else
(void) sendbuf;
(void) sendcount;
(void) recvbuf;
(void) recvcount;
assert(0);
#endif
}
template <class T>
PlumedCommunicator::Request PlumedCommunicator::Isend(T*buf,int count,int dest,int tag){
Request req;
......
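A hypothetical usage of the new Allgather wrapper, mirroring how share() collects the per-rank atom counts (the header name and calling code are assumptions, not taken from the sources):

#include <vector>
#include "PlumedCommunicator.h" // assumed header name

// Collect one int per rank; counts[i] ends up holding rank i's value.
std::vector<int> gatherCounts(PlumedCommunicator& comm, int myCount) {
  std::vector<int> counts(comm.Get_size());
  comm.Allgather(&myCount, 1, &counts[0], 1);
  return counts;
}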