diff --git a/src/tools/Pbc.cpp b/src/tools/Pbc.cpp
index 9a9f8e9a531f987f5b07ccb0fbd0117fabdb749f..ce60029af30a95904609dc3b7b0e0b20f9ab20dd 100644
--- a/src/tools/Pbc.cpp
+++ b/src/tools/Pbc.cpp
@@ -23,6 +23,9 @@
 #include "Tools.h"
 #include "Exception.h"
 #include "LatticeReduction.h"
+#include <iostream>
+#include "Random.h"
+#include <cmath>
 
 namespace PLMD{
 
@@ -33,19 +36,55 @@ Pbc::Pbc():
   invBox.zero();
 }
 
+void Pbc::buildShifts(std::vector<Vector> shifts[2][2][2])const{
+  const double small=1e-28;
+  for(int i=0;i<2;i++) for(int j=0;j<2;j++) for(int k=0;k<2;k++) shifts[i][j][k].clear();
+  for(int l=-1;l<=1;l++) for(int m=-1;m<=1;m++) for(int n=-1;n<=1;n++){
+    int ishift[3]={l,m,n};
+    Vector dshift(l,m,n);
+    unsigned count=0;
+// count how many components are != 0
+    for(int s=0;s<3;s++) if(ishift[s]!=0) count++;
+// skips trivial (0,0,0) and cases with three shifts
+    if(count==0 || count==3) continue;
+// check if that WS face is perpendicular to the axis
+    Vector cosdir=matmul(reduced,transpose(reduced),dshift);
+    double dp=dotProduct(dshift,cosdir);
+    double ref=modulo2(dshift)*modulo2(cosdir);
+    if(std::fabs(ref-dp*dp)<small) continue;
+
+    for(int i=0;i<2;i++) for(int j=0;j<2;j++) for(int k=0;k<2;k++){
+      int block[3]={2*i-1,2*j-1,2*k-1};
+// skip cases where shift would bring too far from origin
+      bool skip=false;
+      for(int s=0;s<3;s++) if(ishift[s]*block[s]>0) skip=true;
+      if(skip) continue;
+      skip=true;
+      for(int s=0;s<3;s++){
+// check that the components of cosdir along the non-shifted directions
+// have the proper sign
+        if(((1-ishift[s]*ishift[s])*block[s])*cosdir[s]<-small) skip=false;
+      }
+      if(skip)continue;
+      shifts[i][j][k].push_back(matmul(transpose(reduced),dshift));
+    }
+  }
+}
+
 void Pbc::fullSearch(Vector&d)const{
+   if(type==unset) return;
    Vector s=matmul(invReduced.transpose(),d);
    for(int i=0;i<3;i++) s[i]=Tools::pbc(s[i]);
    d=matmul(reduced.transpose(),s);
-   const int smax=1;
+   const int smax=4;
    Vector a0(reduced.getRow(0));
    Vector a1(reduced.getRow(1));
    Vector a2(reduced.getRow(2));
    Vector best(d);
    double lbest=d.modulo2();
    for(int i=-smax;i<=smax;i++) for(int j=-smax;j<=smax;j++) for(int k=-smax;k<=smax;k++){
-     int x=i*i+j*j+k*k;
-     if(x==0 || x==3) continue;
+//     int x=i*i+j*j+k*k;
+//     if(x==0 || x==3) continue;
      Vector trial=d+i*a0+j*a1+k*a2;
      double ltrial=trial.modulo2();
      if(ltrial<lbest){
@@ -64,7 +103,7 @@ void Pbc::setBox(const Tensor&b){
 
   type=unset;
   double det=box.determinant();
-  if(det*det<epsilon)return;
+  if(det*det<epsilon) return;
 
   bool cxy=false;
   bool cxz=false;
@@ -76,13 +115,11 @@ void Pbc::setBox(const Tensor&b){
   invBox=box.inverse();
 
   if(cxy && cxz && cyz) type=orthorombic;
-// NOT IMPLEMENTED YET; WILL FALL TO GENERIC
-//  else if(cxy && cxz)   type=yz;
-//  else if(cxy && cyz)   type=xz;
-//  else if(cxz && cyz)   type=xy;
   else type=generic;
-  
+
  if(type==orthorombic){
+   reduced=box;
+   invReduced=inverse(reduced);
     for(unsigned i=0;i<3;i++){
       diag[i]=box[i][i];
       hdiag[i]=0.5*box[i][i];
@@ -92,7 +129,9 @@ void Pbc::setBox(const Tensor&b){
    reduced=box;
    LatticeReduction::reduce(reduced);
    invReduced=inverse(reduced);
- } 
+   buildShifts(shifts);
+}
+
 }
 
 double Pbc::distance( const bool pbc, const Vector& v1, const Vector& v2 ) const {
@@ -100,7 +139,7 @@ double Pbc::distance( const bool pbc, const Vector& v1, const Vector& v2 ) const
   else{ return ( delta(v1,v2) ).modulo(); }
 }
 
-Vector Pbc::distance(const Vector&v1,const Vector&v2)const{
+Vector Pbc::distance(const Vector&v1,const Vector&v2,int*nshifts)const{
   Vector d=delta(v1,v2);
   if(type==unset){
   } else if(type==orthorombic) {
@@ -111,7 +150,35 @@ Vector Pbc::distance(const Vector&v1,const Vector&v2)const{
 //     while(d[i]<=mdiag[i]) d[i]+=diag[i];
 //   }
   } else if(type==generic) {
-   fullSearch(d);
+    Vector s=matmul(d,invReduced);
+// check if images have to be computed:
+//    if((std::fabs(s[0])+std::fabs(s[1])+std::fabs(s[2])>0.5)){
+// NOTICE: the check in the previous line, albeit correct, is breaking many regtest
+//         since it does not apply Tools::pbc in many cases. Moreover, it does not
+//         introduce a significant gain. I thus leave it out for the moment.
+      if(true){
+// bring to -0.5,+0.5 region in scaled coordinates:
+      for(int i=0;i<3;i++) s[i]=Tools::pbc(s[i]);
+      d=matmul(s,reduced);
+// check if shifts have to be attempted:
+      if((std::fabs(s[0])+std::fabs(s[1])+std::fabs(s[2])>0.5)){
+// list of shifts is specific for that "octant" (depends on signs of s[i]):
+        const std::vector<Vector> & myshifts(shifts[(s[0]>0?1:0)][(s[1]>0?1:0)][(s[2]>0?1:0)]);
+        Vector best(d);
+        double lbest(modulo2(best));
+// loop over possible shifts:
+        if(nshifts) *nshifts+=myshifts.size();
+        for(int i=0;i<myshifts.size();i++){
+          Vector trial=d+myshifts[i];
+          double ltrial=modulo2(trial);
+          if(ltrial<lbest){
+            lbest=ltrial;
+            best=trial;
+          }
+        }
+        d=best;
+      }
+    }
   } else plumed_merror("unknown pbc type");
   return d;
 }
@@ -136,6 +203,105 @@ const Tensor& Pbc::getInvBox()const{
   return invBox;
 }
 
+void Pbc::test(){
+  Random r;
+  r.setSeed(-20);
+  for(int i=0;i<1000;i++){
+// random matrix with some zero element
+    Tensor box;
+    for(int j=0;j<3;j++) for(int k=0;k<3;k++) if(r.U01()>0.2){
+      box[j][k]=2.0*r.U01()-1.0;
+    }
+    int boxtype=i%10;
+    switch(boxtype){
+    case 0:
+// cubic
+      for(int j=0;j<3;j++) for(int k=0;k<3;k++) if(j!=k) box[j][k]=0.0;
+      for(int j=1;j<3;j++) box[j][j]=box[0][0];
+      break;
+    case 1:
+// orthorombic
+      for(int j=0;j<3;j++) for(int k=0;k<3;k++) if(j!=k) box[j][k]=0.0;
+      break;
+    case 2:
+// hexagonal
+      {
+      int perm=r.U01()*100;
+      Vector a;
+      a(0)=r.U01()*2-2; a(1)=0.0;a(2)=0.0;
+      double d=r.U01()*2-2;
+      Vector b(0.0,d,0.0);
+      Vector c(0.0,0.5*d,sqrt(3.0)*d*0.5);
+      box.setRow((perm+0)%3,a);
+      box.setRow((perm+1)%3,b);
+      box.setRow((perm+2)%3,c);
+      }
+      break;
+    case 3:
+// bcc
+      {
+      int perm=r.U01()*100;
+      double d=r.U01()*2-2;
+      Vector a(d,d,d);
+      Vector b(d,-d,d);
+      Vector c(d,d,-d);
+      box.setRow((perm+0)%3,a);
+      box.setRow((perm+1)%3,b);
+      box.setRow((perm+2)%3,c);
+      }
+      break;
+    case 4:
+// fcc
+      {
+      int perm=r.U01()*100;
+      double d=r.U01()*2-2;
+      Vector a(d,d,0);
+      Vector b(d,0,d);
+      Vector c(0,d,d);
+      box.setRow((perm+0)%3,a);
+      box.setRow((perm+1)%3,b);
+      box.setRow((perm+2)%3,c);
+      }
+      break;
+    default:
+// triclinic
+      break;
+    }
+
+    Pbc pbc;
+    pbc.setBox(box);
+    std::cerr<<"( "<<boxtype<<" )\n";
+    std::cerr<<"Box:";
+    for(int j=0;j<3;j++) for(int k=0;k<3;k++) std::cerr<<" "<<box[j][k];
+    std::cerr<<"\n";
+    std::cerr<<"Determinant: "<<determinant(box)<<"\n";
+    std::cerr<<"Shifts:";
+    for(int j=0;j<2;j++) for(int k=0;k<2;k++) for(int l=0;l<2;l++) std::cerr<<" "<<pbc.shifts[j][k][l].size();
+    std::cerr<<"\n";
+    int nshifts=0;
+    int ntot=10000;
+    for(int j=0;j<ntot;j++){
+      Vector v(r.U01()-0.5,r.U01()-0.5,r.U01()-0.5);
+      v*=5;
+      for(int j=0;j<3;j++) if(r.U01()>0.2) v(j)=0.0;
+      Vector full(v);
+      Vector fast=pbc.distance(Vector(0,0,0),v,&nshifts);
+      full=fast;
+
+      pbc.fullSearch(full);
+  
+     if(modulo2(fast-full)>1e-10) {
+        std::cerr<<"orig "<<v[0]<<" "<<v[1]<<" "<<v[2]<<"\n";
+        std::cerr<<"fast "<<fast[0]<<" "<<fast[1]<<" "<<fast[2]<<"\n";
+        std::cerr<<"full "<<full[0]<<" "<<full[1]<<" "<<full[2]<<"\n";
+        std::cerr<<"diff "<<modulo2(fast)-modulo2(full)<<std::endl;
+        if(std::fabs(modulo2(fast)-modulo2(full))>1e-15) plumed_error();
+     }
+    }
+    std::cerr<<"Average number of shifts: "<<double(nshifts)/double(ntot)<<"\n";
+  }
+}
+
 
 
 }
diff --git a/src/tools/Pbc.h b/src/tools/Pbc.h
index bb193146d02087f5df5868e69fe0ca33bc4bcf83..238d9ee43d1078bdaa91e62b8d72e840377f12c1 100644
--- a/src/tools/Pbc.h
+++ b/src/tools/Pbc.h
@@ -24,21 +24,26 @@
 
 #include "Vector.h"
 #include "Tensor.h"
+#include <vector>
+#include <cstddef>
 
 namespace PLMD{
 
 class Pbc{
-  enum {unset,orthorombic,xy,xz,yz,generic} type;
+  enum {unset,orthorombic,generic} type;
   Tensor box;
   Tensor invBox;
   Tensor reduced;
   Tensor invReduced;
+  std::vector<Vector> shifts[2][2][2];
   Vector diag,hdiag,mdiag;
+  void buildShifts(std::vector<Vector> shifts[2][2][2])const;
   void fullSearch(Vector&)const;
 public:
+  static void test();
   Pbc();
   double distance( const bool pbc, const Vector& v1, const Vector& v2 ) const;
-  Vector distance(const Vector&,const Vector&)const;
+  Vector distance(const Vector&,const Vector&,int*nshifts=NULL)const;
   void setBox(const Tensor&);
   const Tensor& getBox()const;
   const Tensor& getInvBox()const;